[Libguestfs] [PATCH 4/5] v2v: ova: don't extract files from OVA if it's not needed

Tomáš Golembiovský tgolembi at redhat.com
Fri Nov 4 13:52:52 UTC 2016


We don't always have to extract all files from the OVA archive. The OVA,
as defined in the standard, is a plain tar archive. We can work directly
on the tar archive if we use the correct 'offset' and 'size' options when
defining the backing file for QEMU.

This improves speed and greatly reduces the amount of disk space
required.

Signed-off-by: Tomáš Golembiovský <tgolembi at redhat.com>
---
 v2v/input_ova.ml | 113 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 98 insertions(+), 15 deletions(-)

diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index f76fe82..60dbaf1 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -39,17 +39,20 @@ object
 
   method source () =
 
-    let untar ?(format = "") file outdir =
-      let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir ] in
+    (* Untar the member [path] of tar archive [file] into [outdir], or every
+     * file when [path] is empty (no empty argument is passed to tar). *)
+    let untar ?(format = "") ?(path="") file outdir =
+      let members = if path = "" then [] else [ path ] in
+      let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir ] @ members in
       if run_command cmd <> 0 then
         error (f_"error unpacking %s, see earlier error messages") ova in
 
     (* Extract ova file. *)
-    let exploded =
+    let exploded, partial =
       (* The spec allows a directory to be specified as an ova.  This
        * is also pretty convenient.
        *)
-      if is_directory ova then ova
+      if is_directory ova then ova, false
       else (
         let uncompress_head zcat file =
           let cmd = sprintf "%s %s" zcat (quote file) in
@@ -67,11 +70,19 @@ object
 
           tmpfile in
 
+        (* Extract only the .ovf and .mf members of [file] into [outdir]. *)
+        let untar_partial file outdir =
+          let list_members = [ "tar"; "-tf" ; file ] in
+          let keep_metadata = [ "grep"; "\\.\\(ovf\\|mf\\)$" ] in
+          let extract = [ "xargs"; "tar"; "-xf" ; file; "-C"; outdir ] in
+          if shell_command (String.concat " | " (List.map stringify_args [ list_members; keep_metadata; extract ])) <> 0 then
+            error (f_"error unpacking %s, see earlier error messages") ova in
+
         match detect_file_type ova with
         | `Tar ->
           (* Normal ovas are tar file (not compressed). *)
-          untar ova tmpdir;
-          tmpdir
+          untar_partial ova tmpdir;
+          tmpdir, true
         | `Zip ->
           (* However, although not permitted by the spec, people ship
            * zip files as ova too.
@@ -81,7 +92,7 @@ object
             [ "-j"; "-d"; tmpdir; ova ] in
           if run_command cmd <> 0 then
             error (f_"error unpacking %s, see earlier error messages") ova;
-          tmpdir
+          tmpdir, false
         | (`GZip|`XZ) as format ->
           let zcat, tar_fmt =
             match format with
@@ -94,7 +105,7 @@ object
           (match tmpfiletype with
           | `Tar ->
             untar ~format:tar_fmt ova tmpdir;
-            tmpdir
+            tmpdir, false
           | `Zip | `GZip | `XZ | `Unknown ->
             error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova
           )
@@ -135,6 +146,49 @@ object
       loop [dir]
     in
 
+    (* Find [filename] in the [tar] archive and return the byte offset at
+     * which its data starts and its size.  Raises [Not_found] if missing.
+     *)
+    let find_file_in_tar tar filename =
+      let cmd1 = [ "tar"; "tRvf"; tar ] in
+      let cmd2 = [ "awk"; sprintf
+        "$8 == \"%s\" {print substr($2, 1, index($2, \":\")-1), $5}"
+        filename ]
+      in
+      let lines =
+        external_command ((stringify_args cmd1) ^ " | " ^ (stringify_args cmd2))
+      in
+      match lines with
+      | [] -> raise Not_found
+      | line :: _ ->
+        let soffset, ssize = String.split " " line in
+        let offset =
+          try Int64.of_string soffset
+          with Failure _ ->
+            error (f_"Invalid offset returned by `tar`: %s") soffset
+        in
+        let size =
+          try Int64.of_string ssize
+          with Failure _ ->
+            error (f_"Invalid size returned by `tar`: %s") ssize
+        in
+        (* Note: Offset is actually a block number and there is a single block
+         * with the tar header at the beginning of the file. So skip the header
+         * and convert the block number to bytes before returning.
+         *)
+        (offset +^ 1L) *^ 512L, size
+    in
+
+    (* Return [folder] relative to [parent], or "" if they are the same. *)
+    let subfolder folder parent =
+      let prefix = parent // "" in
+      if String.is_prefix folder prefix then
+        let plen = String.length prefix in
+        String.sub folder plen (String.length folder - plen)
+      else if folder = parent then ""
+      else assert false
+    in
+
     (* Search for the ovf file. *)
     let ovf = find_files exploded ".ovf" in
     let ovf =
@@ -152,6 +206,7 @@ object
       fun mf ->
         debug "processing manifest %s" mf;
         let mf_folder = Filename.dirname mf in
+        let mf_subfolder = subfolder mf_folder exploded in
         let chan = open_in mf in
         let rec loop () =
           let line = input_line chan in
@@ -160,7 +215,11 @@ object
             let disk = Str.matched_group 2 line in
             let expected = Str.matched_group 3 line in
             let csum = Checksums.of_string mode expected in
-            try Checksums.verify_checksum csum (mf_folder // disk)
+            try
+              if partial then
+                Checksums.verify_checksum csum ~tar:ova (mf_subfolder // disk)
+              else
+                Checksums.verify_checksum csum (mf_folder // disk)
             with Checksums.Mismatched_checksum (_, actual) ->
               error (f_"checksum of disk %s does not match manifest %s (actual %s(%s) = %s, expected %s(%s) = %s)")
                 disk mf mode disk actual mode disk expected;
@@ -283,25 +342,49 @@ object
             | Some "gzip" -> true
             | Some s -> error (f_"unsupported comprression in OVF: %s") s in
 
-          let filename = if compressed then (
+          let filename, partial = if compressed then (
+            if partial then
+              untar ~path:((subfolder ovf_folder exploded) // filename)
+                ova tmpdir;
             let new_filename = tmpdir // String.random8 () ^ ".vmdk" in
             let cmd =
               sprintf "zcat %s > %s" (quote ovf_folder // filename) (quote new_filename) in
             if shell_command cmd <> 0 then
               error (f_"error uncompressing %s, see earlier error messages")
                 filename;
-            new_filename
+            new_filename, false
           )
+          else if partial then
+            (subfolder ovf_folder exploded) // filename, partial
           else
-            ovf_folder // filename
+            ovf_folder // filename, partial
           in
 
-          (* Does the file exist and is it readable? *)
-          Unix.access filename [Unix.R_OK];
+          let qemu_uri =
+            if not partial then (
+              (* Does the file exist and is it readable? *)
+              Unix.access filename [Unix.R_OK];
+              filename
+            )
+            else (
+              let offset, size =
+                try find_file_in_tar ova filename
+                with Not_found ->
+                  error (f_"file '%s' not found in the ova") filename
+              in
+              (* QEMU requires size aligned to 512 bytes. This is safe because
+               * tar also works with 512 byte blocks.
+               *)
+              let size = roundup64 size 512L in
+              sprintf
+                "json:{ \"file\": { \"driver\":\"raw\", \"offset\":\"%Ld\", \"size\":\"%Ld\", \"file\": { \"filename\":\"%s\" } } }"
+                offset size ova
+            )
+          in
 
           let disk = {
             s_disk_id = i;
-            s_qemu_uri = filename;
+            s_qemu_uri = qemu_uri;
             s_format = Some "vmdk";
             s_controller = controller;
           } in
-- 
2.10.1




More information about the Libguestfs mailing list