[Libguestfs] [PATCH v2 3/9] v2v: Ignore miscellaneous tar messages when parsing tar for file locations.

Richard W.M. Jones rjones at redhat.com
Wed Apr 25 13:35:28 UTC 2018

We use ‘tar tRvf’ to parse the locations of files within the tarball.
However, examination of tar.git:src/list.c shows that various other
messages can also appear in the output:

  block <offset>: ** Block of NULs **
  block <offset>: ** End of File **

Indeed, it was easy to produce the first message just by using a modern
tar to create a tarball:

  $ tar tRvf '/var/tmp/bz1570407-reproducer.ova'
  block 0: -rw-r--r-- rjones/rjones   100 2018-04-22 17:06 RHEL7_3_042218_extra-disk1.vmdk.000000000
  block 2: -rw-r--r-- rjones/rjones   243 2018-04-22 17:07 RHEL7_3_042218_extra.mf
  block 4: -rw-r--r-- rjones/rjones 13066 2018-04-22 15:08 RHEL7_3_042218_extra.ovf
  block 31: ** Block of NULs **

Ignore these messages, so that they no longer trigger the
"failed to parse line returned by tar" error.
 v2v/utils.ml | 71 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/v2v/utils.ml b/v2v/utils.ml
index 372ad8aaa..d73011f9f 100644
--- a/v2v/utils.ml
+++ b/v2v/utils.ml
@@ -147,6 +147,7 @@ let error_if_no_ssh_agent () =
     error (f_"ssh-agent authentication has not been set up ($SSH_AUTH_SOCK is not set).  This is required by qemu to do passwordless ssh access.  See the virt-v2v(1) man page for more information.")
 let ws = PCRE.compile "\\s+"
+let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
 let find_file_in_tar tar filename =
   let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in
@@ -156,42 +157,50 @@ let find_file_in_tar tar filename =
     | line :: lines -> (
       (* Lines have the form:
        * block <offset>: <perms> <owner>/<group> <size> <mdate> <mtime> <file>
+       * or:
+       * block <offset>: ** Block of NULs **
+       * block <offset>: ** End of File **
-      let elems = PCRE.nsplit ~max:8 ws line in
-      if List.length elems = 8 && List.hd elems = "block" then (
-        let elems = Array.of_list elems in
-        let offset = elems.(1) in
-        let size = elems.(4) in
-        let fname = elems.(7) in
+      if PCRE.matches re_tar_message line then
+        loop lines (* ignore "** Block of NULs **" etc. *)
+      else (
+        let elems = PCRE.nsplit ~max:8 ws line in
+        if List.length elems = 8 && List.hd elems = "block" then (
+          let elems = Array.of_list elems in
+          let offset = elems.(1) in
+          let size = elems.(4) in
+          let fname = elems.(7) in
-        if fname <> filename then
-          loop lines
-        else (
-          let offset =
-            try
-              (* There should be a colon at the end *)
-              let i = String.rindex offset ':' in
-              if i == (String.length offset)-1 then
-                Int64.of_string (String.sub offset 0 i)
-              else
-                failwith "colon at wrong position"
-            with Failure _ | Not_found ->
-              failwithf (f_"invalid offset returned by tar: %S") offset in
+          if fname <> filename then
+            loop lines
+          else (
+            let offset =
+              try
+                (* There should be a colon at the end *)
+                let i = String.rindex offset ':' in
+                if i == (String.length offset)-1 then
+                  Int64.of_string (String.sub offset 0 i)
+                else
+                  failwith "colon at wrong position"
+              with Failure _ | Not_found ->
+                failwithf (f_"invalid offset returned by tar: %S") offset in
-          let size =
-            try Int64.of_string size
-            with Failure _ ->
-              failwithf (f_"invalid size returned by tar: %S") size in
+            let size =
+              try Int64.of_string size
+              with Failure _ ->
+                failwithf (f_"invalid size returned by tar: %S") size in
-          (* Note: Offset is actualy block number and there is a single
-           * block with tar header at the beginning of the file. So skip
-           * the header and convert the block number to bytes before
-           * returning.
-           *)
-          (offset +^ 1L) *^ 512L, size
+            (* Note: Offset is actualy block number and there is a single
+             * block with tar header at the beginning of the file. So skip
+             * the header and convert the block number to bytes before
+             * returning.
+             *)
+            (offset +^ 1L) *^ 512L, size
+          )
-      ) else
-        failwithf (f_"failed to parse line returned by tar: %S") line
+        else
+          failwithf (f_"failed to parse line returned by tar: %S") line
+      )
   loop lines

More information about the Libguestfs mailing list