[Libguestfs] [PATCH 4/5] v2v: ova: don't extract files from OVA if it's not needed

Pino Toscano ptoscano at redhat.com
Mon Nov 7 13:46:37 UTC 2016


On Friday, 4 November 2016 14:52:52 CET Tomáš Golembiovský wrote:
> We don't have to always extract all files from the OVA archive. The OVA,
> as defined in the standard, is plain tar. We can work directly over the
> tar archive if we use correct 'offset' and 'size' options when defining
> the backing file for QEMU.
> 
> This leads to improvements in speed and puts much lower requirement on
> available disk space.
> 
> Signed-off-by: Tomáš Golembiovský <tgolembi at redhat.com>
> ---

Just few notes, still need to review the actual bulk of the work done
by this patch.

>  v2v/input_ova.ml | 113 +++++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 98 insertions(+), 15 deletions(-)
> 
> diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
> index f76fe82..60dbaf1 100644
> --- a/v2v/input_ova.ml
> +++ b/v2v/input_ova.ml
> @@ -39,17 +39,20 @@ object
>  
>    method source () =
>  
> -    let untar ?(format = "") file outdir =
> -      let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir ] in
> +    (* Untar part or all files from tar archive. If [path] is specified it is
> +     * a path in the tar archive.
> +     *)
> +    let untar ?(format = "") ?(path="") file outdir =

The default value for 'path' can be omitted here, checking whether
~path was actually specified in the 'untar' call:

> +      let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir; path ] in

  let cmd =
    [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir; ]
    @ (match path with None -> [] | Some p -> [p]) in

In this case, having an empty default value for 'path' means an empty
parameter will be passed as argument for 'tar'.

>        if run_command cmd <> 0 then
>          error (f_"error unpacking %s, see earlier error messages") ova in
>  
>      (* Extract ova file. *)
> -    let exploded =
> +    let exploded, partial =
>        (* The spec allows a directory to be specified as an ova.  This
>         * is also pretty convenient.
>         *)
> -      if is_directory ova then ova
> +      if is_directory ova then ova, false
>        else (
>          let uncompress_head zcat file =
>            let cmd = sprintf "%s %s" zcat (quote file) in
> @@ -67,11 +70,19 @@ object
>  
>            tmpfile in
>  
> +        (* Untar only ovf and manifest from the archive *)
> +        let untar_partial file outdir =
> +          let cmd1 = [ "tar"; "-tf" ; file ] in
> +          let cmd2 = [ "grep"; "\\.\\(ovf\\|mf\\)$" ] in
> +          let cmd3 = [ "xargs"; "tar"; "-xf" ; file; "-C"; outdir ] in
> +          if shell_command ((stringify_args cmd1) ^ " | " ^ (stringify_args cmd2) ^ " | " ^ (stringify_args cmd3)) <> 0 then

Hmm... maybe it's just taste, but I find this command concatenation
confusing, and prone to break. I'd use:
1) external_command to get the list of files in the archive
2) filter the filenames (see Filename.check_suffix) to pick only the
   needed ones
3) maybe using the helper 'untar' function above to extract the files

> +            error (f_"error unpacking %s, see earlier error messages") ova in
> +
>          match detect_file_type ova with
>          | `Tar ->
>            (* Normal ovas are tar file (not compressed). *)
> -          untar ova tmpdir;
> -          tmpdir
> +          untar_partial ova tmpdir;
> +          tmpdir, true
>          | `Zip ->
>            (* However, although not permitted by the spec, people ship
>             * zip files as ova too.
> @@ -81,7 +92,7 @@ object
>              [ "-j"; "-d"; tmpdir; ova ] in
>            if run_command cmd <> 0 then
>              error (f_"error unpacking %s, see earlier error messages") ova;
> -          tmpdir
> +          tmpdir, false
>          | (`GZip|`XZ) as format ->
>            let zcat, tar_fmt =
>              match format with
> @@ -94,7 +105,7 @@ object
>            (match tmpfiletype with
>            | `Tar ->
>              untar ~format:tar_fmt ova tmpdir;
> -            tmpdir
> +            tmpdir, false
>            | `Zip | `GZip | `XZ | `Unknown ->
>              error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova
>            )
> @@ -135,6 +146,49 @@ object
>        loop [dir]
>      in
>  
> +    (* Find file in [tar] archive and return at which byte it starts and how
> +     * long it is.
> +     *)
> +    let find_file_in_tar tar filename =
> +      let cmd1 = [ "tar"; "tRvf"; tar ] in
> +      let cmd2 = [ "awk"; sprintf
> +        "$8 == \"%s\" {print substr($2, 1, index($2, \":\")-1), $5}"
> +        filename ]
> +      in

As above, I'd use external_command + manual filtering code in OCaml.
YMMV though.

> +      let lines =
> +        external_command ((stringify_args cmd1) ^ " | " ^ (stringify_args cmd2))
> +      in
> +      if (List.length lines < 1) then

No need for parenthesis, and it can be even simplified as:
  if lines = [] then

> +        raise Not_found
> +      else
> +        let soffset, ssize = String.split " " (List.hd lines) in
> +        let offset =
> +          try Int64.of_string soffset
> +          with Failure _ ->
> +            error (f_"Invalid offset returned by `tar`: %s") soffset
> +        in
> +        let size =
> +          try Int64.of_string ssize
> +          with Failure _ ->
> +            error (f_"Invalid size returend by `tar': %s") ssize

Typo in the error message, and different wrt the above one (`tar` vs
`tar') .

> +        in
> +        (* Note: Offset is actualy block number and there is a single block
> +         * with tar header at the beginning of the file. So skip the header and
> +         * convert the block number to bytes before returning.
> +         *)
> +        (offset +^ 1L) *^ 512L, size
> +    in
> +
> +    let subfolder folder parent =
> +      if String.is_prefix folder (parent // "") then
> +        let len = String.length parent in
> +        String.sub folder (len+1) (String.length folder-len-1)
> +      else if folder = parent then
> +        ""
> +      else
> +        assert(false)

No need for parenthesis.



-- 
Pino Toscano
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20161107/f7f7b005/attachment.sig>


More information about the Libguestfs mailing list