[Libguestfs] [PATCH v2 4/5] v2v: ova: don't extract files from OVA if it's not needed

Wed Nov 23 15:16:44 UTC 2016

On Mon, 21 Nov 2016 16:21:02 +0100
Pino Toscano <ptoscano at redhat.com> wrote:

> On Saturday, 12 November 2016 16:37:52 CET Tomáš Golembiovský wrote:
> > We don't have to always extract all files from the OVA archive. The OVA,
> > as defined in the standard, is plain tar. We can work directly over the
> > tar archive if we use correct 'offset' and 'size' options when defining
> > the backing file for QEMU.
> > 
> > This leads to improvements in speed and puts much lower requirement on
> > available disk space.
> > 
> > Signed-off-by: Tomáš Golembiovský <tgolembi at redhat.com>
> > ---
> >  v2v/input_ova.ml | 177 +++++++++++++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 154 insertions(+), 23 deletions(-)
> > 
> > diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
> > index f76fe82..7a3e27b 100644
> > --- a/v2v/input_ova.ml
> > +++ b/v2v/input_ova.ml
> > @@ -39,17 +39,23 @@ object
> >  
> >    method source () =
> >  
> > -    let untar ?(format = "") file outdir =
> > -      let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir ] in
> > +    (* Untar part or all files from tar archive. If [path] is specified it is
> > +     * a path in the tar archive.
> > +     *)
> > +    let untar ?(format = "") ?(path) file outdir =  
> 
> No need for parenthesis around "path".
> Also, tar supports extracting more files at the same time, so I'd
> change "path" into "paths", and just call this once later on ...

You mean pass an array/list instead of single string? Yeah, why not.

> 
> > +      let cmd =
> > +        [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir ]
> > +        @ (match path with None -> [] | Some p -> [p])
> > +      in
> >        if run_command cmd <> 0 then
> >          error (f_"error unpacking %s, see earlier error messages") ova in
> >  
> >      (* Extract ova file. *)
> > -    let exploded =
> > +    let exploded, partial =
> >        (* The spec allows a directory to be specified as an ova.  This
> >         * is also pretty convenient.
> >         *)
> > -      if is_directory ova then ova
> > +      if is_directory ova then ova, false
> >        else (
> >          let uncompress_head zcat file =
> >            let cmd = sprintf "%s %s" zcat (quote file) in
> > @@ -67,11 +73,53 @@ object
> >  
> >            tmpfile in
> >  
> > +        (* Untar only ovf and manifest from the archive *)
> > +        let untar_partial file outdir =  
> 
> I'd rename this as "untar_metadata", as it seems a better fitting name.
> YMMV.

Agreed.

> > +        let qemu_img_version () =
> > +          let cmd = "qemu-img --version" in
> > +          let lines = external_command cmd in
> > +          match lines with
> > +          | [] -> error ("'qemu-img --version' returned no output")
> > +          | line :: _ ->
> > +              let rex = Str.regexp "qemu-img version \\([0-9]+\\)\\.\\([0-9]+\\)" in
> > +              if Str.string_match rex line 0 then (
> > +                try
> > +                  int_of_string (Str.matched_group 1 line),
> > +                  int_of_string (Str.matched_group 2 line)
> > +                with Failure _ ->
> > +                  warning (f_"warning: failed to read qemu-img version! Line: %S")
> > +                  line;  
> 
> "warning" is already printed by the warning function; also I'd simplify
> the message as:
> 
>   warning (f_"failed to parse the version of qemu-img (%S), assuming 0.9")
>     line

Whops.

> > +      let rec loop lines =
> > +        match lines with
> > +        | [] -> raise Not_found
> > +        | line :: lines -> (
> > +          (* Lines have the form:
> > +           * block <offset>: <perms> <owner>/<group> <size> <mdate> <mtime> <file>
> > +           *)
> > +          let rex = Str.regexp "^block \\([0-9]+\\): \\([^ \\t]+\\) \\([^ \\t]+\\) +\\([^ \\t]+\\) \\([^ \\t]+\\) \\([^ \\t]+\\) \\(.+\\)$" in  
> 
> Whoa how unreadable :/ The alternative would be using String.nsplit,
> but it would play badly with filenames with spaces inside the archive.
> Maybe a faster alternative could be:
> a) find the 6th empty space, and pick the substring of all the rest
>    of the string after it
> b) filter the lines by picking only the wanted file, checking using (a)
> c) once a single line is found, use String.nsplit and parse only parts
>    1 (removing the ':' suffix) and 4
> 
> It feels slightly more complex, but this way we can avoid running the
> regex until we find the right filename.

The problem is there are multiple spaces used to align the column with
file sizes. Hence the complicated regexp. :(

-- 
Tomáš Golembiovský <tgolembi at redhat.com>