[Libguestfs] [PATCH 4/5] v2v: ova: don't extract files from OVA if it's not needed
Pino Toscano
ptoscano at redhat.com
Mon Nov 7 13:46:37 UTC 2016
On Friday, 4 November 2016 14:52:52 CET Tomáš Golembiovský wrote:
> We don't have to always extract all files from the OVA archive. The OVA,
> as defined in the standard, is plain tar. We can work directly over the
> tar archive if we use correct 'offset' and 'size' options when defining
> the backing file for QEMU.
>
> This leads to improvements in speed and puts much lower requirement on
> available disk space.
>
> Signed-off-by: Tomáš Golembiovský <tgolembi at redhat.com>
> ---
Just few notes, still need to review the actual bulk of the work done
by this patch.
> v2v/input_ova.ml | 113 +++++++++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 98 insertions(+), 15 deletions(-)
>
> diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
> index f76fe82..60dbaf1 100644
> --- a/v2v/input_ova.ml
> +++ b/v2v/input_ova.ml
> @@ -39,17 +39,20 @@ object
>
> method source () =
>
> - let untar ?(format = "") file outdir =
> - let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir ] in
> + (* Untar part or all files from tar archive. If [path] is specified it is
> + * a path in the tar archive.
> + *)
> + let untar ?(format = "") ?(path="") file outdir =
The default value for 'path' can be omitted here, checking whether
~path was actually specified in the 'untar' call:
> + let cmd = [ "tar"; sprintf "-x%sf" format; file; "-C"; outdir; path ] in
let cmd =
[ "tar"; sprintf "-x%sf" format; file; "-C"; outdir; ]
@ (match path with None -> [] | Some p -> [p]) in
In this case, having an empty default value for 'path' means an empty
parameter will be passed as argument for 'tar'.
> if run_command cmd <> 0 then
> error (f_"error unpacking %s, see earlier error messages") ova in
>
> (* Extract ova file. *)
> - let exploded =
> + let exploded, partial =
> (* The spec allows a directory to be specified as an ova. This
> * is also pretty convenient.
> *)
> - if is_directory ova then ova
> + if is_directory ova then ova, false
> else (
> let uncompress_head zcat file =
> let cmd = sprintf "%s %s" zcat (quote file) in
> @@ -67,11 +70,19 @@ object
>
> tmpfile in
>
> + (* Untar only ovf and manifest from the archive *)
> + let untar_partial file outdir =
> + let cmd1 = [ "tar"; "-tf" ; file ] in
> + let cmd2 = [ "grep"; "\\.\\(ovf\\|mf\\)$" ] in
> + let cmd3 = [ "xargs"; "tar"; "-xf" ; file; "-C"; outdir ] in
> + if shell_command ((stringify_args cmd1) ^ " | " ^ (stringify_args cmd2) ^ " | " ^ (stringify_args cmd3)) <> 0 then
Hmm... maybe it's just taste, but I find this command concatenation
confusing, and prone to break. I'd use:
1) external_command to get the list of files in the archive
2) filter the filenames (see Filename.check_suffix) to pick only the
needed ones
3) maybe using the helper 'untar' function above to extract the files
> + error (f_"error unpacking %s, see earlier error messages") ova in
> +
> match detect_file_type ova with
> | `Tar ->
> (* Normal ovas are tar file (not compressed). *)
> - untar ova tmpdir;
> - tmpdir
> + untar_partial ova tmpdir;
> + tmpdir, true
> | `Zip ->
> (* However, although not permitted by the spec, people ship
> * zip files as ova too.
> @@ -81,7 +92,7 @@ object
> [ "-j"; "-d"; tmpdir; ova ] in
> if run_command cmd <> 0 then
> error (f_"error unpacking %s, see earlier error messages") ova;
> - tmpdir
> + tmpdir, false
> | (`GZip|`XZ) as format ->
> let zcat, tar_fmt =
> match format with
> @@ -94,7 +105,7 @@ object
> (match tmpfiletype with
> | `Tar ->
> untar ~format:tar_fmt ova tmpdir;
> - tmpdir
> + tmpdir, false
> | `Zip | `GZip | `XZ | `Unknown ->
> error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova
> )
> @@ -135,6 +146,49 @@ object
> loop [dir]
> in
>
> + (* Find file in [tar] archive and return at which byte it starts and how
> + * long it is.
> + *)
> + let find_file_in_tar tar filename =
> + let cmd1 = [ "tar"; "tRvf"; tar ] in
> + let cmd2 = [ "awk"; sprintf
> + "$8 == \"%s\" {print substr($2, 1, index($2, \":\")-1), $5}"
> + filename ]
> + in
As above, I'd use external_command + manual filtering code in OCaml.
YMMV though.
> + let lines =
> + external_command ((stringify_args cmd1) ^ " | " ^ (stringify_args cmd2))
> + in
> + if (List.length lines < 1) then
No need for parenthesis, and it can be even simplified as:
if lines = [] then
> + raise Not_found
> + else
> + let soffset, ssize = String.split " " (List.hd lines) in
> + let offset =
> + try Int64.of_string soffset
> + with Failure _ ->
> + error (f_"Invalid offset returned by `tar`: %s") soffset
> + in
> + let size =
> + try Int64.of_string ssize
> + with Failure _ ->
> + error (f_"Invalid size returend by `tar': %s") ssize
Typo in the error message, and different wrt the above one (`tar` vs
`tar') .
> + in
> + (* Note: Offset is actualy block number and there is a single block
> + * with tar header at the beginning of the file. So skip the header and
> + * convert the block number to bytes before returning.
> + *)
> + (offset +^ 1L) *^ 512L, size
> + in
> +
> + let subfolder folder parent =
> + if String.is_prefix folder (parent // "") then
> + let len = String.length parent in
> + String.sub folder (len+1) (String.length folder-len-1)
> + else if folder = parent then
> + ""
> + else
> + assert(false)
No need for parenthesis.
--
Pino Toscano
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20161107/f7f7b005/attachment.sig>
More information about the Libguestfs
mailing list