[Libguestfs] [PATCH v2] v2v/v2v.ml: Use larger request size for -o rhv-upload
Laszlo Ersek
lersek at redhat.com
Mon Feb 14 11:38:02 UTC 2022
On 02/13/22 20:56, Nir Soffer wrote:
> Output modules can specify now request_size to override the default
> request size in nbdcopy.
>
> The rhv-upload plugin translates every NBD command to an HTTP request,
> translated back to an NBD command on the imageio server. The HTTP client and
> server, and the NBD client on the imageio server side are synchronous
> and implemented in python, so they have high overhead per request. To
> get good performance we need to use larger request size.
>
> Testing shows that request size of 4 MiB speeds up the copy disk phase
> from 14.7 seconds to 7.9 seconds (1.8 times faster). Request size of 8
> MiB is a little bit faster but is not compatible with VDDK input.
>
> Here are stats extracted from imageio log when importing Fedora 35 image
> with 3 GiB of random data. For each copy, we have 4 connection stats.
>
> Before:
>
> connection 1 ops, 14.767843 s
> dispatch 4023 ops, 11.427662 s
> zero 38 ops, 0.053840 s, 327.91 MiB, 5.95 GiB/s
> write 3981 ops, 8.975877 s, 988.61 MiB, 110.14 MiB/s
> flush 4 ops, 0.001023 s
>
> connection 1 ops, 14.770026 s
> dispatch 4006 ops, 11.408732 s
> zero 37 ops, 0.057205 s, 633.21 MiB, 10.81 GiB/s
> write 3965 ops, 8.907420 s, 986.65 MiB, 110.77 MiB/s
> flush 4 ops, 0.000280 s
>
> connection 1 ops, 14.768180 s
> dispatch 4057 ops, 11.430712 s
> zero 42 ops, 0.030011 s, 470.47 MiB, 15.31 GiB/s
> write 4011 ops, 9.002055 s, 996.98 MiB, 110.75 MiB/s
> flush 4 ops, 0.000261 s
>
> connection 1 ops, 14.770744 s
> dispatch 4037 ops, 11.462050 s
> zero 45 ops, 0.026668 s, 750.82 MiB, 27.49 GiB/s
> write 3988 ops, 9.002721 s, 989.36 MiB, 109.90 MiB/s
> flush 4 ops, 0.000282 s
>
> After:
>
> connection 1 ops, 7.940377 s
> dispatch 323 ops, 6.695582 s
> zero 37 ops, 0.079958 s, 641.12 MiB, 7.83 GiB/s
> write 282 ops, 6.300242 s, 1.01 GiB, 164.54 MiB/s
> flush 4 ops, 0.000537 s
>
> connection 1 ops, 7.908156 s
> dispatch 305 ops, 6.643475 s
> zero 36 ops, 0.144166 s, 509.43 MiB, 3.45 GiB/s
> write 265 ops, 6.179187 s, 941.23 MiB, 152.32 MiB/s
> flush 4 ops, 0.000324 s
>
> connection 1 ops, 7.942349 s
> dispatch 325 ops, 6.744800 s
> zero 45 ops, 0.185335 s, 622.19 MiB, 3.28 GiB/s
> write 276 ops, 6.236819 s, 995.45 MiB, 159.61 MiB/s
> flush 4 ops, 0.000369 s
>
> connection 1 ops, 7.955572 s
> dispatch 317 ops, 6.721212 s
> zero 43 ops, 0.135771 s, 409.68 MiB, 2.95 GiB/s
> write 270 ops, 6.326366 s, 988.26 MiB, 156.21 MiB/s
> flush 4 ops, 0.001439 s
> ---
>
> Changes since v1:
>
> - Decrease request size to 4 MiB for compatibility with VDDK input.
> (Richard)
> - Reimplement in a nicer way based on
> https://github.com/libguestfs/virt-v2v/commit/08e764959ec9dadd71a95d22d3d88d647a18d165
> (Richard)
>
> v1 was here:
> https://listman.redhat.com/archives/libguestfs/2022-February/msg00183.html
>
> output/output.ml | 1 +
> output/output.mli | 2 ++
> output/output_disk.ml | 2 ++
> output/output_glance.ml | 2 ++
> output/output_json.ml | 2 ++
> output/output_libvirt.ml | 2 ++
> output/output_null.ml | 2 ++
> output/output_openstack.ml | 2 +-
> output/output_qemu.ml | 2 ++
> output/output_rhv.ml | 2 ++
> output/output_rhv_upload.ml | 7 +++++++
> output/output_vdsm.ml | 2 ++
> v2v/v2v.ml | 11 ++++++++---
> 13 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/output/output.ml b/output/output.ml
> index 786ee5d5..7256b547 100644
> --- a/output/output.ml
> +++ b/output/output.ml
> @@ -39,20 +39,21 @@ type options = {
> module type OUTPUT = sig
> type poptions
> type t
> val to_string : options -> string
> val query_output_options : unit -> unit
> val parse_options : options -> Types.source -> poptions
> val setup : string -> poptions -> Types.source -> t
> val finalize : string -> poptions -> t ->
> Types.source -> Types.inspect -> Types.target_meta ->
> unit
> + val request_size : int option
> end
>
> let error_option_cannot_be_used_in_output_mode mode opt =
> error (f_"-o %s: %s option cannot be used in this output mode") mode opt
>
> let get_disks dir =
> let rec loop acc i =
> let socket = sprintf "%s/in%d" dir i in
> if Sys.file_exists socket then (
> let size = Utils.with_nbd_connect_unix ~socket NBD.get_size in
> diff --git a/output/output.mli b/output/output.mli
> index eed204ed..8e3efd8e 100644
> --- a/output/output.mli
> +++ b/output/output.mli
> @@ -52,20 +52,22 @@ module type OUTPUT = sig
>
> Set up the output mode. Sets up a disk pipeline
> [dir // "outX"] for each output disk. *)
>
> val finalize : string -> poptions -> t ->
> Types.source -> Types.inspect -> Types.target_meta ->
> unit
> (** [finalize dir poptions t source inspect target_meta]
>
> Finalizes the conversion and writes metadata. *)
> +
> + val request_size : int option
> end
>
> (** Helper functions for output modes. *)
>
> val error_option_cannot_be_used_in_output_mode : string -> string -> unit
> (** [error_option_cannot_be_used_in_output_mode mode option]
> prints error message that option cannot be used in this output mode. *)
>
> val get_disks : string -> (int * int64) list
> (** Examines the v2v directory and opens each input socket (in0 etc),
> diff --git a/output/output_disk.ml b/output/output_disk.ml
> index a05be559..bc5b4e1c 100644
> --- a/output/output_disk.ml
> +++ b/output/output_disk.ml
> @@ -105,11 +105,13 @@ module Disk = struct
> output_format output_name in
>
> let file = output_storage // output_name ^ ".xml" in
> with_open_out file (fun chan -> DOM.doc_to_chan chan doc);
>
> if verbose () then (
> eprintf "resulting local libvirt XML:\n";
> DOM.doc_to_chan Stdlib.stderr doc;
> eprintf "\n%!";
> )
> +
> + let request_size = None
> end
> diff --git a/output/output_glance.ml b/output/output_glance.ml
> index 9ca406e8..e6a7f789 100644
> --- a/output/output_glance.ml
> +++ b/output/output_glance.ml
> @@ -131,11 +131,13 @@ module Glance = struct
> properties in
> if run_command cmd <> 0 then
> error (f_"glance: image upload to glance failed, see earlier errors");
>
> (* Unlink the temporary files as soon as glance has got them. *)
> try unlink disk with Unix_error _ -> ()
> ) source.s_disks;
>
> (* Remove the temporary directory for the large files. *)
> (try rmdir tmpdir with Unix_error _ -> ())
> +
> + let request_size = None
> end
> diff --git a/output/output_json.ml b/output/output_json.ml
> index 97883217..6e81b639 100644
> --- a/output/output_json.ml
> +++ b/output/output_json.ml
> @@ -141,11 +141,13 @@ module Json = struct
> output_string Stdlib.stderr doc_string;
> eprintf "\n\n%!";
> );
>
> let file = output_storage // output_name ^ ".json" in
> with_open_out file (
> fun chan ->
> output_string chan doc_string;
> output_char chan '\n'
> )
> +
> + let request_size = None
> end
> diff --git a/output/output_libvirt.ml b/output/output_libvirt.ml
> index 2236573f..e0d3432d 100644
> --- a/output/output_libvirt.ml
> +++ b/output/output_libvirt.ml
> @@ -210,11 +210,13 @@ module Libvirt_ = struct
> warning (f_"the target hypervisor does not support a %s KVM guest") arch;
> []
> ) else (
> let node (* first matching <guest> *) = Xml.xpathobj_node obj 0 in
> Xml.xpathctx_set_current_context xpathctx node;
>
> (* Get guest/features/* nodes. *)
> let features = xpath_get_nodes xpathctx "features/*" in
> List.map Xml.node_name features
> )
> +
> + let request_size = None
> end
> diff --git a/output/output_null.ml b/output/output_null.ml
> index eeb15653..86d81eaa 100644
> --- a/output/output_null.ml
> +++ b/output/output_null.ml
> @@ -81,11 +81,13 @@ module Null = struct
> List.iter (
> fun (i, _) ->
> if i > 0 then (
> let output = sprintf "%s/out%d" dir i in
> link socket output
> )
> ) disks
>
> let finalize dir () () source inspect target_meta =
> () (* nothing to do *)
> +
> + let request_size = None
> end
> diff --git a/output/output_openstack.ml b/output/output_openstack.ml
> index 3d7a7882..d0af2ac7 100644
> --- a/output/output_openstack.ml
> +++ b/output/output_openstack.ml
> @@ -475,12 +475,12 @@ The os-* parameters and environment variables are optional.
> ) volume_ids
>
> (* UTC conversion time. *)
> and iso_time =
> let time = time () in
> let tm = gmtime time in
> sprintf "%04d/%02d/%02d %02d:%02d:%02d"
> (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
> tm.tm_hour tm.tm_min tm.tm_sec
>
> -
> + let request_size = None
> end
> diff --git a/output/output_qemu.ml b/output/output_qemu.ml
> index 873a63b7..f8d2e171 100644
> --- a/output/output_qemu.ml
> +++ b/output/output_qemu.ml
> @@ -320,11 +320,13 @@ module QEMU = struct
> Qemuopts.to_chan cmd chan
> );
>
> Unix.chmod file 0o755;
>
> (* If -oo qemu-boot option was specified then we should boot the guest. *)
> if qemu_boot then (
> let cmd = sprintf "%s &" (quote file) in
> ignore (shell_command cmd)
> )
> +
> + let request_size = None
> end
> diff --git a/output/output_rhv.ml b/output/output_rhv.ml
> index 022d96e3..119207fd 100644
> --- a/output/output_rhv.ml
> +++ b/output/output_rhv.ml
> @@ -275,11 +275,13 @@ module RHV = struct
> Create_ovf.create_ovf source inspect target_meta sizes
> output_alloc output_format output_name esd_uuid image_uuids vol_uuids
> ~need_actual_sizes:true dir vm_uuid
> Create_ovf.RHVExportStorageDomain in
>
> (* Write it to the metadata file. *)
> let dir = esd_mp // esd_uuid // "master" // "vms" // vm_uuid in
> Changeuid.mkdir changeuid_t dir 0o755;
> let file = dir // vm_uuid ^ ".ovf" in
> Changeuid.output changeuid_t file (fun chan -> DOM.doc_to_chan chan ovf)
> +
> + let request_size = None
> end
> diff --git a/output/output_rhv_upload.ml b/output/output_rhv_upload.ml
> index 49f13099..7c2434bd 100644
> --- a/output/output_rhv_upload.ml
> +++ b/output/output_rhv_upload.ml
> @@ -477,11 +477,18 @@ e command line has to match the number of guest disk images (for this guest: %d)
> let json_params =
> match rhv_cluster_uuid with
> | None -> assert false
> | Some uuid -> ("rhv_cluster_uuid", JSON.String uuid) :: json_params in
>
> let ovf_file = dir // "vm.ovf" in
> with_open_out ovf_file (fun chan -> output_string chan ovf);
> if Python_script.run_command createvm_script json_params [ovf_file] <> 0
> then
> error (f_"failed to create virtual machine, see earlier errors")
> +
> + (* The imageio server has high overhead per request. Using 4 MiB
> + * request size is 1.8 times faster compared with nbdcopy default
> + * request size (256k). Request size of 8 MiB is a little bit faster
> + * but is not compatible with VDDK input.
> + *)
> + let request_size = Some (4*1024*1024)
> end
> diff --git a/output/output_vdsm.ml b/output/output_vdsm.ml
> index 14cbb961..a1e8c246 100644
> --- a/output/output_vdsm.ml
> +++ b/output/output_vdsm.ml
> @@ -218,11 +218,13 @@ For each disk you must supply one of each of these options:
> output_alloc output_format output_name dd_uuid
> image_uuids
> vol_uuids
> dir
> vm_uuid
> ovf_flavour in
>
> (* Write it to the metadata file. *)
> let file = ovf_output // vm_uuid ^ ".ovf" in
> with_open_out file (fun chan -> DOM.doc_to_chan chan ovf)
> +
> + let request_size = None
> end
> diff --git a/v2v/v2v.ml b/v2v/v2v.ml
> index fddb0742..cadf864d 100644
> --- a/v2v/v2v.ml
> +++ b/v2v/v2v.ml
> @@ -583,32 +583,33 @@ read the man page virt-v2v(1).
> List.rev acc
> in
> let disks = loop [] 0 in
> let nr_disks = List.length disks in
>
> (* Copy the disks. *)
> List.iter (
> fun (i, input_socket, output_socket) ->
> message (f_"Copying disk %d/%d") (i+1) nr_disks;
>
> - let input_uri = nbd_uri_of_socket input_socket
> + let request_size = Output_module.request_size
> + and input_uri = nbd_uri_of_socket input_socket
> and output_uri = nbd_uri_of_socket output_socket in
>
> (* In verbose mode print some information about each
> * side of the pipeline.
> *)
> if verbose () then (
> nbdinfo ~content:true input_uri;
> nbdinfo ~content:false output_uri
> );
>
> - nbdcopy output_alloc input_uri output_uri
> + nbdcopy ?request_size output_alloc input_uri output_uri
> ) disks;
>
> (* End of copying phase. *)
> unlink (tmpdir // "copy");
>
> (* Do the finalization step. *)
> message (f_"Creating output metadata");
> Output_module.finalize tmpdir output_poptions output_t
> source inspect target_meta;
>
> @@ -627,26 +628,30 @@ read the man page virt-v2v(1).
> * appliance may be created there. (RHBZ#1316479, RHBZ#2051394)
> *)
> and check_host_free_space () =
> let free_space = StatVFS.free_space (StatVFS.statvfs large_tmpdir) in
> debug "check_host_free_space: large_tmpdir=%s free_space=%Ld"
> large_tmpdir free_space;
> if free_space < 1_073_741_824L then
> error (f_"insufficient free space in the conversion server temporary directory %s (%s).\n\nEither free up space in that directory, or set the LIBGUESTFS_CACHEDIR environment variable to point to another directory with more than 1GB of free space.\n\nSee also the virt-v2v(1) manual, section \"Minimum free space check in the host\".")
> large_tmpdir (human_size free_space)
>
> -and nbdcopy output_alloc input_uri output_uri =
> +and nbdcopy ?request_size output_alloc input_uri output_uri =
> (* XXX It's possible that some output modes know whether
> * --target-is-zero which would be a useful optimization.
> *)
> let cmd = ref [] in
> List.push_back_list cmd [ "nbdcopy"; input_uri; output_uri ];
> + (match request_size with
> + | None -> ()
> + | Some size -> List.push_back cmd (sprintf "--request-size=%d" size)
> + );
> List.push_back cmd "--flush";
> (*List.push_back cmd "--verbose";*)
> if not (quiet ()) then List.push_back cmd "--progress";
> if output_alloc = Types.Preallocated then List.push_back cmd "--allocated";
> let cmd = !cmd in
>
> if run_command cmd <> 0 then
> error (f_"nbdcopy command failed, see earlier error messages")
>
> (* Run nbdinfo on a URI and dump the information to stderr.
>
Acked-by: Laszlo Ersek <lersek at redhat.com>
More information about the Libguestfs
mailing list