[Libguestfs] [PATCH v2] v2v/v2v.ml: Use larger request size for -o rhv-upload

Laszlo Ersek lersek at redhat.com
Mon Feb 14 11:38:02 UTC 2022


On 02/13/22 20:56, Nir Soffer wrote:
> Output modules can specify now request_size to override the default
> request size in nbdcopy.
> 
> The rhv-upload plugin is translating every NBD command to HTTP request,
> translated back to NBD command on imageio server. The HTTP client and
> server, and the NBD client on the imageio server side are synchronous
> and implemented in python, so they have high overhead per request. To
> get good performance we need to use larger request size.
> 
> Testing shows that request size of 4 MiB speeds up the copy disk phase
> from 14.7 seconds to 7.9 seconds (1.8x times faster). Request size of 8
> MiB is a little bit faster but is not compatible with VDDK input.
> 
> Here are stats extracted from imageio log when importing Fedora 35 image
> with 3 GiB of random data. For each copy, we have 4 connection stats.
> 
> Before:
> 
> connection 1 ops, 14.767843 s
> dispatch 4023 ops, 11.427662 s
> zero 38 ops, 0.053840 s, 327.91 MiB, 5.95 GiB/s
> write 3981 ops, 8.975877 s, 988.61 MiB, 110.14 MiB/s
> flush 4 ops, 0.001023 s
> 
> connection 1 ops, 14.770026 s
> dispatch 4006 ops, 11.408732 s
> zero 37 ops, 0.057205 s, 633.21 MiB, 10.81 GiB/s
> write 3965 ops, 8.907420 s, 986.65 MiB, 110.77 MiB/s
> flush 4 ops, 0.000280 s
> 
> connection 1 ops, 14.768180 s
> dispatch 4057 ops, 11.430712 s
> zero 42 ops, 0.030011 s, 470.47 MiB, 15.31 GiB/s
> write 4011 ops, 9.002055 s, 996.98 MiB, 110.75 MiB/s
> flush 4 ops, 0.000261 s
> 
> connection 1 ops, 14.770744 s
> dispatch 4037 ops, 11.462050 s
> zero 45 ops, 0.026668 s, 750.82 MiB, 27.49 GiB/s
> write 3988 ops, 9.002721 s, 989.36 MiB, 109.90 MiB/s
> flush 4 ops, 0.000282 s
> 
> After:
> 
> connection 1 ops, 7.940377 s
> dispatch 323 ops, 6.695582 s
> zero 37 ops, 0.079958 s, 641.12 MiB, 7.83 GiB/s
> write 282 ops, 6.300242 s, 1.01 GiB, 164.54 MiB/s
> flush 4 ops, 0.000537 s
> 
> connection 1 ops, 7.908156 s
> dispatch 305 ops, 6.643475 s
> zero 36 ops, 0.144166 s, 509.43 MiB, 3.45 GiB/s
> write 265 ops, 6.179187 s, 941.23 MiB, 152.32 MiB/s
> flush 4 ops, 0.000324 s
> 
> connection 1 ops, 7.942349 s
> dispatch 325 ops, 6.744800 s
> zero 45 ops, 0.185335 s, 622.19 MiB, 3.28 GiB/s
> write 276 ops, 6.236819 s, 995.45 MiB, 159.61 MiB/s
> flush 4 ops, 0.000369 s
> 
> connection 1 ops, 7.955572 s
> dispatch 317 ops, 6.721212 s
> zero 43 ops, 0.135771 s, 409.68 MiB, 2.95 GiB/s
> write 270 ops, 6.326366 s, 988.26 MiB, 156.21 MiB/s
> flush 4 ops, 0.001439 s
> ---
> 
> Changes since v1:
> 
> - Decrease request size to 4 MiB for compatibility with VDDK input.
>   (Richard)
> - Reimplement in a nicer way based on
>   https://github.com/libguestfs/virt-v2v/commit/08e764959ec9dadd71a95d22d3d88d647a18d165
>   (Richard)
> 
> v1 was here:
> https://listman.redhat.com/archives/libguestfs/2022-February/msg00183.html
> 
>  output/output.ml            |  1 +
>  output/output.mli           |  2 ++
>  output/output_disk.ml       |  2 ++
>  output/output_glance.ml     |  2 ++
>  output/output_json.ml       |  2 ++
>  output/output_libvirt.ml    |  2 ++
>  output/output_null.ml       |  2 ++
>  output/output_openstack.ml  |  2 +-
>  output/output_qemu.ml       |  2 ++
>  output/output_rhv.ml        |  2 ++
>  output/output_rhv_upload.ml |  7 +++++++
>  output/output_vdsm.ml       |  2 ++
>  v2v/v2v.ml                  | 11 ++++++++---
>  13 files changed, 35 insertions(+), 4 deletions(-)
> 
> diff --git a/output/output.ml b/output/output.ml
> index 786ee5d5..7256b547 100644
> --- a/output/output.ml
> +++ b/output/output.ml
> @@ -39,20 +39,21 @@ type options = {
>  module type OUTPUT = sig
>    type poptions
>    type t
>    val to_string : options -> string
>    val query_output_options : unit -> unit
>    val parse_options : options -> Types.source -> poptions
>    val setup : string -> poptions -> Types.source -> t
>    val finalize : string -> poptions -> t ->
>                   Types.source -> Types.inspect -> Types.target_meta ->
>                   unit
> +  val request_size : int option
>  end
>  
>  let error_option_cannot_be_used_in_output_mode mode opt =
>    error (f_"-o %s: %s option cannot be used in this output mode") mode opt
>  
>  let get_disks dir =
>    let rec loop acc i =
>      let socket = sprintf "%s/in%d" dir i in
>      if Sys.file_exists socket then (
>        let size = Utils.with_nbd_connect_unix ~socket NBD.get_size in
> diff --git a/output/output.mli b/output/output.mli
> index eed204ed..8e3efd8e 100644
> --- a/output/output.mli
> +++ b/output/output.mli
> @@ -52,20 +52,22 @@ module type OUTPUT = sig
>  
>        Set up the output mode.  Sets up a disk pipeline
>        [dir // "outX"] for each output disk. *)
>  
>    val finalize : string -> poptions -> t ->
>                   Types.source -> Types.inspect -> Types.target_meta ->
>                   unit
>    (** [finalize dir poptions t inspect target_meta]
>  
>        Finalizes the conversion and writes metadata. *)
> +
> +  val request_size : int option
>  end
>  
>  (** Helper functions for output modes. *)
>  
>  val error_option_cannot_be_used_in_output_mode : string -> string -> unit
>  (** [error_option_cannot_be_used_in_output_mode mode option]
>      prints error message that option cannot be used in this output mode. *)
>  
>  val get_disks : string -> (int * int64) list
>  (** Examines the v2v directory and opens each input socket (in0 etc),
> diff --git a/output/output_disk.ml b/output/output_disk.ml
> index a05be559..bc5b4e1c 100644
> --- a/output/output_disk.ml
> +++ b/output/output_disk.ml
> @@ -105,11 +105,13 @@ module Disk = struct
>                  output_format output_name in
>  
>      let file = output_storage // output_name ^ ".xml" in
>      with_open_out file (fun chan -> DOM.doc_to_chan chan doc);
>  
>      if verbose () then (
>        eprintf "resulting local libvirt XML:\n";
>        DOM.doc_to_chan Stdlib.stderr doc;
>        eprintf "\n%!";
>      )
> +
> +  let request_size = None
>  end
> diff --git a/output/output_glance.ml b/output/output_glance.ml
> index 9ca406e8..e6a7f789 100644
> --- a/output/output_glance.ml
> +++ b/output/output_glance.ml
> @@ -131,11 +131,13 @@ module Glance = struct
>                      properties in
>          if run_command cmd <> 0 then
>            error (f_"glance: image upload to glance failed, see earlier errors");
>  
>          (* Unlink the temporary files as soon as glance has got them. *)
>          try unlink disk with Unix_error _ -> ()
>      ) source.s_disks;
>  
>      (* Remove the temporary directory for the large files. *)
>      (try rmdir tmpdir with Unix_error _ -> ())
> +
> +  let request_size = None
>  end
> diff --git a/output/output_json.ml b/output/output_json.ml
> index 97883217..6e81b639 100644
> --- a/output/output_json.ml
> +++ b/output/output_json.ml
> @@ -141,11 +141,13 @@ module Json = struct
>        output_string Stdlib.stderr doc_string;
>        eprintf "\n\n%!";
>      );
>  
>      let file = output_storage // output_name ^ ".json" in
>      with_open_out file (
>        fun chan ->
>          output_string chan doc_string;
>          output_char chan '\n'
>      )
> +
> +  let request_size = None
>  end
> diff --git a/output/output_libvirt.ml b/output/output_libvirt.ml
> index 2236573f..e0d3432d 100644
> --- a/output/output_libvirt.ml
> +++ b/output/output_libvirt.ml
> @@ -210,11 +210,13 @@ module Libvirt_ = struct
>        warning (f_"the target hypervisor does not support a %s KVM guest") arch;
>        []
>      ) else (
>        let node (* first matching <guest> *) = Xml.xpathobj_node obj 0 in
>        Xml.xpathctx_set_current_context xpathctx node;
>  
>        (* Get guest/features/* nodes. *)
>        let features = xpath_get_nodes xpathctx "features/*" in
>        List.map Xml.node_name features
>      )
> +
> +  let request_size = None
>  end
> diff --git a/output/output_null.ml b/output/output_null.ml
> index eeb15653..86d81eaa 100644
> --- a/output/output_null.ml
> +++ b/output/output_null.ml
> @@ -81,11 +81,13 @@ module Null = struct
>      List.iter (
>        fun (i, _) ->
>          if i > 0 then (
>            let output = sprintf "%s/out%d" dir i in
>            link socket output
>          )
>      ) disks
>  
>    let finalize dir () () source inspect target_meta =
>      () (* nothing to do *)
> +
> +  let request_size = None
>  end
> diff --git a/output/output_openstack.ml b/output/output_openstack.ml
> index 3d7a7882..d0af2ac7 100644
> --- a/output/output_openstack.ml
> +++ b/output/output_openstack.ml
> @@ -475,12 +475,12 @@ The os-* parameters and environment variables are optional.
>      ) volume_ids
>  
>    (* UTC conversion time. *)
>    and iso_time =
>      let time = time () in
>      let tm = gmtime time in
>      sprintf "%04d/%02d/%02d %02d:%02d:%02d"
>        (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
>        tm.tm_hour tm.tm_min tm.tm_sec
>  
> -
> +  let request_size = None
>  end
> diff --git a/output/output_qemu.ml b/output/output_qemu.ml
> index 873a63b7..f8d2e171 100644
> --- a/output/output_qemu.ml
> +++ b/output/output_qemu.ml
> @@ -320,11 +320,13 @@ module QEMU = struct
>          Qemuopts.to_chan cmd chan
>      );
>  
>      Unix.chmod file 0o755;
>  
>      (* If -oo qemu-boot option was specified then we should boot the guest. *)
>      if qemu_boot then (
>        let cmd = sprintf "%s &" (quote file) in
>        ignore (shell_command cmd)
>      )
> +
> +  let request_size = None
>  end
> diff --git a/output/output_rhv.ml b/output/output_rhv.ml
> index 022d96e3..119207fd 100644
> --- a/output/output_rhv.ml
> +++ b/output/output_rhv.ml
> @@ -275,11 +275,13 @@ module RHV = struct
>        Create_ovf.create_ovf source inspect target_meta sizes
>          output_alloc output_format output_name esd_uuid image_uuids vol_uuids
>          ~need_actual_sizes:true dir vm_uuid
>          Create_ovf.RHVExportStorageDomain in
>  
>      (* Write it to the metadata file. *)
>      let dir = esd_mp // esd_uuid // "master" // "vms" // vm_uuid in
>      Changeuid.mkdir changeuid_t dir 0o755;
>      let file = dir // vm_uuid ^ ".ovf" in
>      Changeuid.output changeuid_t file (fun chan -> DOM.doc_to_chan chan ovf)
> +
> +  let request_size = None
>  end
> diff --git a/output/output_rhv_upload.ml b/output/output_rhv_upload.ml
> index 49f13099..7c2434bd 100644
> --- a/output/output_rhv_upload.ml
> +++ b/output/output_rhv_upload.ml
> @@ -477,11 +477,18 @@ e command line has to match the number of guest disk images (for this guest: %d)
>      let json_params =
>        match rhv_cluster_uuid with
>        | None -> assert false
>        | Some uuid -> ("rhv_cluster_uuid", JSON.String uuid) :: json_params in
>  
>      let ovf_file = dir // "vm.ovf" in
>      with_open_out ovf_file (fun chan -> output_string chan ovf);
>      if Python_script.run_command createvm_script json_params [ovf_file] <> 0
>      then
>        error (f_"failed to create virtual machine, see earlier errors")
> +
> +  (* The imageio server has high overhead per request. Using 4 MiB
> +   * request size is 1.8x times faster compared with nbdcopy default
> +   * request size (256k). Request size of 8 MiB is a little bit faster
> +   * but is is not compatible with VDDK input.
> +   *)
> +  let request_size = Some (4*1024*1024)
>  end
> diff --git a/output/output_vdsm.ml b/output/output_vdsm.ml
> index 14cbb961..a1e8c246 100644
> --- a/output/output_vdsm.ml
> +++ b/output/output_vdsm.ml
> @@ -218,11 +218,13 @@ For each disk you must supply one of each of these options:
>                  output_alloc output_format output_name dd_uuid
>                  image_uuids
>                  vol_uuids
>                  dir
>                  vm_uuid
>                  ovf_flavour in
>  
>      (* Write it to the metadata file. *)
>      let file = ovf_output // vm_uuid ^ ".ovf" in
>      with_open_out file (fun chan -> DOM.doc_to_chan chan ovf)
> +
> +  let request_size = None
>  end
> diff --git a/v2v/v2v.ml b/v2v/v2v.ml
> index fddb0742..cadf864d 100644
> --- a/v2v/v2v.ml
> +++ b/v2v/v2v.ml
> @@ -583,32 +583,33 @@ read the man page virt-v2v(1).
>        List.rev acc
>    in
>    let disks = loop [] 0 in
>    let nr_disks = List.length disks in
>  
>    (* Copy the disks. *)
>    List.iter (
>      fun (i, input_socket, output_socket) ->
>        message (f_"Copying disk %d/%d") (i+1) nr_disks;
>  
> -      let input_uri = nbd_uri_of_socket input_socket
> +      let request_size = Output_module.request_size
> +      and input_uri = nbd_uri_of_socket input_socket
>        and output_uri = nbd_uri_of_socket output_socket in
>  
>        (* In verbose mode print some information about each
>         * side of the pipeline.
>         *)
>        if verbose () then (
>          nbdinfo ~content:true input_uri;
>          nbdinfo ~content:false output_uri
>        );
>  
> -      nbdcopy output_alloc input_uri output_uri
> +      nbdcopy ?request_size output_alloc input_uri output_uri
>    ) disks;
>  
>    (* End of copying phase. *)
>    unlink (tmpdir // "copy");
>  
>    (* Do the finalization step. *)
>    message (f_"Creating output metadata");
>    Output_module.finalize tmpdir output_poptions output_t
>      source inspect target_meta;
>  
> @@ -627,26 +628,30 @@ read the man page virt-v2v(1).
>   * appliance may be created there.  (RHBZ#1316479, RHBZ#2051394)
>   *)
>  and check_host_free_space () =
>    let free_space = StatVFS.free_space (StatVFS.statvfs large_tmpdir) in
>    debug "check_host_free_space: large_tmpdir=%s free_space=%Ld"
>          large_tmpdir free_space;
>    if free_space < 1_073_741_824L then
>      error (f_"insufficient free space in the conversion server temporary directory %s (%s).\n\nEither free up space in that directory, or set the LIBGUESTFS_CACHEDIR environment variable to point to another directory with more than 1GB of free space.\n\nSee also the virt-v2v(1) manual, section \"Minimum free space check in the host\".")
>            large_tmpdir (human_size free_space)
>  
> -and nbdcopy output_alloc input_uri output_uri =
> +and nbdcopy ?request_size output_alloc input_uri output_uri =
>    (* XXX It's possible that some output modes know whether
>     * --target-is-zero which would be a useful optimization.
>     *)
>    let cmd = ref [] in
>    List.push_back_list cmd [ "nbdcopy"; input_uri; output_uri ];
> +  (match request_size with
> +    | None -> ()
> +    | Some size -> List.push_back cmd (sprintf "--request-size=%d" size)
> +  );
>    List.push_back cmd "--flush";
>    (*List.push_back cmd "--verbose";*)
>    if not (quiet ()) then List.push_back cmd "--progress";
>    if output_alloc = Types.Preallocated then List.push_back cmd "--allocated";
>    let cmd = !cmd in
>  
>    if run_command cmd <> 0 then
>      error (f_"nbdcopy command failed, see earlier error messages")
>  
>  (* Run nbdinfo on a URI and dump the information to stderr.
> 

Acked-by: Laszlo Ersek <lersek at redhat.com>




More information about the Libguestfs mailing list