[Libguestfs] [PATCH v2] v2v/v2v.ml: Use larger request size for -o rhv-upload

Nir Soffer nsoffer at redhat.com
Sun Feb 13 19:56:34 UTC 2022


Output modules can now specify request_size to override the default
request size in nbdcopy.

The rhv-upload plugin translates every NBD command to an HTTP request,
which is translated back to an NBD command on the imageio server. The
HTTP client and server, and the NBD client on the imageio server side,
are synchronous and implemented in Python, so they have high overhead
per request. To get good performance we need to use a larger request
size.

Testing shows that a request size of 4 MiB speeds up the copy disk phase
from 14.7 seconds to 7.9 seconds (1.8x faster). A request size of 8
MiB is a little bit faster but is not compatible with VDDK input.

Here are stats extracted from imageio log when importing Fedora 35 image
with 3 GiB of random data. For each copy, we have 4 connection stats.

Before:

connection 1 ops, 14.767843 s
dispatch 4023 ops, 11.427662 s
zero 38 ops, 0.053840 s, 327.91 MiB, 5.95 GiB/s
write 3981 ops, 8.975877 s, 988.61 MiB, 110.14 MiB/s
flush 4 ops, 0.001023 s

connection 1 ops, 14.770026 s
dispatch 4006 ops, 11.408732 s
zero 37 ops, 0.057205 s, 633.21 MiB, 10.81 GiB/s
write 3965 ops, 8.907420 s, 986.65 MiB, 110.77 MiB/s
flush 4 ops, 0.000280 s

connection 1 ops, 14.768180 s
dispatch 4057 ops, 11.430712 s
zero 42 ops, 0.030011 s, 470.47 MiB, 15.31 GiB/s
write 4011 ops, 9.002055 s, 996.98 MiB, 110.75 MiB/s
flush 4 ops, 0.000261 s

connection 1 ops, 14.770744 s
dispatch 4037 ops, 11.462050 s
zero 45 ops, 0.026668 s, 750.82 MiB, 27.49 GiB/s
write 3988 ops, 9.002721 s, 989.36 MiB, 109.90 MiB/s
flush 4 ops, 0.000282 s

After:

connection 1 ops, 7.940377 s
dispatch 323 ops, 6.695582 s
zero 37 ops, 0.079958 s, 641.12 MiB, 7.83 GiB/s
write 282 ops, 6.300242 s, 1.01 GiB, 164.54 MiB/s
flush 4 ops, 0.000537 s

connection 1 ops, 7.908156 s
dispatch 305 ops, 6.643475 s
zero 36 ops, 0.144166 s, 509.43 MiB, 3.45 GiB/s
write 265 ops, 6.179187 s, 941.23 MiB, 152.32 MiB/s
flush 4 ops, 0.000324 s

connection 1 ops, 7.942349 s
dispatch 325 ops, 6.744800 s
zero 45 ops, 0.185335 s, 622.19 MiB, 3.28 GiB/s
write 276 ops, 6.236819 s, 995.45 MiB, 159.61 MiB/s
flush 4 ops, 0.000369 s

connection 1 ops, 7.955572 s
dispatch 317 ops, 6.721212 s
zero 43 ops, 0.135771 s, 409.68 MiB, 2.95 GiB/s
write 270 ops, 6.326366 s, 988.26 MiB, 156.21 MiB/s
flush 4 ops, 0.001439 s
---

Changes since v1:

- Decrease request size to 4 MiB for compatibility with VDDK input.
  (Richard)
- Reimplement in a nicer way based on
  https://github.com/libguestfs/virt-v2v/commit/08e764959ec9dadd71a95d22d3d88d647a18d165
  (Richard)

v1 was here:
https://listman.redhat.com/archives/libguestfs/2022-February/msg00183.html

 output/output.ml            |  1 +
 output/output.mli           |  2 ++
 output/output_disk.ml       |  2 ++
 output/output_glance.ml     |  2 ++
 output/output_json.ml       |  2 ++
 output/output_libvirt.ml    |  2 ++
 output/output_null.ml       |  2 ++
 output/output_openstack.ml  |  2 +-
 output/output_qemu.ml       |  2 ++
 output/output_rhv.ml        |  2 ++
 output/output_rhv_upload.ml |  7 +++++++
 output/output_vdsm.ml       |  2 ++
 v2v/v2v.ml                  | 11 ++++++++---
 13 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/output/output.ml b/output/output.ml
index 786ee5d5..7256b547 100644
--- a/output/output.ml
+++ b/output/output.ml
@@ -39,20 +39,21 @@ type options = {
 module type OUTPUT = sig
   type poptions
   type t
   val to_string : options -> string
   val query_output_options : unit -> unit
   val parse_options : options -> Types.source -> poptions
   val setup : string -> poptions -> Types.source -> t
   val finalize : string -> poptions -> t ->
                  Types.source -> Types.inspect -> Types.target_meta ->
                  unit
+  val request_size : int option
 end
 
 let error_option_cannot_be_used_in_output_mode mode opt =
   error (f_"-o %s: %s option cannot be used in this output mode") mode opt
 
 let get_disks dir =
   let rec loop acc i =
     let socket = sprintf "%s/in%d" dir i in
     if Sys.file_exists socket then (
       let size = Utils.with_nbd_connect_unix ~socket NBD.get_size in
diff --git a/output/output.mli b/output/output.mli
index eed204ed..8e3efd8e 100644
--- a/output/output.mli
+++ b/output/output.mli
@@ -52,20 +52,22 @@ module type OUTPUT = sig
 
       Set up the output mode.  Sets up a disk pipeline
       [dir // "outX"] for each output disk. *)
 
   val finalize : string -> poptions -> t ->
                  Types.source -> Types.inspect -> Types.target_meta ->
                  unit
   (** [finalize dir poptions t inspect target_meta]
 
       Finalizes the conversion and writes metadata. *)
+
+  val request_size : int option
 end
 
 (** Helper functions for output modes. *)
 
 val error_option_cannot_be_used_in_output_mode : string -> string -> unit
 (** [error_option_cannot_be_used_in_output_mode mode option]
     prints error message that option cannot be used in this output mode. *)
 
 val get_disks : string -> (int * int64) list
 (** Examines the v2v directory and opens each input socket (in0 etc),
diff --git a/output/output_disk.ml b/output/output_disk.ml
index a05be559..bc5b4e1c 100644
--- a/output/output_disk.ml
+++ b/output/output_disk.ml
@@ -105,11 +105,13 @@ module Disk = struct
                 output_format output_name in
 
     let file = output_storage // output_name ^ ".xml" in
     with_open_out file (fun chan -> DOM.doc_to_chan chan doc);
 
     if verbose () then (
       eprintf "resulting local libvirt XML:\n";
       DOM.doc_to_chan Stdlib.stderr doc;
       eprintf "\n%!";
     )
+
+  let request_size = None
 end
diff --git a/output/output_glance.ml b/output/output_glance.ml
index 9ca406e8..e6a7f789 100644
--- a/output/output_glance.ml
+++ b/output/output_glance.ml
@@ -131,11 +131,13 @@ module Glance = struct
                     properties in
         if run_command cmd <> 0 then
           error (f_"glance: image upload to glance failed, see earlier errors");
 
         (* Unlink the temporary files as soon as glance has got them. *)
         try unlink disk with Unix_error _ -> ()
     ) source.s_disks;
 
     (* Remove the temporary directory for the large files. *)
     (try rmdir tmpdir with Unix_error _ -> ())
+
+  let request_size = None
 end
diff --git a/output/output_json.ml b/output/output_json.ml
index 97883217..6e81b639 100644
--- a/output/output_json.ml
+++ b/output/output_json.ml
@@ -141,11 +141,13 @@ module Json = struct
       output_string Stdlib.stderr doc_string;
       eprintf "\n\n%!";
     );
 
     let file = output_storage // output_name ^ ".json" in
     with_open_out file (
       fun chan ->
         output_string chan doc_string;
         output_char chan '\n'
     )
+
+  let request_size = None
 end
diff --git a/output/output_libvirt.ml b/output/output_libvirt.ml
index 2236573f..e0d3432d 100644
--- a/output/output_libvirt.ml
+++ b/output/output_libvirt.ml
@@ -210,11 +210,13 @@ module Libvirt_ = struct
       warning (f_"the target hypervisor does not support a %s KVM guest") arch;
       []
     ) else (
       let node (* first matching <guest> *) = Xml.xpathobj_node obj 0 in
       Xml.xpathctx_set_current_context xpathctx node;
 
       (* Get guest/features/* nodes. *)
       let features = xpath_get_nodes xpathctx "features/*" in
       List.map Xml.node_name features
     )
+
+  let request_size = None
 end
diff --git a/output/output_null.ml b/output/output_null.ml
index eeb15653..86d81eaa 100644
--- a/output/output_null.ml
+++ b/output/output_null.ml
@@ -81,11 +81,13 @@ module Null = struct
     List.iter (
       fun (i, _) ->
         if i > 0 then (
           let output = sprintf "%s/out%d" dir i in
           link socket output
         )
     ) disks
 
   let finalize dir () () source inspect target_meta =
     () (* nothing to do *)
+
+  let request_size = None
 end
diff --git a/output/output_openstack.ml b/output/output_openstack.ml
index 3d7a7882..d0af2ac7 100644
--- a/output/output_openstack.ml
+++ b/output/output_openstack.ml
@@ -475,12 +475,12 @@ The os-* parameters and environment variables are optional.
     ) volume_ids
 
   (* UTC conversion time. *)
   and iso_time =
     let time = time () in
     let tm = gmtime time in
     sprintf "%04d/%02d/%02d %02d:%02d:%02d"
       (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
       tm.tm_hour tm.tm_min tm.tm_sec
 
-
+  let request_size = None
 end
diff --git a/output/output_qemu.ml b/output/output_qemu.ml
index 873a63b7..f8d2e171 100644
--- a/output/output_qemu.ml
+++ b/output/output_qemu.ml
@@ -320,11 +320,13 @@ module QEMU = struct
         Qemuopts.to_chan cmd chan
     );
 
     Unix.chmod file 0o755;
 
     (* If -oo qemu-boot option was specified then we should boot the guest. *)
     if qemu_boot then (
       let cmd = sprintf "%s &" (quote file) in
       ignore (shell_command cmd)
     )
+
+  let request_size = None
 end
diff --git a/output/output_rhv.ml b/output/output_rhv.ml
index 022d96e3..119207fd 100644
--- a/output/output_rhv.ml
+++ b/output/output_rhv.ml
@@ -275,11 +275,13 @@ module RHV = struct
       Create_ovf.create_ovf source inspect target_meta sizes
         output_alloc output_format output_name esd_uuid image_uuids vol_uuids
         ~need_actual_sizes:true dir vm_uuid
         Create_ovf.RHVExportStorageDomain in
 
     (* Write it to the metadata file. *)
     let dir = esd_mp // esd_uuid // "master" // "vms" // vm_uuid in
     Changeuid.mkdir changeuid_t dir 0o755;
     let file = dir // vm_uuid ^ ".ovf" in
     Changeuid.output changeuid_t file (fun chan -> DOM.doc_to_chan chan ovf)
+
+  let request_size = None
 end
diff --git a/output/output_rhv_upload.ml b/output/output_rhv_upload.ml
index 49f13099..7c2434bd 100644
--- a/output/output_rhv_upload.ml
+++ b/output/output_rhv_upload.ml
@@ -477,11 +477,18 @@ e command line has to match the number of guest disk images (for this guest: %d)
     let json_params =
       match rhv_cluster_uuid with
       | None -> assert false
       | Some uuid -> ("rhv_cluster_uuid", JSON.String uuid) :: json_params in
 
     let ovf_file = dir // "vm.ovf" in
     with_open_out ovf_file (fun chan -> output_string chan ovf);
     if Python_script.run_command createvm_script json_params [ovf_file] <> 0
     then
       error (f_"failed to create virtual machine, see earlier errors")
+
+  (* The imageio server has high overhead per request. Using a 4 MiB
+   * request size is 1.8x faster compared with the nbdcopy default
+   * request size (256k). A request size of 8 MiB is a little bit
+   * faster but it is not compatible with VDDK input.
+   *)
+  let request_size = Some (4*1024*1024)
 end
diff --git a/output/output_vdsm.ml b/output/output_vdsm.ml
index 14cbb961..a1e8c246 100644
--- a/output/output_vdsm.ml
+++ b/output/output_vdsm.ml
@@ -218,11 +218,13 @@ For each disk you must supply one of each of these options:
                 output_alloc output_format output_name dd_uuid
                 image_uuids
                 vol_uuids
                 dir
                 vm_uuid
                 ovf_flavour in
 
     (* Write it to the metadata file. *)
     let file = ovf_output // vm_uuid ^ ".ovf" in
     with_open_out file (fun chan -> DOM.doc_to_chan chan ovf)
+
+  let request_size = None
 end
diff --git a/v2v/v2v.ml b/v2v/v2v.ml
index fddb0742..cadf864d 100644
--- a/v2v/v2v.ml
+++ b/v2v/v2v.ml
@@ -583,32 +583,33 @@ read the man page virt-v2v(1).
       List.rev acc
   in
   let disks = loop [] 0 in
   let nr_disks = List.length disks in
 
   (* Copy the disks. *)
   List.iter (
     fun (i, input_socket, output_socket) ->
       message (f_"Copying disk %d/%d") (i+1) nr_disks;
 
-      let input_uri = nbd_uri_of_socket input_socket
+      let request_size = Output_module.request_size
+      and input_uri = nbd_uri_of_socket input_socket
       and output_uri = nbd_uri_of_socket output_socket in
 
       (* In verbose mode print some information about each
        * side of the pipeline.
        *)
       if verbose () then (
         nbdinfo ~content:true input_uri;
         nbdinfo ~content:false output_uri
       );
 
-      nbdcopy output_alloc input_uri output_uri
+      nbdcopy ?request_size output_alloc input_uri output_uri
   ) disks;
 
   (* End of copying phase. *)
   unlink (tmpdir // "copy");
 
   (* Do the finalization step. *)
   message (f_"Creating output metadata");
   Output_module.finalize tmpdir output_poptions output_t
     source inspect target_meta;
 
@@ -627,26 +628,30 @@ read the man page virt-v2v(1).
  * appliance may be created there.  (RHBZ#1316479, RHBZ#2051394)
  *)
 and check_host_free_space () =
   let free_space = StatVFS.free_space (StatVFS.statvfs large_tmpdir) in
   debug "check_host_free_space: large_tmpdir=%s free_space=%Ld"
         large_tmpdir free_space;
   if free_space < 1_073_741_824L then
     error (f_"insufficient free space in the conversion server temporary directory %s (%s).\n\nEither free up space in that directory, or set the LIBGUESTFS_CACHEDIR environment variable to point to another directory with more than 1GB of free space.\n\nSee also the virt-v2v(1) manual, section \"Minimum free space check in the host\".")
           large_tmpdir (human_size free_space)
 
-and nbdcopy output_alloc input_uri output_uri =
+and nbdcopy ?request_size output_alloc input_uri output_uri =
   (* XXX It's possible that some output modes know whether
    * --target-is-zero which would be a useful optimization.
    *)
   let cmd = ref [] in
   List.push_back_list cmd [ "nbdcopy"; input_uri; output_uri ];
+  (match request_size with
+    | None -> ()
+    | Some size -> List.push_back cmd (sprintf "--request-size=%d" size)
+  );
   List.push_back cmd "--flush";
   (*List.push_back cmd "--verbose";*)
   if not (quiet ()) then List.push_back cmd "--progress";
   if output_alloc = Types.Preallocated then List.push_back cmd "--allocated";
   let cmd = !cmd in
 
   if run_command cmd <> 0 then
     error (f_"nbdcopy command failed, see earlier error messages")
 
 (* Run nbdinfo on a URI and dump the information to stderr.
-- 
2.34.1




More information about the Libguestfs mailing list