[Libguestfs] [PATCH 5/5] v2v: Add -o rhv-upload output mode.

Richard W.M. Jones rjones at redhat.com
Thu Feb 22 13:57:25 UTC 2018

 - Spools to a temporary disk
 - Need to specify direct/indirect upload via flag
 - Location of ca.pem
 - Target cluster
 - Delete disks on failure, or rename disks on success?
 - Handling of sparseness in raw format disks

This adds a new output mode to virt-v2v.  virt-v2v -o rhv-upload
streams images directly to an oVirt or RHV >= 4 Data Domain using the
oVirt SDK v4.  It is more efficient than -o rhv because it does not
need to go via the Export Storage Domain, and is possible for humans
to use unlike -o vdsm.

The implementation uses the Python SDK by running snippets of Python
code to interact with the ‘ovirtsdk4’ module.  It requires both Python 3
and the Python SDK v4 to be installed at run time (these are not,
however, new dependencies of virt-v2v since most people wouldn't have
 v2v/Makefile.am           |   2 +
 v2v/cmdline.ml            |  25 +++
 v2v/output_rhv_upload.ml  | 403 ++++++++++++++++++++++++++++++++++++++++++++++
 v2v/output_rhv_upload.mli |  26 +++
 4 files changed, 456 insertions(+)

diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index 83f0c30c7..c028babe6 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -64,6 +64,7 @@ SOURCES_MLI = \
 	output_null.mli \
 	output_qemu.mli \
 	output_rhv.mli \
+	output_rhv_upload.mli \
 	output_vdsm.mli \
 	parse_ovf_from_ova.mli \
 	parse_libvirt_xml.mli \
@@ -116,6 +117,7 @@ SOURCES_ML = \
 	output_local.ml \
 	output_qemu.ml \
 	output_rhv.ml \
+	output_rhv_upload.ml \
 	output_vdsm.ml \
 	inspect_source.ml \
 	target_bus_assignment.ml \
diff --git a/v2v/cmdline.ml b/v2v/cmdline.ml
index fceda1f82..4424863fe 100644
--- a/v2v/cmdline.ml
+++ b/v2v/cmdline.ml
@@ -138,6 +138,8 @@ let parse_cmdline () =
     | "disk" | "local" -> output_mode := `Local
     | "null" -> output_mode := `Null
     | "ovirt" | "rhv" | "rhev" -> output_mode := `RHV
+    | "ovirt-upload" | "ovirt_upload" | "rhv-upload" | "rhv_upload" ->
+       output_mode := `RHV_Upload
     | "qemu" -> output_mode := `QEmu
     | "vdsm" -> output_mode := `VDSM
     | s ->
@@ -537,6 +539,29 @@ read the man page virt-v2v(1).
       Output_rhv.output_rhv os output_alloc,
       output_format, output_alloc
+    | `RHV_Upload ->
+      let output_conn =
+        match output_conn with
+        | None ->
+           error (f_"-o rhv-upload: output connection was not specified, use ‘-oc’ to point to the oVirt or RHV server REST API URL")
+        | Some oc -> oc in
+      (* In theory we could make the password optional in future. *)
+      let output_password =
+        match output_password with
+        | None ->
+           error (f_"-o rhv-upload: output password file was not specified, use ‘-op’ to point to a file which contains the password used to connect to the oVirt or RHV server")
+        | Some op -> op in
+      let os =
+        match output_storage with
+        | None ->
+           error (f_"-o rhv-upload: output storage was not specified, use ‘-os’");
+        | Some os -> os in
+      if qemu_boot then
+        error_option_cannot_be_used_in_output_mode "rhv-upload" "--qemu-boot";
+      Output_rhv_upload.output_rhv_upload output_alloc output_conn
+                                          output_password os,
+      output_format, output_alloc
     | `VDSM ->
       if output_password <> None then
         error_option_cannot_be_used_in_output_mode "vdsm" "-op";
diff --git a/v2v/output_rhv_upload.ml b/v2v/output_rhv_upload.ml
new file mode 100644
index 000000000..77f7bc988
--- /dev/null
+++ b/v2v/output_rhv_upload.ml
@@ -0,0 +1,403 @@
+(* virt-v2v
+ * Copyright (C) 2009-2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+open Printf
+open Unix
+open Std_utils
+open Tools_utils
+open Unix_utils
+open Common_gettext.Gettext
+open Types
+open Utils
+(* Connection parameters for the oVirt/RHV REST API.  These correspond
+ * mostly to the fields in the Python sdk.Connection object, except
+ * for the password which is handled separately.
+ *)
+type connection = {
+  conn_url : string;       (* REST API URL, rebuilt without the username *)
+  conn_username : string;  (* user name passed to sdk.Connection *)
+  conn_debug : bool;       (* passed as the sdk.Connection debug flag *)
+}
+(* Render a connection on one line; used for debug output. *)
+let string_of_connection { conn_url; conn_username; conn_debug } =
+  sprintf "url=%s username=%s debug=%b" conn_url conn_username conn_debug
+(* Python code fragments go first.  Note these must not be
+ * indented because of Python's stupid whitespace thing.
+ *)
+(* Python fragment which prints the first character of sys.version,
+ * ie. the major version digit of the interpreter running it.
+ *)
+let python_get_version = "
+import sys
+print (sys.version[0]) # syntax works on py2 or py3
+"
+(* Import all the Python modules needed.  This fragment is prepended
+ * to every script and is also run on its own to check that the
+ * dependencies are installed.
+ *)
+let python_imports = "
+import logging
+import ovirtsdk4 as sdk
+import ovirtsdk4.types as types
+import ssl
+import sys
+import time
+from http.client import HTTPSConnection
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
+"
+(* Create the Python prologue which connects to the system service.
+ * This returns a string of Python code.
+ *
+ * The generated code reads the password from the file named by
+ * [output_password] (stripping trailing whitespace), opens an
+ * sdk.Connection using the URL, username and debug flag from [conn],
+ * and leaves the variables [connection] and [system_service] defined
+ * for the code fragment appended after it.
+ *
+ * [tmpdir] is accepted but not used by this function.
+ *
+ * XXX The connection is opened with insecure = True.
+ *)
+let python_connect tmpdir conn output_password =
+  sprintf "
+password_file = %s
+with open(password_file, 'r') as file:
+    password = file.read()
+password = password.rstrip()
+# Open the connection.
+connection = sdk.Connection(
+    url = %s,
+    username = %s,
+    password = password,
+    debug = %s,
+    log = logging.getLogger(),
+    insecure = True,
+)
+system_service = connection.system_service()
+" (py_quote output_password)
+  (py_quote conn.conn_url)
+  (py_quote conn.conn_username)
+  (py_bool conn.conn_debug)
+(* Return Python code which creates one empty disk and waits for it
+ * to reach the OK status.
+ *
+ * [disk_format] is spliced in unquoted as a Python expression;
+ * [disk_name] and [output_storage] are quoted as Python strings;
+ * [disk_size] is the provisioned size (an int64).  The disk is
+ * created sparse iff [output_alloc = Sparse].
+ *
+ * The script prints the new disk ID on stdout, which is how the
+ * caller retrieves it.  It relies on [system_service] being defined
+ * by the connection prologue.
+ *)
+let python_create_one_disk disk_name disk_format
+                           output_alloc output_storage disk_size =
+  sprintf "
+disks_service = system_service.disks_service()
+disk = disks_service.add(
+    disk = types.Disk(
+        name = %s,
+        format = %s,
+        sparse = %s,
+        provisioned_size = %Ld,
+        storage_domains = [types.StorageDomain(name = %s)],
+    )
+)
+disk_id = disk.id
+# Wait til the disk is up.  The transfer cannot start if the
+# disk is locked.
+disk_service = disks_service.disk_service(disk_id)
+while True:
+    time.sleep(5)
+    disk = disk_service.get()
+    if disk.status == types.DiskStatus.OK:
+        break
+# Return the disk ID.
+print(disk_id)
+" (py_quote disk_name)
+  disk_format (* it's a raw Python expression, don't quote *)
+  (py_bool (output_alloc = Sparse))
+  disk_size
+  (py_quote output_storage)
+(* XXX Temporary function.
+ *
+ * Return Python code which uploads the local file [filename] into
+ * the existing disk [disk_id] through an image transfer.
+ * [disk_size] (an int64) is used both as the Content-Length and as
+ * the number of bytes read from the file.
+ *
+ * The direct-upload flag and the CA file location are hard-coded
+ * for now (see the XXX markers on the arguments below).  The code
+ * relies on [connection] and [system_service] being defined by the
+ * connection prologue.
+ *)
+let python_upload_one_disk disk_id disk_size filename =
+  sprintf "
+transfers_service = system_service.image_transfers_service()
+transfer = transfers_service.add(
+    types.ImageTransfer(
+        image = types.Image (id=%s)
+    )
+)
+transfer_service = transfers_service.image_transfer_service(transfer.id)
+# After adding a new transfer for the disk, the transfer's status will
+# be INITIALIZING.  Wait until the init phase is over.
+while transfer.phase == types.ImageTransferPhase.INITIALIZING:
+    time.sleep(1)
+    transfer = transfer_service.get()
+if %s:
+    if transfer.transfer_url is None:
+        print(\"Direct upload to host not supported, requires ovirt-engine >= 4.2\")
+        sys.exit(1)
+    destination_url = urlparse(transfer.transfer_url)
+else:
+    destination_url = urlparse(transfer.proxy_url)
+context = ssl.create_default_context()
+context.load_verify_locations(cafile = %s)
+proxy_connection = HTTPSConnection(
+    destination_url.hostname,
+    destination_url.port,
+    context = context
+)
+image_path = %s
+image_size = %Ld
+proxy_connection.putrequest(\"PUT\", destination_url.path)
+proxy_connection.putheader('Content-Length', image_size)
+proxy_connection.endheaders()
+# This seems to give the best throughput when uploading from Yaniv's
+# machine to a server that drops the data. You may need to tune this
+# on your setup.
+BUF_SIZE = 128 * 1024
+with open(image_path, \"rb\") as disk:
+    pos = 0
+    while pos < image_size:
+        # Send the next chunk to the proxy.
+        to_read = min(image_size - pos, BUF_SIZE)
+        chunk = disk.read(to_read)
+        if not chunk:
+            transfer_service.pause()
+            raise RuntimeError(\"Unexpected end of file at pos=%%d\" %% pos)
+        proxy_connection.send(chunk)
+        pos += len(chunk)
+# Get the response
+response = proxy_connection.getresponse()
+if response.status != 200:
+    transfer_service.pause()
+    print(\"Upload failed: %%s %%s\" %%
+          (response.status, response.reason))
+    sys.exit(1)
+# Successful cleanup
+transfer_service.finalize()
+connection.close()
+proxy_connection.close()
+" (py_quote disk_id)
+  (py_bool true(*direct_upload XXX*))
+  (py_quote "/tmp/ca.pem"(*cafile XXX*))
+  (py_quote filename)
+  disk_size
+(* Return Python code which creates a virtual machine from the OVF
+ * document [ovf].  The OVF is serialized with DOM.doc_to_string and
+ * passed as the OVA-type initialization configuration.  The target
+ * cluster name is hard-coded to "Default" for now (XXX).
+ *
+ * The code relies on [system_service] being defined by the
+ * connection prologue.
+ *)
+let python_create_virtual_machine ovf =
+  sprintf "
+vms_service = system_service.vms_service()
+vm = vms_service.add(
+    types.Vm(
+        cluster=types.Cluster(name = %s),
+        initialization=types.Initialization(
+            configuration = types.Configuration(
+                type = types.ConfigurationType.OVA,
+                data = %s
+            )
+        )
+    )
+)
+" (py_quote "Default" (* XXX target cluster *))
+  (py_quote (DOM.doc_to_string ovf))
+(* Find the Python 3 binary.
+ *
+ * Tries "python3" then "python".  A candidate is accepted if
+ * "<candidate> --help" exits with status 0 and running
+ * [python_get_version] with it yields exactly ["3"].  Returns the
+ * name of the first acceptable candidate, or calls [error] if none
+ * is found.
+ *)
+let find_python3 () =
+  let rec loop = function
+    | [] ->
+       (* Wrapped in f_ like the other user-facing messages in this
+        * file so that it can be translated.
+        *)
+       error (f_"could not locate Python 3 binary on the $PATH.  You may have to install Python 3.  If Python 3 is already installed then you may need to create a directory containing a binary called ‘python3’ which runs Python 3.")
+    | python :: rest ->
+       (* Use shell_command first to check the binary exists. *)
+       let cmd = sprintf "%s --help >/dev/null 2>&1" (quote python) in
+       if shell_command cmd = 0 &&
+            run_python ~python python_get_version = ["3"] then (
+         debug "rhv-upload: python binary: %s" python;
+         python
+       )
+       else
+         loop rest
+  in
+  loop ["python3"; "python"]
+(* Parse the -oc URI and return a [connection].
+ *
+ * Calls [error] unless the URI scheme is https, a server name is
+ * present, and the path is present and not just "/".  If the URI
+ * carries no username a warning is printed and ‘admin@internal’ is
+ * assumed.
+ *
+ * NOTE(review): the URL is rebuilt from scheme, server and path
+ * only, so a port given in -oc does not appear in conn_url.
+ * Confirm whether that is intended.
+ *)
+let parse_output_conn oc =
+  let uri = Xml.parse_uri oc in
+  if uri.Xml.uri_scheme <> Some "https" then
+    error (f_"rhv-upload: -oc: URI must start with https://...");
+  if uri.Xml.uri_server = None then
+    error (f_"rhv-upload: -oc: no remote server name in the URI");
+  if uri.Xml.uri_path = None || uri.Xml.uri_path = Some "/" then
+    error (f_"rhv-upload: -oc: URI path component looks incorrect");
+  let username =
+    match uri.Xml.uri_user with
+    | None ->
+       warning (f_"rhv-upload: -oc: username was missing from URI, assuming ‘admin@internal’");
+       "admin@internal"
+    | Some user -> user in
+  (* Reconstruct the URI without the username. *)
+  let url = sprintf "%s://%s%s"
+                    (Option.default "https" uri.Xml.uri_scheme)
+                    (Option.default "localhost" uri.Xml.uri_server)
+                    (Option.default "" uri.Xml.uri_path) in
+  let conn = { conn_url = url; conn_username = username;
+               conn_debug = verbose () } in
+  debug "rhv-upload: connection=%s" (string_of_connection conn);
+  conn
+(* Create a single, empty disk on the target and return its ID.
+ *
+ * The ID is the single line of output produced by the Python script;
+ * any other output is treated as a failure.
+ *)
+let create_one_disk run_python tmpdir conn
+                    output_password output_format
+                    output_alloc output_storage
+                    source target =
+  let overlay = target.target_overlay in
+  (* Virtual size of the disk in bytes. *)
+  let disk_size = overlay.ov_virtual_size in
+  (* Python expression naming the SDK disk format. *)
+  let disk_format =
+    match output_format with
+    | `COW -> "types.DiskFormat.COW"
+    | `Raw -> "types.DiskFormat.RAW" in
+  (* Predictable disk name: the source name followed by the
+   * zero-padded disk index.
+   *)
+  let disk_name =
+    sprintf "%s-%03d" source.s_name overlay.ov_source.s_disk_id in
+  let prologue =
+    python_imports ^ python_connect tmpdir conn output_password in
+  let script =
+    prologue ^
+    python_create_one_disk disk_name disk_format
+                           output_alloc output_storage disk_size in
+  match run_python script with
+  | [id] -> id
+  | _ -> error (f_"rhv-upload: create_one_disk: error creating disks, see previous output")
+(* XXX Temporary function to upload spooled disk.
+ *
+ * Uploads the local file [filename] into the disk [disk_id], using
+ * the virtual size of target [t] as the upload size.  The output of
+ * the Python script is discarded.
+ *)
+let upload_one_disk run_python tmpdir conn output_password t filename disk_id =
+  let prologue =
+    python_imports ^ python_connect tmpdir conn output_password in
+  let upload =
+    python_upload_one_disk disk_id t.target_overlay.ov_virtual_size
+                           filename in
+  ignore (run_python (prologue ^ upload))
+(* Upload the virtual machine metadata (ie OVF) and create a VM.
+ * The output of the Python script is discarded.
+ *)
+let create_virtual_machine run_python tmpdir conn output_password ovf =
+  let prologue =
+    python_imports ^ python_connect tmpdir conn output_password in
+  let script = prologue ^ python_create_virtual_machine ovf in
+  ignore (run_python script)
+(* The -o rhv-upload output mode.
+ *
+ * [output_conn] is the -oc URI, [output_password] is the -op password
+ * file name and [output_storage] is the -os storage domain name.
+ * Constructing the object locates Python 3, parses the -oc URI and
+ * creates a temporary directory which is removed on exit.
+ *)
+class output_rhv_upload output_alloc output_conn
+                        output_password output_storage =
+  (* Shadow run_python with a version bound to the Python 3 binary. *)
+  let run_python =
+    let python = find_python3 () in
+    run_python ~python in
+  let conn = parse_output_conn output_conn in
+  (* The temporary directory is used for a few things such as passing
+   * passwords securely and (temporarily) for spooling disks (XXX).
+   *)
+  let tmpdir =
+    let base_dir = (open_guestfs ())#get_cachedir () in
+    let t = Mkdtemp.temp_dir ~base_dir "rhvupload." in
+    rmdir_on_exit t;
+    t in
+  inherit output
+  method precheck () =
+    (* Check all the dependencies including the Python 3 oVirt SDK v4
+     * module are installed.  This will fail with a Python error message.
+     *)
+    ignore (run_python python_imports)
+  (* Reconstruct the command line options describing this output. *)
+  method as_options =
+    "-o rhv-upload" ^
+    (match output_alloc with
+     | Sparse -> "" (* default, don't need to print it *)
+     | Preallocated -> " -oa preallocated") ^
+    sprintf " -oc %s -op %s -os %s"
+            output_conn output_password output_storage
+  method supported_firmware = [ TargetBIOS ]
+  (* List of disks we have created.  There will be one per target. *)
+  val mutable target_disk_ids = []
+  (* Create one empty disk per target, remember the disk IDs in
+   * [target_disk_ids], and point each target at a spool file in
+   * the temporary directory.
+   *)
+  method prepare_targets source targets =
+    let targets =
+      List.map (
+        fun t ->
+          (* Only allow output format "raw" or "qcow2". *)
+          let output_format =
+            match t.target_format with
+            | "raw" -> `Raw
+            | "qcow2" -> `COW
+            | _ ->
+               error (f_"rhv-upload: -of %s: Only output format ‘raw’ or ‘qcow2’ is supported.  If the input is in a different format then force one of these output formats by adding either ‘-of raw’ or ‘-of qcow2’ on the command line.")
+                     t.target_format in
+          let disk_id = create_one_disk run_python tmpdir conn output_password
+                                        output_format output_alloc
+                                        output_storage source t in
+          (* XXX Temporarily spool disks to tmpdir. *)
+          let target_file = TargetFile (tmpdir // t.target_overlay.ov_sd) in
+          { t with target_file }, disk_id
+      ) targets in
+    target_disk_ids <- List.map snd targets;
+    List.map fst targets
+  (* Upload every spooled disk, then build the OVF and create the
+   * virtual machine from it.
+   *)
+  method create_metadata source targets _ guestcaps inspect target_firmware =
+    (* Upload the spooled disks.  The targets are paired with the
+     * disk IDs recorded by prepare_targets, in the same order.
+     *)
+    List.iter (
+      fun (t, disk_id) ->
+        let filename =
+          match t.target_file with
+          | TargetFile filename -> filename
+          | TargetURI _ -> assert false in
+        upload_one_disk run_python tmpdir conn output_password
+                        t filename disk_id
+    ) (List.combine targets target_disk_ids);
+    (* Create the metadata. *)
+    let ovf =
+      Create_ovf.create_ovf source targets guestcaps inspect
+                            Sparse
+                            "domain"
+                            (List.map (fun _ -> "") targets)
+                            target_disk_ids
+                            "vm" in
+    (* Add the virtual machine. *)
+    create_virtual_machine run_python tmpdir conn output_password ovf
+(* Expose the class constructor as an ordinary function; this is the
+ * value declared in the .mli.
+ *)
+let output_rhv_upload = new output_rhv_upload
+(* Register the "rhv-upload" output module name when this module is
+ * initialized.
+ *)
+let () = Modules_list.register_output_module "rhv-upload"
diff --git a/v2v/output_rhv_upload.mli b/v2v/output_rhv_upload.mli
new file mode 100644
index 000000000..54999b727
--- /dev/null
+++ b/v2v/output_rhv_upload.mli
@@ -0,0 +1,26 @@
+(* virt-v2v
+ * Copyright (C) 2009-2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+(** [-o rhv-upload] target. *)
+val output_rhv_upload : Types.output_allocation -> string -> string ->
+                        string ->
+                        Types.output
+(** [output_rhv_upload output_alloc output_conn output_password output_storage]
+    creates and returns a new {!Types.output} object specialized for writing
+    output to oVirt or RHV directly via RHV APIs.
+
+    [output_conn] is the [https://] URI of the oVirt or RHV REST API
+    (the [-oc] option), [output_password] is the name of a file
+    containing the password (the [-op] option) and [output_storage] is
+    the name of the target storage domain (the [-os] option). *)

More information about the Libguestfs mailing list