[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Libguestfs] [PATCH] v2v: rhv-upload-plugin: Defer imageio connection



When using vddk input with certain vmware version, qemu-img may spend
lot of time getting source image extents. If getting image extents takes
more than 60 seconds, imageio server closes the idle connection, and the
transfer will fail on the first write with:

nbdkit: python[1]: error: /var/tmp/rhvupload.0OKqWA/rhv-upload-plugin.py: pwrite: error:
Traceback (most recent call last):
   File "/var/tmp/rhvupload.0OKqWA/rhv-upload-plugin.py", line 94, in wrapper
    return func(h, *args)
   File "/var/tmp/rhvupload.0OKqWA/rhv-upload-plugin.py", line 230, in pwrite
    r = http.getresponse()
   File "/usr/lib64/python3.6/http/client.py", line 1361, in getresponse
    response.begin()
   File "/usr/lib64/python3.6/http/client.py", line 311, in begin
    version, status, reason = self._read_status()
   File "/usr/lib64/python3.6/http/client.py", line 280, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
 http.client.RemoteDisconnected: Remote end closed connection without response

Fix the issue by deferring the actual connection used for the transfer
until qemu try to write to the server:

- The first connection is created in open(). This connection is used to
  query imageio server options and initialize the handle.

- The second connection is created on the first call of the nbd
  callbacks (pwrite, zero, ...). We store it in the handle and reuse it
  for the rest of the transfer.

Since we set inactivity_timeout to 3600 seconds, oVirt will keep the
transfer alive for one hour. If more time is needed to get image
extents, we need to increase the inactivity timeout.

I did not test this change yet, but I tested the approach using oVirt
upload_disk.py example script. Posting early to get feedback on this
approach.

Signed-off-by: Nir Soffer <nsoffer redhat com>
---
 v2v/rhv-upload-plugin.py | 86 +++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 33 deletions(-)

diff --git a/v2v/rhv-upload-plugin.py b/v2v/rhv-upload-plugin.py
index 8c11012b..c96b950c 100644
--- a/v2v/rhv-upload-plugin.py
+++ b/v2v/rhv-upload-plugin.py
@@ -114,10 +114,15 @@ def open(readonly):
 
     transfer = create_transfer(connection, disk, host)
     try:
+        # Connect to imageio server for getting server options and verify
+        # connectivity. The actual connection for transferring data will be
+        # opened when qemu-img is ready to write to the server.
         destination_url = parse_transfer_url(transfer)
         http = create_http(destination_url)
-        options = get_options(http, destination_url)
-        http = optimize_http(http, host, options)
+        try:
+            options = get_options(http, destination_url)
+        finally:
+            http.close()
     except:
         cancel_transfer(connection, transfer)
         raise
@@ -127,7 +132,7 @@ def open(readonly):
           % options)
 
     # Save everything we need to make requests in the handle.
-    return {
+    h = {
         'can_flush': options['can_flush'],
         'can_trim': options['can_trim'],
         'can_zero': options['can_zero'],
@@ -137,10 +142,16 @@ def open(readonly):
         'transfer': transfer,
         'failed': False,
         'highestwrite': 0,
-        'http': http,
         'path': destination_url.path,
+        'url': destination_url,
     }
 
+    # If the local host is used and the server supports unix socket, we can
+    # optimize the connection using unix socket.
+    if host is not None and options['unix_socket'] is not None:
+        h['unix_socket'] = options['unix_socket']
+
+    return h
 
 @failing
 def can_trim(h):
@@ -185,7 +196,7 @@ def request_failed(r, msg):
 
 @failing
 def pread(h, count, offset):
-    http = h['http']
+    http = get_http(h)
     transfer = h['transfer']
 
     headers = {"Range": "bytes=%d-%d" % (offset, offset + count - 1)}
@@ -206,7 +217,7 @@ def pread(h, count, offset):
 
 @failing
 def pwrite(h, buf, offset):
-    http = h['http']
+    http = get_http(h)
     transfer = h['transfer']
 
     count = len(buf)
@@ -237,7 +248,7 @@ def pwrite(h, buf, offset):
 
 @failing
 def zero(h, count, offset, may_trim):
-    http = h['http']
+    http = get_http(h)
 
     # Unlike the trim and flush calls, there is no 'can_zero' method
     # so nbdkit could call this even if the server doesn't support
@@ -267,7 +278,7 @@ def zero(h, count, offset, may_trim):
 
 
 def emulate_zero(h, count, offset):
-    http = h['http']
+    http = get_http(h)
     transfer = h['transfer']
 
     # qemu-img convert starts by trying to zero/trim the whole device.
@@ -303,7 +314,7 @@ def emulate_zero(h, count, offset):
 
 @failing
 def trim(h, count, offset):
-    http = h['http']
+    http = get_http(h)
 
     # Construct the JSON request for trimming.
     buf = json.dumps({'op': "trim",
@@ -327,7 +338,7 @@ def trim(h, count, offset):
 
 @failing
 def flush(h):
-    http = h['http']
+    http = get_http(h)
 
     # Construct the JSON request for flushing.
     buf = json.dumps({'op': "flush"}).encode()
@@ -345,7 +356,6 @@ def flush(h):
 
 
 def close(h):
-    http = h['http']
     connection = h['connection']
     transfer = h['transfer']
     disk_id = h['disk_id']
@@ -356,7 +366,8 @@ def close(h):
     # plugin exits.
     sys.stderr.flush()
 
-    http.close()
+    if 'http' in h:
+        h['http'].close()
 
     # If the connection failed earlier ensure we cancel the transfer. Canceling
     # the transfer will delete the disk.
@@ -679,12 +690,41 @@ def parse_transfer_url(transfer):
         return urlparse(transfer.proxy_url)
 
 
-def create_http(url):
+def get_http(h):
+    """
+    Create http connection lazily on the first access.
+
+    Needed because qemu-img may spend lot of time detecting image extents in
+    source image before sending the first request. If getting source image
+    extents takes more than 60 seconds, imageio server will close the
+    connection, and the first pwrite() or zero() will fail.
+
+    See https://bugzilla.redhat.com/1916176.
+    """
+    if 'http' not in h:
+        h['http'] = create_http(h['url'], unix_socket=h.get('unix_socket'))
+
+    return h['http']
+
+
+def create_http(url, unix_socket=None):
     """
     Create http connection for transfer url.
 
     Returns HTTPConnection.
     """
+    if unix_socket:
+        try:
+            http = UnixHTTPConnection(unix_socket)
+            http.connect()
+        except Exception as e:
+            # Very unlikely failure, but we can recover by using the https
+            # connection.
+            debug("cannot create unix socket connection, using https: %s" % e)
+        else:
+            debug("optimizing connection using unix socket %r" % unix_socket)
+            return http
+
     if url.scheme == "https":
         context = \
             ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH,
@@ -735,23 +775,3 @@ def get_options(http, url):
     else:
         raise RuntimeError("could not use OPTIONS request: %d: %s" %
                            (r.status, r.reason))
-
-
-def optimize_http(http, host, options):
-    """
-    Return an optimized http connection using unix socket if we are connected
-    to imageio server on the local host and it features a unix socket.
-    """
-    unix_socket = options['unix_socket']
-
-    if host is not None and unix_socket is not None:
-        try:
-            http = UnixHTTPConnection(unix_socket)
-        except Exception as e:
-            # Very unlikely failure, but we can recover by using the https
-            # connection.
-            debug("cannot create unix socket connection, using https: %s" % e)
-        else:
-            debug("optimizing connection using unix socket %r" % unix_socket)
-
-    return http
-- 
2.26.2


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]