[Libguestfs] nbdkit & qemu 2.12: qemu-img: Protocol error: simple reply when structured reply chunk was expected

Eric Blake eblake at redhat.com
Sat Mar 23 12:26:24 UTC 2019


[adding qemu]

On 3/23/19 6:57 AM, Eric Blake wrote:
> On 3/23/19 6:42 AM, Richard W.M. Jones wrote:
>> nbdkit (upstream 5a7a394c699) currently fails with qemu 2.12.0:
>>
>>   $ ./nbdkit memory size=64M --run 'qemu-img convert $nbd /var/tmp/out'
>>   nbdkit: memory.2: error: invalid request: unknown command (7) ignored
>>   qemu-img: Protocol error: simple reply when structured reply chunk was expected
>>
>> This was a bug in qemu which was fixed upstream quite a long time ago
>> by the commit I've attached at the end of this email.
>>

>> ----------------------------------------------------------------------
>> 89aa0d87634e2cb98517509dc8bdb876f26ecf8b is the first bad commit
>> commit 89aa0d87634e2cb98517509dc8bdb876f26ecf8b
>> Author: Vladimir Sementsov-Ogievskiy <vsementsov at virtuozzo.com>
>> Date:   Fri Apr 27 17:20:01 2018 +0300
>>
>>     nbd/client: fix nbd_negotiate_simple_meta_context
>>     
>>     Initialize received variable. Otherwise, is is possible for server to
>>     answer without any contexts, but we will set context_id to something
>>     random (received_id is not initialized too) and return 1, which is
>>     wrong.
>>     

Ouch - upstream qemu still has a latent bug in this area. When qemu as
server sends an error to NBD_CMD_BLOCK_STATUS, it uses a structured
error reply, so we haven't tickled it before.  But the NBD spec permits
a simple error reply to any command except NBD_CMD_READ when structured
replies are negotiated, and that's what nbdkit currently implements. If
I add this hack patch to qemu's server (to intentionally fail
NBD_CMD_BLOCK_STATUS with a simple error), then the qemu client _should_
be able to just report that block status failed and keep the connection
alive, rather than its current behavior of hanging up. So we still need
a patch for qemu 4.0 for the client to accept simple errors. :(

With the hack applied, I got:

$ ./qemu-io -f raw nbd://localhost:10809 --trace=nbd_\* -c map
...
17218 at 1553343857.048440:nbd_send_request Sending request to server: {
.from = 0, .len = 1049088, .handle = 94796979915216, .flags = 0x8, .type
= 7 (block status) }
17218 at 1553343857.048643:nbd_receive_simple_reply Got simple reply: {
.error = 22 (EINVAL), handle = 94796979915216 }
17218 at 1553343857.048655:nbd_co_request_fail Request failed { .from = 0,
.len = 1049088, .handle = 94796979915216, .flags = 0x8, .type = 7 (block
status) } ret = -22, err: Protocol error: simple reply when structured
reply chunk was expected
Failed to get allocation status: Invalid argument


diff --git i/nbd/server.c w/nbd/server.c
index fd013a2817a..35f549abbf9 100644
--- i/nbd/server.c
+++ w/nbd/server.c
@@ -2148,16 +2148,16 @@ static int nbd_co_receive_request(NBDRequestData
*req, NBDRequest *request,
  * Returns 0 if connection is still live, -errno on failure to talk to
client
  */
 static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
-                                               uint64_t handle,
+                                               NBDRequest *request,
                                                int ret,
                                                const char *error_msg,
                                                Error **errp)
 {
-    if (client->structured_reply && ret < 0) {
-        return nbd_co_send_structured_error(client, handle, -ret,
error_msg,
+    if (client->structured_reply && ret < 0 && request->type ==
NBD_CMD_READ) {
+        return nbd_co_send_structured_error(client, request->handle,
-ret, error_msg,
                                             errp);
     } else {
-        return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
+        return nbd_co_send_simple_reply(client, request->handle, ret <
0 ? -ret : 0,
                                         NULL, 0, errp);
     }
 }
@@ -2177,7 +2177,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient
*client, NBDRequest *request,
     if (request->flags & NBD_CMD_FLAG_FUA) {
         ret = blk_co_flush(exp->blk);
         if (ret < 0) {
-            return nbd_send_generic_reply(client, request->handle, ret,
+            return nbd_send_generic_reply(client, request, ret,
                                           "flush failed", errp);
         }
     }
@@ -2192,7 +2192,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient
*client, NBDRequest *request,
     ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
                     request->len);
     if (ret < 0 || request->type == NBD_CMD_CACHE) {
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "reading from file failed", errp);
     }

@@ -2234,7 +2234,7 @@ static coroutine_fn int
nbd_handle_request(NBDClient *client,
         }
         ret = blk_pwrite(exp->blk, request->from + exp->dev_offset,
                          data, request->len, flags);
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "writing to file failed", errp);

     case NBD_CMD_WRITE_ZEROES:
@@ -2247,7 +2247,7 @@ static coroutine_fn int
nbd_handle_request(NBDClient *client,
         }
         ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
                                 request->len, flags);
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "writing to file failed", errp);

     case NBD_CMD_DISC:
@@ -2256,7 +2256,7 @@ static coroutine_fn int
nbd_handle_request(NBDClient *client,

     case NBD_CMD_FLUSH:
         ret = blk_co_flush(exp->blk);
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "flush failed", errp);

     case NBD_CMD_TRIM:
@@ -2265,12 +2265,14 @@ static coroutine_fn int
nbd_handle_request(NBDClient *client,
         if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) {
             ret = blk_co_flush(exp->blk);
         }
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "discard failed", errp);

     case NBD_CMD_BLOCK_STATUS:
+        return nbd_send_generic_reply(client, request, -EINVAL,
+                                      "no status for you today", errp);
         if (!request->len) {
-            return nbd_send_generic_reply(client, request->handle, -EINVAL,
+            return nbd_send_generic_reply(client, request, -EINVAL,
                                           "need non-zero length", errp);
         }
         if (client->export_meta.valid &&
@@ -2304,7 +2306,7 @@ static coroutine_fn int
nbd_handle_request(NBDClient *client,

             return ret;
         } else {
-            return nbd_send_generic_reply(client, request->handle, -EINVAL,
+            return nbd_send_generic_reply(client, request, -EINVAL,
                                           "CMD_BLOCK_STATUS not
negotiated",
                                           errp);
         }
@@ -2312,7 +2314,7 @@ static coroutine_fn int
nbd_handle_request(NBDClient *client,
     default:
         msg = g_strdup_printf("invalid request type (%" PRIu32 ")
received",
                               request->type);
-        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
+        ret = nbd_send_generic_reply(client, request, -EINVAL, msg,
                                      errp);
         g_free(msg);
         return ret;
@@ -2357,7 +2359,7 @@ static coroutine_fn void nbd_trip(void *opaque)
         Error *export_err = local_err;

         local_err = NULL;
-        ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
+        ret = nbd_send_generic_reply(client, &request, -EINVAL,
                                      error_get_pretty(export_err),
&local_err);
         error_free(export_err);
     } else {


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: OpenPGP digital signature
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20190323/b8646a62/attachment.sig>


More information about the Libguestfs mailing list