[Libguestfs] [libnbd PATCH 3/3] states: Allow in-flight read while writing next command

Richard W.M. Jones rjones at redhat.com
Tue May 21 17:06:08 UTC 2019


On Tue, May 21, 2019 at 10:09:30AM -0500, Eric Blake wrote:
> As already noted in our state machine, a client that batches up a
> large read followed by large writes, coupled with a server that only
> processes commands in order, can result in deadlock (the server won't
> read more until we unblock its ability to write out its reply to our
> first command; but we aren't willing to read until we are done writing
> out our second command).  Break the deadlock by teaching the generator
> that while we are in the middle of writing a command, we must remain
> responsive to read_notify events; if the server has data for us to
> read, we should consume that before jumping back into the middle of
> our command issue (and consuming a reply can invalidate sbuf, so we
> have to drop an assertion in PREPARE_WRITE_PAYLOAD).
> ---
>  generator/generator              | 20 ++++++++++++++++++--
>  generator/states-issue-command.c | 25 ++++++++++++++++++++++++-
>  generator/states-reply.c         |  5 ++++-
>  lib/internal.h                   |  1 +
>  4 files changed, 47 insertions(+), 4 deletions(-)
> 
> diff --git a/generator/generator b/generator/generator
> index a4ad362..23b3cbf 100755
> --- a/generator/generator
> +++ b/generator/generator
> @@ -634,7 +634,15 @@ and issue_command_state_machine = [
>      default_state with
>      name = "SEND_REQUEST";
>      comment = "Sending a request to the remote server";
> -    external_events = [ NotifyWrite, "" ];
> +    external_events = [ NotifyWrite, "";
> +                        NotifyRead, "PAUSE_SEND_REQUEST" ];
> +  };
> +
> +  State {
> +    default_state with
> +    name = "PAUSE_SEND_REQUEST";
> +    comment = "Interrupt send request to receive an earlier command's reply";
> +    external_events = [];
>    };
> 
>    State {
> @@ -648,7 +656,15 @@ and issue_command_state_machine = [
>      default_state with
>      name = "SEND_WRITE_PAYLOAD";
>      comment = "Sending the write payload to the remote server";
> -    external_events = [ NotifyWrite, "" ];
> +    external_events = [ NotifyWrite, "";
> +                        NotifyRead, "PAUSE_WRITE_PAYLOAD" ];
> +  };
> +
> +State {
> +    default_state with
> +    name = "PAUSE_WRITE_PAYLOAD";
> +    comment = "Interrupt write payload to receive an earlier command's reply";
> +    external_events = [];
>    };
> 
>  State {
> diff --git a/generator/states-issue-command.c b/generator/states-issue-command.c
> index e24ea34..3a5980d 100644
> --- a/generator/states-issue-command.c
> +++ b/generator/states-issue-command.c
> @@ -25,6 +25,15 @@
>    assert (conn->cmds_to_issue != NULL);
>    cmd = conn->cmds_to_issue;
> 
> +  /* Were we interrupted by reading a reply to an earlier command? */
> +  if (conn->wlen) {
> +    if (conn->in_write_payload)
> +      SET_NEXT_STATE(%SEND_WRITE_PAYLOAD);
> +    else
> +      SET_NEXT_STATE(%SEND_REQUEST);
> +    return 0;
> +  }
> +
>    conn->sbuf.request.magic = htobe32 (NBD_REQUEST_MAGIC);
>    conn->sbuf.request.flags = htobe16 (cmd->flags);
>    conn->sbuf.request.type = htobe16 (cmd->type);
> @@ -43,12 +52,18 @@
>    }
>    return 0;
> 
> + ISSUE_COMMAND.PAUSE_SEND_REQUEST:
> +  assert (conn->wlen);
> +  assert (conn->cmds_to_issue != NULL);
> +  conn->in_write_payload = false;
> +  SET_NEXT_STATE (%^REPLY.START);
> +  return 0;
> +
>   ISSUE_COMMAND.PREPARE_WRITE_PAYLOAD:
>    struct command_in_flight *cmd;
> 
>    assert (conn->cmds_to_issue != NULL);
>    cmd = conn->cmds_to_issue;
> -  assert (cmd->handle == be64toh (conn->sbuf.request.handle));
>    if (cmd->type == NBD_CMD_WRITE) {
>      conn->wbuf = cmd->data;
>      conn->wlen = cmd->count;
> @@ -65,9 +80,17 @@
>    }
>    return 0;
> 
> + ISSUE_COMMAND.PAUSE_WRITE_PAYLOAD:
> +  assert (conn->wlen);
> +  assert (conn->cmds_to_issue != NULL);
> +  conn->in_write_payload = true;
> +  SET_NEXT_STATE (%^REPLY.START);
> +  return 0;
> +
>   ISSUE_COMMAND.FINISH:
>    struct command_in_flight *cmd;
> 
> +  assert (!conn->wlen);
>    assert (conn->cmds_to_issue != NULL);
>    cmd = conn->cmds_to_issue;
>    conn->cmds_to_issue = cmd->next;
> diff --git a/generator/states-reply.c b/generator/states-reply.c
> index 45362d4..6bb503a 100644
> --- a/generator/states-reply.c
> +++ b/generator/states-reply.c
> @@ -118,7 +118,10 @@
>    else
>      conn->cmds_done = cmd;
> 
> -  SET_NEXT_STATE (%.READY);
> +  if (conn->cmds_to_issue)
> +    SET_NEXT_STATE (%^ISSUE_COMMAND.START);
> +  else
> +    SET_NEXT_STATE (%.READY);
>    return 0;
> 
>  } /* END STATE MACHINE */
> diff --git a/lib/internal.h b/lib/internal.h
> index 3f2b729..466af9d 100644
> --- a/lib/internal.h
> +++ b/lib/internal.h
> @@ -182,6 +182,7 @@ struct nbd_connection {
>     * acknowledge them.
>     */
>    struct command_in_flight *cmds_to_issue, *cmds_in_flight, *cmds_done;
> +  bool in_write_payload;
>  };
> 
>  struct meta_context {

Yes, I think this is correct.

However, I also think that cmds_to_issue might be replaced by a single
command pointer.  (If I'm wrong in my reasoning, we'll very quickly
see an assert fail if something tries to overwrite a non-NULL
conn->cmd_to_issue pointer).

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-df lists disk usage of guests without needing to install any
software inside the virtual machine.  Supports Linux and Windows.
http://people.redhat.com/~rjones/virt-df/




More information about the Libguestfs mailing list