[libvirt] PATCH: 17/25: Concurrent client dispatch in libvirtd

Daniel P. Berrange berrange at redhat.com
Tue Jan 13 17:45:43 UTC 2009


Historically libvirtd was single threaded, serializing all
requests across clients. An recent patch allowed multiple
threads, so multiple clients could run in parallel. A single
client was still serialized.

This patch removes that final restriction, allowing a single
client to have multiple in-flight RPC requests & replies.
Each client now has 3 variables

 - rx: zero or one. If ready for, or in process of reading
   a message this will be non-null. If we're throttling the
   client requests, it'll be NULL. Once completely read, moved
   to the 'dx' queue.
 - dx: zero or many. Requests read off wire currently waiting
   to be picked up for processing by a worker thread. Once a
   worker is available the message is removed from the 'dx'
   queue for duration of processing. A reply is put on the
   'tx' queue once a call is finished
 - tx: zero or many. Replies in process of, or ready to be,
   sent back to a client. Also includes any asynchronous
   event notifications to be sent.

The 'max_client_requests' configuration parameter controls
how many RPC request+reply calls can be processed in parallel.
Once this limit is reached, no more requests will be read off
the wire until a reply has been completed transmitted.

Each request requires upto 256 KB of memory, thus memory usage
for I/O is bounded by 'max_client_requests * max_clients * 256k'

Compatability:

 - old client -> old server - everything serialized
 - old client -> new server - client never sends a new
   request until its first is finished, so effectively
   serialized, and no compatability problems
 - new client -> old server - client sends many requests
   without waiting for replies. The server will only
   read and proess one at a time, so effectively
   serialized, and no compatability problems
 - new client -> new server - fully parallelized

The code has been stress tested by running 500 concurrent
clients and fixing the crashes, deadlocks and memory leaks.
Seems reasonably robust now.

 libvirtd.aug      |    2 
 libvirtd.conf     |   16 +
 qemud.c           |  630 ++++++++++++++++++++++++++++++++----------------------
 qemud.h           |   66 +++--
 remote.c          |   48 +++-
 test_libvirtd.aug |   26 ++
 6 files changed, 502 insertions(+), 286 deletions(-)

Daniel

diff --git a/qemud/libvirtd.aug b/qemud/libvirtd.aug
--- a/qemud/libvirtd.aug
+++ b/qemud/libvirtd.aug
@@ -53,6 +53,8 @@ module Libvirtd =
    let processing_entry = int_entry "min_workers"
                         | int_entry "max_workers"
                         | int_entry "max_clients"
+                        | int_entry "max_requests"
+                        | int_entry "max_client_requests"
 
    let logging_entry = int_entry "log_level"
                      | str_entry "log_filters"
diff --git a/qemud/libvirtd.conf b/qemud/libvirtd.conf
--- a/qemud/libvirtd.conf
+++ b/qemud/libvirtd.conf
@@ -247,6 +247,22 @@
 #min_workers = 5
 #max_workers = 20
 
+# Total global limit on concurrent RPC calls. Should be
+# at least as large as max_workers. Beyond this, RPC requests
+# will be read into memory and queued. This directly impact
+# memory usage, currently each request requires 256 KB of
+# memory. So by default upto 5 MB of memory is used
+#
+# XXX this isn't actually enforced yet, only the per-client
+# limit is used so far
+#max_requests = 20
+
+# Limit on concurrent requests from a single client
+# connection. To avoid one client monopolizing the server
+# this should be a small fraction of the global max_requests
+# and max_workers parameter
+#max_client_requests = 5
+
 #################################################################
 #
 # Logging controls
diff --git a/qemud/qemud.c b/qemud/qemud.c
--- a/qemud/qemud.c
+++ b/qemud/qemud.c
@@ -138,6 +138,11 @@ static int min_workers = 5;
 static int max_workers = 20;
 static int max_clients = 20;
 
+/* Total number of 'in-process' RPC calls allowed across all clients */
+static int max_requests = 20;
+/* Total number of 'in-process' RPC calls allowed by a single client*/
+static int max_client_requests = 5;
+
 #define DH_BITS 1024
 
 static sig_atomic_t sig_errors = 0;
@@ -162,9 +167,36 @@ static void sig_handler(int sig, siginfo
 
 static void qemudDispatchClientEvent(int watch, int fd, int events, void *opaque);
 static void qemudDispatchServerEvent(int watch, int fd, int events, void *opaque);
-static int qemudRegisterClientEvent(struct qemud_server *server,
-                                    struct qemud_client *client,
-                                    int removeFirst);
+
+
+void
+qemudClientMessageQueuePush(struct qemud_client_message **queue,
+                            struct qemud_client_message *msg)
+{
+    struct qemud_client_message *tmp = *queue;
+
+    if (tmp) {
+        while (tmp->next)
+            tmp = tmp->next;
+        tmp->next = msg;
+    } else {
+        *queue = msg;
+    }
+}
+
+static struct qemud_client_message *
+qemudClientMessageQueuePop(struct qemud_client_message **queue)
+{
+    struct qemud_client_message *tmp = *queue;
+
+    if (tmp)
+        *queue = tmp->next;
+    else
+        *queue = NULL;
+
+    tmp->next = NULL;
+    return tmp;
+}
 
 static int
 remoteCheckCertFile(const char *type, const char *file)
@@ -1042,6 +1074,8 @@ remoteCheckCertificate (gnutls_session_t
 static int
 remoteCheckAccess (struct qemud_client *client)
 {
+    struct qemud_client_message *confirm;
+
     /* Verify client certificate. */
     if (remoteCheckCertificate (client->tlssession) == -1) {
         VIR_ERROR0(_("remoteCheckCertificate: "
@@ -1051,14 +1085,25 @@ remoteCheckAccess (struct qemud_client *
                           "is set so the bad certificate is ignored"));
     }
 
+    if (client->tx) {
+        VIR_INFO("%s",
+                 _("client had unexpected data pending tx after access check"));
+        return -1;
+    }
+
+    if (VIR_ALLOC(confirm) < 0)
+        return -1;
+
     /* Checks have succeeded.  Write a '\1' byte back to the client to
      * indicate this (otherwise the socket is abruptly closed).
      * (NB. The '\1' byte is sent in an encrypted record).
      */
-    client->bufferLength = 1;
-    client->bufferOffset = 0;
-    client->buffer[0] = '\1';
-    client->mode = QEMUD_MODE_TX_PACKET;
+    confirm->async = 1;
+    confirm->bufferLength = 1;
+    confirm->bufferOffset = 0;
+    confirm->buffer[0] = '\1';
+
+    client->tx = confirm;
     return 0;
 }
 
@@ -1084,6 +1129,7 @@ int qemudGetSocketIdentity(int fd, uid_t
 }
 #endif
 
+
 static int qemudDispatchServer(struct qemud_server *server, struct qemud_socket *sock) {
     int fd;
     struct sockaddr_storage addr;
@@ -1099,7 +1145,7 @@ static int qemudDispatchServer(struct qe
     }
 
     if (server->nclients >= max_clients) {
-        VIR_ERROR0(_("Too many active clients, dropping connection"));
+        VIR_ERROR(_("Too many active clients (%d), dropping connection"), max_clients);
         close(fd);
         return -1;
     }
@@ -1137,6 +1183,12 @@ static int qemudDispatchServer(struct qe
     client->addrlen = addrlen;
     client->server = server;
 
+    /* Prepare one for packet receive */
+    if (VIR_ALLOC(client->rx) < 0)
+        goto cleanup;
+    client->rx->bufferLength = REMOTE_MESSAGE_HEADER_XDR_LEN;
+
+
 #if HAVE_POLKIT
     /* Only do policy checks for non-root - allow root user
        through with no checks, as a fail-safe - root can easily
@@ -1158,9 +1210,7 @@ static int qemudDispatchServer(struct qe
 #endif
 
     if (client->type != QEMUD_SOCK_TYPE_TLS) {
-        client->mode = QEMUD_MODE_RX_HEADER;
-        client->bufferLength = REMOTE_MESSAGE_HEADER_XDR_LEN;
-
+        /* Plain socket, so prepare to read first message */
         if (qemudRegisterClientEvent (server, client, 0) < 0)
             goto cleanup;
     } else {
@@ -1180,12 +1230,12 @@ static int qemudDispatchServer(struct qe
             if (remoteCheckAccess (client) == -1)
                 goto cleanup;
 
+            /* Handshake & cert check OK,  so prepare to read first message */
             if (qemudRegisterClientEvent(server, client, 0) < 0)
                 goto cleanup;
         } else if (ret == GNUTLS_E_INTERRUPTED || ret == GNUTLS_E_AGAIN) {
-            /* Most likely. */
-            client->mode = QEMUD_MODE_TLS_HANDSHAKE;
-            client->bufferLength = -1;
+            /* Most likely, need to do more handshake data */
+            client->handshake = 1;
 
             if (qemudRegisterClientEvent (server, client, 0) < 0)
                 goto cleanup;
@@ -1204,7 +1254,8 @@ static int qemudDispatchServer(struct qe
     if (client &&
         client->tlssession) gnutls_deinit (client->tlssession);
     close (fd);
-    free (client);
+    VIR_FREE(client->rx);
+    VIR_FREE(client);
     return -1;
 }
 
@@ -1216,8 +1267,7 @@ static int qemudDispatchServer(struct qe
  * We keep the libvirt connection open until any async
  * jobs have finished, then clean it up elsehwere
  */
-static void qemudDispatchClientFailure(struct qemud_server *server ATTRIBUTE_UNUSED,
-                                       struct qemud_client *client) {
+void qemudDispatchClientFailure(struct qemud_client *client) {
     virEventRemoveHandleImpl(client->watch);
 
     /* Deregister event delivery callback */
@@ -1242,7 +1292,7 @@ static struct qemud_client *qemudPending
     int i;
     for (i = 0 ; i < server->nclients ; i++) {
         virMutexLock(&server->clients[i]->lock);
-        if (server->clients[i]->mode == QEMUD_MODE_WAIT_DISPATCH) {
+        if (server->clients[i]->dx) {
             /* Delibrately don't unlock client - caller wants the lock */
             return server->clients[i];
         }
@@ -1256,8 +1306,9 @@ static void *qemudWorker(void *data)
     struct qemud_server *server = data;
 
     while (1) {
-        struct qemud_client *client;
-        int len;
+        struct qemud_client *client = NULL;
+        struct qemud_client_message *reply;
+
         virMutexLock(&server->lock);
         while ((client = qemudPendingJob(server)) == NULL) {
             if (virCondWait(&server->job, &server->lock) < 0) {
@@ -1268,55 +1319,64 @@ static void *qemudWorker(void *data)
         virMutexUnlock(&server->lock);
 
         /* We own a locked client now... */
-        client->mode = QEMUD_MODE_IN_DISPATCH;
         client->refs++;
 
-        if ((len = remoteDispatchClientRequest (server, client)) == 0)
-            qemudDispatchClientFailure(server, client);
+        /* Remove out message from dispatch queue while we use it */
+        reply = qemudClientMessageQueuePop(&client->dx);
 
-        /* Set up the output buffer. */
-        client->mode = QEMUD_MODE_TX_PACKET;
-        client->bufferLength = len;
-        client->bufferOffset = 0;
+        /* This function drops the lock during dispatch,
+         * and re-acquires it before returning */
+        if (remoteDispatchClientRequest (server, client, reply) < 0) {
+            VIR_FREE(reply);
+            qemudDispatchClientFailure(client);
+            client->refs--;
+            virMutexUnlock(&client->lock);
+            continue;
+        }
+
+        /* Put reply on end of tx queue to send out  */
+        qemudClientMessageQueuePush(&client->tx, reply);
 
         if (qemudRegisterClientEvent(server, client, 1) < 0)
-            qemudDispatchClientFailure(server, client);
+            qemudDispatchClientFailure(client);
 
         client->refs--;
         virMutexUnlock(&client->lock);
-        virMutexUnlock(&server->lock);
     }
 }
 
 
-static int qemudClientReadBuf(struct qemud_server *server,
-                              struct qemud_client *client,
+/*
+ * Read data into buffer using wire decoding (plain or TLS)
+ */
+static int qemudClientReadBuf(struct qemud_client *client,
                               char *data, unsigned len) {
     int ret;
 
     /*qemudDebug ("qemudClientRead: len = %d", len);*/
 
     if (!client->tlssession) {
-        if ((ret = read (client->fd, data, len)) <= 0) {
-            if (ret == 0 || errno != EAGAIN) {
-                if (ret != 0)
-                    VIR_ERROR(_("read: %s"), strerror (errno));
-                qemudDispatchClientFailure(server, client);
-            }
+        ret = read (client->fd, data, len);
+        if (ret == -1 && (errno == EAGAIN ||
+                          errno == EINTR))
+            return 0;
+        if (ret <= 0) {
+            if (ret != 0)
+                VIR_ERROR(_("read: %s"), strerror (errno));
+            qemudDispatchClientFailure(client);
             return -1;
         }
     } else {
         ret = gnutls_record_recv (client->tlssession, data, len);
-        if (qemudRegisterClientEvent (server, client, 1) < 0)
-            qemudDispatchClientFailure (server, client);
-        else if (ret <= 0) {
-            if (ret == 0 || (ret != GNUTLS_E_AGAIN &&
-                             ret != GNUTLS_E_INTERRUPTED)) {
-                if (ret != 0)
-                    VIR_ERROR(_("gnutls_record_recv: %s"),
-                              gnutls_strerror (ret));
-                qemudDispatchClientFailure (server, client);
-            }
+
+        if (ret == -1 && (ret == GNUTLS_E_AGAIN &&
+                          ret == GNUTLS_E_INTERRUPTED))
+            return 0;
+        if (ret <= 0) {
+            if (ret != 0)
+                VIR_ERROR(_("gnutls_record_recv: %s"),
+                          gnutls_strerror (ret));
+            qemudDispatchClientFailure(client);
             return -1;
         }
     }
@@ -1324,21 +1384,26 @@ static int qemudClientReadBuf(struct qem
     return ret;
 }
 
-static int qemudClientReadPlain(struct qemud_server *server,
-                                struct qemud_client *client) {
+/*
+ * Read data into buffer without decoding
+ */
+static int qemudClientReadPlain(struct qemud_client *client) {
     int ret;
-    ret = qemudClientReadBuf(server, client,
-                             client->buffer + client->bufferOffset,
-                             client->bufferLength - client->bufferOffset);
-    if (ret < 0)
-        return ret;
-    client->bufferOffset += ret;
-    return 0;
+    ret = qemudClientReadBuf(client,
+                             client->rx->buffer + client->rx->bufferOffset,
+                             client->rx->bufferLength - client->rx->bufferOffset);
+    if (ret <= 0)
+        return ret; /* -1 error, 0 eagain */
+
+    client->rx->bufferOffset += ret;
+    return ret;
 }
 
 #if HAVE_SASL
-static int qemudClientReadSASL(struct qemud_server *server,
-                               struct qemud_client *client) {
+/*
+ * Read data into buffer decoding with SASL
+ */
+static int qemudClientReadSASL(struct qemud_client *client) {
     int got, want;
 
     /* We're doing a SSF data read, so now its times to ensure
@@ -1350,30 +1415,33 @@ static int qemudClientReadSASL(struct qe
 
     /* Need to read some more data off the wire */
     if (client->saslDecoded == NULL) {
+        int ret;
         char encoded[8192];
         int encodedLen = sizeof(encoded);
-        encodedLen = qemudClientReadBuf(server, client, encoded, encodedLen);
+        encodedLen = qemudClientReadBuf(client, encoded, encodedLen);
 
         if (encodedLen < 0)
             return -1;
 
-        sasl_decode(client->saslconn, encoded, encodedLen,
-                    &client->saslDecoded, &client->saslDecodedLength);
+        ret = sasl_decode(client->saslconn, encoded, encodedLen,
+                          &client->saslDecoded, &client->saslDecodedLength);
+        if (ret != SASL_OK)
+            return -1;
 
         client->saslDecodedOffset = 0;
     }
 
     /* Some buffered decoded data to return now */
     got = client->saslDecodedLength - client->saslDecodedOffset;
-    want = client->bufferLength - client->bufferOffset;
+    want = client->rx->bufferLength - client->rx->bufferOffset;
 
     if (want > got)
         want = got;
 
-    memcpy(client->buffer + client->bufferOffset,
+    memcpy(client->rx->buffer + client->rx->bufferOffset,
            client->saslDecoded + client->saslDecodedOffset, want);
     client->saslDecodedOffset += want;
-    client->bufferOffset += want;
+    client->rx->bufferOffset += want;
 
     if (client->saslDecodedOffset == client->saslDecodedLength) {
         client->saslDecoded = NULL;
@@ -1384,132 +1452,125 @@ static int qemudClientReadSASL(struct qe
 }
 #endif
 
-static int qemudClientRead(struct qemud_server *server,
-                           struct qemud_client *client) {
+/*
+ * Read as much data off wire as possible till we fill our
+ * buffer, or would block on I/O
+ */
+static int qemudClientRead(struct qemud_client *client) {
 #if HAVE_SASL
     if (client->saslSSF & QEMUD_SASL_SSF_READ)
-        return qemudClientReadSASL(server, client);
+        return qemudClientReadSASL(client);
     else
 #endif
-        return qemudClientReadPlain(server, client);
+        return qemudClientReadPlain(client);
 }
 
 
-static void qemudDispatchClientRead(struct qemud_server *server, struct qemud_client *client) {
-    unsigned int len;
+/*
+ * Read data until we get a complete message to process
+ */
+static void qemudDispatchClientRead(struct qemud_server *server,
+                                    struct qemud_client *client) {
     /*qemudDebug ("qemudDispatchClientRead: mode = %d", client->mode);*/
 
-    switch (client->mode) {
-    case QEMUD_MODE_RX_HEADER: {
+readmore:
+    if (qemudClientRead(client) < 0)
+        return; /* Error, or blocking */
+
+    if (client->rx->bufferOffset < client->rx->bufferLength)
+        return; /* Not read enough */
+
+    /* Either done with length word header */
+    if (client->rx->bufferLength == REMOTE_MESSAGE_HEADER_XDR_LEN) {
+        int len;
         XDR x;
 
-        if (qemudClientRead(server, client) < 0)
-            return; /* Error, or blocking */
+        xdrmem_create(&x, client->rx->buffer, client->rx->bufferLength, XDR_DECODE);
 
-        if (client->bufferOffset < client->bufferLength)
-            return; /* Not read enough */
-
-        xdrmem_create(&x, client->buffer, client->bufferLength, XDR_DECODE);
-
-        if (!xdr_u_int(&x, &len)) {
+        if (!xdr_int(&x, &len)) {
             xdr_destroy (&x);
             DEBUG0("Failed to decode packet length");
-            qemudDispatchClientFailure(server, client);
+            qemudDispatchClientFailure(client);
             return;
         }
         xdr_destroy (&x);
 
+        /* Length includes the size of the length word itself */
+        len -= REMOTE_MESSAGE_HEADER_XDR_LEN;
+
         if (len > REMOTE_MESSAGE_MAX) {
             DEBUG("Packet length %u too large", len);
-            qemudDispatchClientFailure(server, client);
+            qemudDispatchClientFailure(client);
             return;
         }
 
         /* Length include length of the length field itself, so
          * check minimum size requirements */
-        if (len <= REMOTE_MESSAGE_HEADER_XDR_LEN) {
+        if (len <= 0) {
             DEBUG("Packet length %u too small", len);
-            qemudDispatchClientFailure(server, client);
+            qemudDispatchClientFailure(client);
             return;
         }
 
-        client->mode = QEMUD_MODE_RX_PAYLOAD;
-        client->bufferLength = len - REMOTE_MESSAGE_HEADER_XDR_LEN;
-        client->bufferOffset = 0;
+        /* Prepare to read rest of message */
+        client->rx->bufferLength += len;
 
         if (qemudRegisterClientEvent(server, client, 1) < 0) {
-            qemudDispatchClientFailure(server, client);
+            qemudDispatchClientFailure(client);
             return;
         }
 
-        /* Fall through */
-    }
+        /* Try and read payload immediately instead of going back
+           into poll() because chances are the data is already
+           waiting for us */
+        goto readmore;
+    } else {
+        /* Move completed message to the end of the dispatch queue */
+        qemudClientMessageQueuePush(&client->dx, client->rx);
+        client->rx = NULL;
+        client->nrequests++;
 
-    case QEMUD_MODE_RX_PAYLOAD: {
-        if (qemudClientRead(server, client) < 0)
-            return; /* Error, or blocking */
+        /* Possibly need to create another receive buffer */
+        if ((client->nrequests < max_client_requests &&
+             VIR_ALLOC(client->rx) < 0)) {
+            qemudDispatchClientFailure(client);
+        } else {
+            if (client->rx)
+                client->rx->bufferLength = REMOTE_MESSAGE_HEADER_XDR_LEN;
 
-        if (client->bufferOffset < client->bufferLength)
-            return; /* Not read enough */
-
-        client->mode = QEMUD_MODE_WAIT_DISPATCH;
-        if (qemudRegisterClientEvent(server, client, 1) < 0)
-            qemudDispatchClientFailure(server, client);
-
-        virCondSignal(&server->job);
-
-        break;
-    }
-
-    case QEMUD_MODE_TLS_HANDSHAKE: {
-        int ret;
-
-        /* Continue the handshake. */
-        ret = gnutls_handshake (client->tlssession);
-        if (ret == 0) {
-            /* Finished.  Next step is to check the certificate. */
-            if (remoteCheckAccess (client) == -1)
-                qemudDispatchClientFailure (server, client);
-            else if (qemudRegisterClientEvent (server, client, 1) < 0)
-                qemudDispatchClientFailure (server, client);
-        } else if (ret != GNUTLS_E_AGAIN && ret != GNUTLS_E_INTERRUPTED) {
-            VIR_ERROR(_("TLS handshake failed: %s"),
-                      gnutls_strerror (ret));
-            qemudDispatchClientFailure (server, client);
-        } else {
-            if (qemudRegisterClientEvent (server ,client, 1) < 0)
-                qemudDispatchClientFailure (server, client);
+            if (qemudRegisterClientEvent(server, client, 1) < 0)
+                qemudDispatchClientFailure(client);
+            else
+                /* Tell one of the workers to get on with it... */
+                virCondSignal(&server->job);
         }
-
-        break;
-    }
-
-    default:
-        DEBUG("Got unexpected data read while in %d mode", client->mode);
-        qemudDispatchClientFailure(server, client);
     }
 }
 
 
-static int qemudClientWriteBuf(struct qemud_server *server,
-                               struct qemud_client *client,
+/*
+ * Send a chunk of data using wire encoding (plain or TLS)
+ */
+static int qemudClientWriteBuf(struct qemud_client *client,
                                const char *data, int len) {
     int ret;
     if (!client->tlssession) {
-        if ((ret = safewrite(client->fd, data, len)) == -1) {
+        if ((ret = write(client->fd, data, len)) == -1) {
+            if (errno == EAGAIN || errno == EINTR)
+                return 0;
             VIR_ERROR(_("write: %s"), strerror (errno));
-            qemudDispatchClientFailure(server, client);
+            qemudDispatchClientFailure(client);
             return -1;
         }
     } else {
         ret = gnutls_record_send (client->tlssession, data, len);
-        if (qemudRegisterClientEvent (server, client, 1) < 0)
-            qemudDispatchClientFailure (server, client);
-        else if (ret < 0) {
-            if (ret != GNUTLS_E_INTERRUPTED && ret != GNUTLS_E_AGAIN) {
-                VIR_ERROR(_("gnutls_record_send: %s"), gnutls_strerror (ret));
-                qemudDispatchClientFailure (server, client);
-            }
+        if (ret < 0) {
+            if (ret == GNUTLS_E_INTERRUPTED ||
+                ret == GNUTLS_E_AGAIN)
+                return 0;
+
+            VIR_ERROR(_("gnutls_record_send: %s"), gnutls_strerror (ret));
+            qemudDispatchClientFailure(client);
             return -1;
         }
     }
@@ -1517,42 +1578,49 @@ static int qemudClientWriteBuf(struct qe
 }
 
 
-static int qemudClientWritePlain(struct qemud_server *server,
-                                 struct qemud_client *client) {
-    int ret = qemudClientWriteBuf(server, client,
-                                  client->buffer + client->bufferOffset,
-                                  client->bufferLength - client->bufferOffset);
-    if (ret < 0)
-        return -1;
-    client->bufferOffset += ret;
-    return 0;
+/*
+ * Send client->tx using no encoding
+ */
+static int qemudClientWritePlain(struct qemud_client *client) {
+    int ret = qemudClientWriteBuf(client,
+                                  client->tx->buffer + client->tx->bufferOffset,
+                                  client->tx->bufferLength - client->tx->bufferOffset);
+    if (ret <= 0)
+        return ret; /* -1 error, 0 = egain */
+    client->tx->bufferOffset += ret;
+    return ret;
 }
 
 
 #if HAVE_SASL
-static int qemudClientWriteSASL(struct qemud_server *server,
-                                struct qemud_client *client) {
+/*
+ * Send client->tx using SASL encoding
+ */
+static int qemudClientWriteSASL(struct qemud_client *client) {
     int ret;
 
     /* Not got any pending encoded data, so we need to encode raw stuff */
     if (client->saslEncoded == NULL) {
         int err;
         err = sasl_encode(client->saslconn,
-                          client->buffer + client->bufferOffset,
-                          client->bufferLength - client->bufferOffset,
+                          client->tx->buffer + client->tx->bufferOffset,
+                          client->tx->bufferLength - client->tx->bufferOffset,
                           &client->saslEncoded,
                           &client->saslEncodedLength);
 
+        if (err != SASL_OK)
+            return -1;
+
         client->saslEncodedOffset = 0;
     }
 
     /* Send some of the encoded stuff out on the wire */
-    ret = qemudClientWriteBuf(server, client,
+    ret = qemudClientWriteBuf(client,
                               client->saslEncoded + client->saslEncodedOffset,
                               client->saslEncodedLength - client->saslEncodedOffset);
 
-    if (ret < 0)
-        return -1;
+    if (ret <= 0)
+        return ret; /* -1 error, 0 == egain */
 
     /* Note how much we sent */
     client->saslEncodedOffset += ret;
@@ -1561,77 +1629,101 @@ static int qemudClientWriteSASL(struct q
     if (client->saslEncodedOffset == client->saslEncodedLength) {
         client->saslEncoded = NULL;
         client->saslEncodedOffset = client->saslEncodedLength = 0;
-        client->bufferOffset = client->bufferLength;
+
+        /* Mark as complete, so caller detects completion */
+        client->tx->bufferOffset = client->tx->bufferLength;
     }
 
-    return 0;
+    return ret;
 }
 #endif
 
-static int qemudClientWrite(struct qemud_server *server,
-                            struct qemud_client *client) {
+/*
+ * Send as much data in the client->tx as possible
+ */
+static int qemudClientWrite(struct qemud_client *client) {
 #if HAVE_SASL
     if (client->saslSSF & QEMUD_SASL_SSF_WRITE)
-        return qemudClientWriteSASL(server, client);
+        return qemudClientWriteSASL(client);
     else
 #endif
-        return qemudClientWritePlain(server, client);
+        return qemudClientWritePlain(client);
 }
 
 
-void
+/*
+ * Process all queued client->tx messages until
+ * we would block on I/O
+ */
+static void
 qemudDispatchClientWrite(struct qemud_server *server,
                          struct qemud_client *client) {
-    switch (client->mode) {
-    case QEMUD_MODE_TX_PACKET: {
-        if (qemudClientWrite(server, client) < 0)
-            return;
-
-        if (client->bufferOffset == client->bufferLength) {
-            if (client->closing) {
-                qemudDispatchClientFailure (server, client);
-            } else {
-                /* Done writing, switch back to receive */
-                client->mode = QEMUD_MODE_RX_HEADER;
-                client->bufferLength = REMOTE_MESSAGE_HEADER_XDR_LEN;
-                client->bufferOffset = 0;
-
-                if (qemudRegisterClientEvent (server, client, 1) < 0)
-                    qemudDispatchClientFailure (server, client);
-            }
-        }
-        /* Still writing */
-        break;
-    }
-
-    case QEMUD_MODE_TLS_HANDSHAKE: {
+    while (client->tx) {
         int ret;
 
-        /* Continue the handshake. */
-        ret = gnutls_handshake (client->tlssession);
-        if (ret == 0) {
-            /* Finished.  Next step is to check the certificate. */
-            if (remoteCheckAccess (client) == -1)
-                qemudDispatchClientFailure (server, client);
-            else if (qemudRegisterClientEvent (server, client, 1))
-                qemudDispatchClientFailure (server, client);
-        } else if (ret != GNUTLS_E_AGAIN && ret != GNUTLS_E_INTERRUPTED) {
-            VIR_ERROR(_("TLS handshake failed: %s"), gnutls_strerror (ret));
-            qemudDispatchClientFailure (server, client);
-        } else {
-            if (qemudRegisterClientEvent (server, client, 1))
-                qemudDispatchClientFailure (server, client);
+        ret = qemudClientWrite(client);
+        if (ret < 0) {
+            qemudDispatchClientFailure(client);
+            return;
         }
+        if (ret == 0)
+            return; /* Would block on write EAGAIN */
 
-        break;
-    }
+        if (client->tx->bufferOffset == client->tx->bufferLength) {
+            struct qemud_client_message *reply;
 
-    default:
-        DEBUG("Got unexpected data write while in %d mode", client->mode);
-        qemudDispatchClientFailure(server, client);
+            /* Get finished reply from head of tx queue */
+            reply = qemudClientMessageQueuePop(&client->tx);
+
+            /* If its not an async message, then we have
+             * just completed an RPC request */
+            if (!reply->async)
+                client->nrequests--;
+
+            /* Move record to end of 'rx' ist */
+            if (!client->rx &&
+                client->nrequests < max_client_requests) {
+                /* Reset message record for next RX attempt */
+                client->rx = reply;
+                client->rx->bufferOffset = 0;
+                client->rx->bufferLength = REMOTE_MESSAGE_HEADER_XDR_LEN;
+            } else {
+                VIR_FREE(reply);
+            }
+
+            if (client->closing ||
+                qemudRegisterClientEvent (server, client, 1) < 0)
+                 qemudDispatchClientFailure(client);
+         }
     }
 }
 
+static void
+qemudDispatchClientHandshake(struct qemud_server *server,
+                             struct qemud_client *client) {
+    int ret;
+    /* Continue the handshake. */
+    ret = gnutls_handshake (client->tlssession);
+    if (ret == 0) {
+        /* Finished.  Next step is to check the certificate. */
+        if (remoteCheckAccess (client) == -1)
+            qemudDispatchClientFailure(client);
+        else if (qemudRegisterClientEvent (server, client, 1))
+            qemudDispatchClientFailure(client);
+    } else if (ret == GNUTLS_E_AGAIN ||
+               ret == GNUTLS_E_INTERRUPTED) {
+        /* Carry on waiting for more handshake. Update
+           the events just in case handshake data flow
+           direction has changed */
+        if (qemudRegisterClientEvent (server, client, 1))
+            qemudDispatchClientFailure(client);
+    } else {
+        /* Fatal error in handshake */
+        VIR_ERROR(_("TLS handshake failed: %s"),
+                  gnutls_strerror (ret));
+        qemudDispatchClientFailure(client);
+    }
+}
 
 static void
 qemudDispatchClientEvent(int watch, int fd, int events, void *opaque) {
@@ -1642,59 +1734,66 @@ qemudDispatchClientEvent(int watch, int 
     virMutexLock(&server->lock);
 
     for (i = 0 ; i < server->nclients ; i++) {
+        virMutexLock(&server->clients[i]->lock);
         if (server->clients[i]->watch == watch) {
             client = server->clients[i];
             break;
         }
+        virMutexUnlock(&server->clients[i]->lock);
     }
 
+    virMutexUnlock(&server->lock);
+
     if (!client) {
-        virMutexUnlock(&server->lock);
         return;
     }
 
-    virMutexLock(&client->lock);
-    virMutexUnlock(&server->lock);
+    if (client->fd != fd) {
+        virMutexUnlock(&client->lock);
+        return;
+    }
 
-    if (client->fd != fd)
-        return;
+    if (events & (VIR_EVENT_HANDLE_WRITABLE |
+                  VIR_EVENT_HANDLE_READABLE)) {
+        if (client->handshake) {
+            qemudDispatchClientHandshake(server, client);
+        } else {
+            if (events & VIR_EVENT_HANDLE_WRITABLE)
+                qemudDispatchClientWrite(server, client);
+            if (events == VIR_EVENT_HANDLE_READABLE)
+                qemudDispatchClientRead(server, client);
+        }
+    }
 
-    if (events == VIR_EVENT_HANDLE_WRITABLE)
-        qemudDispatchClientWrite(server, client);
-    else if (events == VIR_EVENT_HANDLE_READABLE)
-        qemudDispatchClientRead(server, client);
-    else
-        qemudDispatchClientFailure(server, client);
+    /* NB, will get HANGUP + READABLE at same time upon
+     * disconnect */
+    if (events & (VIR_EVENT_HANDLE_ERROR |
+                  VIR_EVENT_HANDLE_HANGUP))
+        qemudDispatchClientFailure(client);
+
     virMutexUnlock(&client->lock);
 }
 
-static int qemudRegisterClientEvent(struct qemud_server *server,
-                                    struct qemud_client *client,
-                                    int update) {
-    int mode;
-    switch (client->mode) {
-    case QEMUD_MODE_TLS_HANDSHAKE:
+int qemudRegisterClientEvent(struct qemud_server *server,
+                             struct qemud_client *client,
+                             int update) {
+    int mode = 0;
+
+    if (client->handshake) {
         if (gnutls_record_get_direction (client->tlssession) == 0)
-            mode = VIR_EVENT_HANDLE_READABLE;
+            mode |= VIR_EVENT_HANDLE_READABLE;
         else
-            mode = VIR_EVENT_HANDLE_WRITABLE;
-        break;
+            mode |= VIR_EVENT_HANDLE_WRITABLE;
+    } else {
+        /* If there is a message on the rx queue then
+         * we're wanting more input */
+        if (client->rx)
+            mode |= VIR_EVENT_HANDLE_READABLE;
 
-    case QEMUD_MODE_RX_HEADER:
-    case QEMUD_MODE_RX_PAYLOAD:
-        mode = VIR_EVENT_HANDLE_READABLE;
-        break;
-
-    case QEMUD_MODE_TX_PACKET:
-        mode = VIR_EVENT_HANDLE_WRITABLE;
-        break;
-
-    case QEMUD_MODE_WAIT_DISPATCH:
-        mode = 0;
-        break;
-
-    default:
-        return -1;
+        /* If there are one or more messages to send back to client,
+           then monitor for writability on socket */
+        if (client->tx)
+            mode |= VIR_EVENT_HANDLE_WRITABLE;
     }
 
     if (update) {
@@ -1760,6 +1859,29 @@ static void qemudInactiveTimer(int timer
     }
 }
 
+static void qemudFreeClient(struct qemud_client *client) {
+    while (client->rx) {
+        struct qemud_client_message *msg
+            = qemudClientMessageQueuePop(&client->rx);
+        VIR_FREE(msg);
+    }
+    while (client->dx) {
+        struct qemud_client_message *msg
+            = qemudClientMessageQueuePop(&client->dx);
+        VIR_FREE(msg);
+    }
+    while (client->tx) {
+        struct qemud_client_message *msg
+            = qemudClientMessageQueuePop(&client->tx);
+        VIR_FREE(msg);
+    }
+
+    if (client->conn)
+        virConnectClose(client->conn);
+    virMutexDestroy(&client->lock);
+    VIR_FREE(client);
+}
+
 static int qemudRunLoop(struct qemud_server *server) {
     int timerid = -1;
     int ret = -1, i;
@@ -1796,8 +1918,11 @@ static int qemudRunLoop(struct qemud_ser
         }
 
         virMutexUnlock(&server->lock);
-        if (qemudOneLoop() < 0)
+        if (qemudOneLoop() < 0) {
+            virMutexLock(&server->lock);
+            DEBUG0("Loop iteration error, exiting\n");
             break;
+        }
         virMutexLock(&server->lock);
 
     reprocess:
@@ -1808,17 +1933,18 @@ static int qemudRunLoop(struct qemud_ser
                 && server->clients[i]->refs == 0;
             virMutexUnlock(&server->clients[i]->lock);
             if (inactive) {
-                if (server->clients[i]->conn)
-                    virConnectClose(server->clients[i]->conn);
-                virMutexDestroy(&server->clients[i]->lock);
-                VIR_FREE(server->clients[i]);
+                qemudFreeClient(server->clients[i]);
                 server->nclients--;
-                if (i < server->nclients) {
+                if (i < server->nclients)
                     memmove(server->clients + i,
                             server->clients + i + 1,
-                            server->nclients - i);
-                    goto reprocess;
+                            sizeof (*server->clients) * (server->nclients - i));
+
+                if (VIR_REALLOC_N(server->clients,
+                                  server->nclients) < 0) {
+                    ; /* ignore */
                 }
+                goto reprocess;
             }
         }
 
@@ -1843,6 +1969,7 @@ static int qemudRunLoop(struct qemud_ser
         pthread_join(thread, NULL);
         virMutexLock(&server->lock);
     }
+    VIR_FREE(server->workers);
 
     free(server->workers);
     virMutexUnlock(&server->lock);
@@ -2223,6 +2350,9 @@ remoteReadConfigFile (struct qemud_serve
     GET_CONF_INT (conf, filename, max_workers);
     GET_CONF_INT (conf, filename, max_clients);
 
+    GET_CONF_INT (conf, filename, max_requests);
+    GET_CONF_INT (conf, filename, max_client_requests);
+
     virConfFree (conf);
     return 0;
 
@@ -2389,7 +2519,7 @@ int main(int argc, char **argv) {
         static const char *default_config_file
             = SYSCONF_DIR "/libvirt/libvirtd.conf";
         remote_config_file =
-            (access(default_config_file, X_OK) == 0
+            (access(default_config_file, R_OK) == 0
              ? default_config_file
              : "/dev/null");
     }
diff --git a/qemud/qemud.h b/qemud/qemud.h
--- a/qemud/qemud.h
+++ b/qemud/qemud.h
@@ -65,15 +65,6 @@
 
 #define qemudDebug DEBUG
 
-enum qemud_mode {
-    QEMUD_MODE_RX_HEADER,       /* Receiving the fixed length RPC header data */
-    QEMUD_MODE_RX_PAYLOAD,      /* Receiving the variable length RPC payload data */
-    QEMUD_MODE_WAIT_DISPATCH,   /* Message received, waiting for worker to process */
-    QEMUD_MODE_IN_DISPATCH,     /* RPC call being processed */
-    QEMUD_MODE_TX_PACKET,       /* Transmitting reply to RPC call */
-    QEMUD_MODE_TLS_HANDSHAKE,   /* Performing TLS handshake */
-};
-
 /* Whether we're passing reads & writes through a sasl SSF */
 enum qemud_sasl_ssf {
     QEMUD_SASL_SSF_NONE = 0,
@@ -87,6 +78,18 @@ enum qemud_sock_type {
     QEMUD_SOCK_TYPE_TLS = 2,
 };
 
+struct qemud_client_message;
+
+struct qemud_client_message {
+    char buffer [REMOTE_MESSAGE_MAX + REMOTE_MESSAGE_HEADER_XDR_LEN];
+    unsigned int bufferLength;
+    unsigned int bufferOffset;
+
+    int async : 1;
+
+    struct qemud_client_message *next;
+};
+
 /* Stores the per-client connection state */
 struct qemud_client {
     virMutex lock;
@@ -97,7 +100,6 @@ struct qemud_client {
     int watch;
     int readonly:1;
     int closing:1;
-    enum qemud_mode mode;
 
     struct sockaddr_storage addr;
     socklen_t addrlen;
@@ -105,6 +107,7 @@ struct qemud_client {
     int type; /* qemud_sock_type */
     gnutls_session_t tlssession;
     int auth;
+    int handshake : 1; /* If we're in progress for TLS handshake */
 #if HAVE_SASL
     sasl_conn_t *saslconn;
     int saslSSF;
@@ -117,12 +120,20 @@ struct qemud_client {
     char *saslUsername;
 #endif
 
-    unsigned int incomingSerial;
-    unsigned int outgoingSerial;
-
-    char buffer [REMOTE_MESSAGE_MAX];
-    unsigned int bufferLength;
-    unsigned int bufferOffset;
+    /* Count of meages in 'dx' or 'tx' queue
+     * ie RPC calls in progress. Does not count
+     * async events which are not used for
+     * throttling calculations */
+    int nrequests;
+    /* Zero or one messages being received. Zero if
+     * nrequests >= max_clients and throttling */
+    struct qemud_client_message *rx;
+    /* Zero or many messages waiting for a worker
+     * to process them */
+    struct qemud_client_message *dx;
+    /* Zero or many messages waiting for transmit
+     * back to client, including async events */
+    struct qemud_client_message *tx;
 
     /* This is only valid if a remote open call has been made on this
      * connection, otherwise it will be NULL.  Also if remote close is
@@ -181,16 +192,20 @@ void qemudLog(int priority, const char *
 int qemudSetCloseExec(int fd);
 int qemudSetNonBlock(int fd);
 
-unsigned int
+int
 remoteDispatchClientRequest (struct qemud_server *server,
-                             struct qemud_client *client);
+                             struct qemud_client *client,
+                             struct qemud_client_message *req);
 
-void qemudDispatchClientWrite(struct qemud_server *server,
-                             struct qemud_client *client);
+int qemudRegisterClientEvent(struct qemud_server *server,
+                             struct qemud_client *client,
+                             int update);
 
-#if HAVE_POLKIT
-int qemudGetSocketIdentity(int fd, uid_t *uid, pid_t *pid);
-#endif
+void qemudDispatchClientFailure(struct qemud_client *client);
+
+void
+qemudClientMessageQueuePush(struct qemud_client_message **queue,
+                            struct qemud_client_message *msg);
 
 int remoteRelayDomainEvent (virConnectPtr conn ATTRIBUTE_UNUSED,
                             virDomainPtr dom,
@@ -198,4 +213,9 @@ int remoteRelayDomainEvent (virConnectPt
                             int detail,
                             void *opaque);
 
+
+#if HAVE_POLKIT
+int qemudGetSocketIdentity(int fd, uid_t *uid, pid_t *pid);
 #endif
+
+#endif
diff --git a/qemud/remote.c b/qemud/remote.c
--- a/qemud/remote.c
+++ b/qemud/remote.c
@@ -111,6 +111,7 @@ static const dispatch_data const dispatc
 /* Prototypes */
 static void
 remoteDispatchDomainEventSend (struct qemud_client *client,
+                               struct qemud_client_message *msg,
                                virDomainPtr dom,
                                int event,
                                int detail);
@@ -219,9 +220,10 @@ remoteDispatchConnError (remote_error *r
  * Server object is unlocked
  * Client object is locked
  */
-unsigned int
+int
 remoteDispatchClientRequest (struct qemud_server *server,
-                             struct qemud_client *client)
+                             struct qemud_client *client,
+                             struct qemud_client_message *msg)
 {
     XDR xdr;
     remote_message_header req, rep;
@@ -237,7 +239,10 @@ remoteDispatchClientRequest (struct qemu
     memset(&rerr, 0, sizeof rerr);
 
     /* Parse the header. */
-    xdrmem_create (&xdr, client->buffer, client->bufferLength, XDR_DECODE);
+    xdrmem_create (&xdr,
+                   msg->buffer + REMOTE_MESSAGE_HEADER_XDR_LEN,
+                   msg->bufferLength - REMOTE_MESSAGE_HEADER_XDR_LEN,
+                   XDR_DECODE);
 
     if (!xdr_remote_message_header (&xdr, &req))
         goto fatal_error;
@@ -333,7 +338,7 @@ rpc_error:
     rep.status = rv < 0 ? REMOTE_ERROR : REMOTE_OK;
 
     /* Serialise the return header. */
-    xdrmem_create (&xdr, client->buffer, sizeof client->buffer, XDR_ENCODE);
+    xdrmem_create (&xdr, msg->buffer, sizeof msg->buffer, XDR_ENCODE);
 
     len = 0; /* We'll come back and write this later. */
     if (!xdr_int (&xdr, &len)) {
@@ -368,13 +373,17 @@ rpc_error:
         goto fatal_error;
 
     xdr_destroy (&xdr);
-    return len;
+
+    msg->bufferLength = len;
+    msg->bufferOffset = 0;
+
+    return 0;
 
 fatal_error:
     /* Seriously bad stuff happened, so we'll kill off this client
        and not send back any RPC error */
     xdr_destroy (&xdr);
-    return 0;
+    return -1;
 }
 
 int remoteRelayDomainEvent (virConnectPtr conn ATTRIBUTE_UNUSED,
@@ -386,9 +395,20 @@ int remoteRelayDomainEvent (virConnectPt
     struct qemud_client *client = opaque;
     REMOTE_DEBUG("Relaying domain event %d %d", event, detail);
 
-    if(client) {
-        remoteDispatchDomainEventSend (client, dom, event, detail);
-        qemudDispatchClientWrite(client->server,client);
+    if (client) {
+        struct qemud_client_message *ev;
+
+        if (VIR_ALLOC(ev) < 0)
+            return -1;
+
+        virMutexLock(&client->lock);
+
+        remoteDispatchDomainEventSend (client, ev, dom, event, detail);
+
+        if (qemudRegisterClientEvent(client->server, client, 1) < 0)
+            qemudDispatchClientFailure(client);
+
+        virMutexUnlock(&client->lock);
     }
     return 0;
 }
@@ -4202,6 +4222,7 @@ remoteDispatchDomainEventsDeregister (st
 
 static void
 remoteDispatchDomainEventSend (struct qemud_client *client,
+                               struct qemud_client_message *msg,
                                virDomainPtr dom,
                                int event,
                                int detail)
@@ -4222,7 +4243,7 @@ remoteDispatchDomainEventSend (struct qe
     rep.status = REMOTE_OK;
 
     /* Serialise the return header and event. */
-    xdrmem_create (&xdr, client->buffer, sizeof client->buffer, XDR_ENCODE);
+    xdrmem_create (&xdr, msg->buffer, sizeof msg->buffer, XDR_ENCODE);
 
     len = 0; /* We'll come back and write this later. */
     if (!xdr_int (&xdr, &len)) {
@@ -4263,9 +4284,10 @@ remoteDispatchDomainEventSend (struct qe
     xdr_destroy (&xdr);
 
     /* Send it. */
-    client->mode = QEMUD_MODE_TX_PACKET;
-    client->bufferLength = len;
-    client->bufferOffset = 0;
+    msg->async = 1;
+    msg->bufferLength = len;
+    msg->bufferOffset = 0;
+    qemudClientMessageQueuePush(&client->tx, msg);
 }
 
 /*----- Helpers. -----*/
diff --git a/qemud/test_libvirtd.aug b/qemud/test_libvirtd.aug
--- a/qemud/test_libvirtd.aug
+++ b/qemud/test_libvirtd.aug
@@ -246,6 +246,19 @@ max_clients = 20
 # of clients allowed
 min_workers = 5
 max_workers = 20
+
+# Total global limit on concurrent RPC calls. Should be
+# at least as large as max_workers. Beyond this, RPC requests
+# will be read into memory and queued. This directly impact
+# memory usage, currently each request requires 256 KB of
+# memory. So by default upto 5 MB of memory is used
+max_requests = 20
+
+# Limit on concurrent requests from a single client
+# connection. To avoid one client monopolizing the server
+# this should be a small fraction of the global max_requests
+# and max_workers parameter
+max_client_requests = 5
 "
 
    test Libvirtd.lns get conf =
@@ -499,3 +512,16 @@ max_workers = 20
         { "#comment" = "of clients allowed"}
         { "min_workers" = "5" }
         { "max_workers" = "20" }
+	{ "#empty" }
+        { "#comment" = "Total global limit on concurrent RPC calls. Should be" }
+        { "#comment" = "at least as large as max_workers. Beyond this, RPC requests" }
+        { "#comment" = "will be read into memory and queued. This directly impact" }
+        { "#comment" = "memory usage, currently each request requires 256 KB of" }
+        { "#comment" = "memory. So by default upto 5 MB of memory is used" }
+        { "max_requests" = "20" }
+	{ "#empty" }
+        { "#comment" = "Limit on concurrent requests from a single client" }
+        { "#comment" = "connection. To avoid one client monopolizing the server" }
+        { "#comment" = "this should be a small fraction of the global max_requests" }
+        { "#comment" = "and max_workers parameter" }
+        { "max_client_requests" = "5" }

-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




More information about the libvir-list mailing list