[libvirt] [PATCH RESEND] qemu: Process RDMA GID state change event

Michal Privoznik mprivozn at redhat.com
Mon Nov 5 16:42:10 UTC 2018


[There is no need to resend your patch just to put yourself onto CC
list. The review policy is always "reply to all"]

On 11/05/2018 11:50 AM, Yuval Shaia wrote:
> This event is emitted on the monitor when a GID table in pvrdma device
> is modified and the change needs to be propagate to the backend RDMA
> device's GID table.

Not yet, your qemu patches are not merged yet ;-) I will provide review
anyway, but even if this patch was good as is we couldn't merge it
unless it's merged into qemu repo first. We have our history with that.

> 
> The control over the RDMA device's GID table is done by updating the
> device's Ethernet function addresses.
> Usually the first GID entry is determine by the MAC address, the second
> by the first IPv6 address and the third by the IPv4 address. Other
> entries can be added by adding more IP addresses. The opposite is the
> same, i.e. whenever an address is removed, the corresponding GID entry
> is removed.
> 
> The process is done by the network and RDMA stacks. Whenever an address
> is added the ib_core driver is notified and calls the device driver's
> add_gid function which in turn update the device.
> 
> To support this in pvrdma device we need to hook into the create_bind
> and destroy_bind HW commands triggered by pvrdma driver in guest.
> Whenever a changed is made to the pvrdma device's GID table a special
> QMP messages is sent to be processed by libvirt to update the address of
> the backend Ethernet device.
> 
> Signed-off-by: Yuval Shaia <yuval.shaia at oracle.com>
> ---
>  src/qemu/qemu_domain.c       |  3 ++
>  src/qemu/qemu_domain.h       | 15 +++++++++
>  src/qemu/qemu_driver.c       | 40 ++++++++++++++++++++++++
>  src/qemu/qemu_monitor.c      | 28 +++++++++++++++++
>  src/qemu/qemu_monitor.h      | 13 ++++++++
>  src/qemu/qemu_monitor_json.c | 36 ++++++++++++++++++++++
>  src/qemu/qemu_process.c      | 59 ++++++++++++++++++++++++++++++++++++
>  7 files changed, 194 insertions(+)
> 
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index ba3fff607a..8da54c7ee9 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -13479,6 +13479,9 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
>      case QEMU_PROCESS_EVENT_GUESTPANIC:
>          qemuMonitorEventPanicInfoFree(event->data);
>          break;
> +    case QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED:
> +        qemuMonitorEventRdmaGidStatusFree(event->data);
> +        break;
>      case QEMU_PROCESS_EVENT_WATCHDOG:
>      case QEMU_PROCESS_EVENT_DEVICE_DELETED:
>      case QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED:
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 80bd4bde91..1b188843e3 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -478,6 +478,18 @@ struct _qemuDomainVsockPrivate {
>  };
>  
>  
> +typedef struct _qemuDomainRdmaGidStatusChangedPrivate qemuDomainRdmaGidStatusChangedPrivate;
> +typedef qemuDomainRdmaGidStatusChangedPrivate *qemuDomainRdmaGidStatusChangedPrivatePtr;
> +struct _qemuDomainRdmaGidStatusChangedPrivate {
> +    virObject parent;
> +
> +    char *netdev;
> +    bool gid_status;
> +    uint64_t subnet_prefix;
> +    uint64_t interface_id;

We use ULL instead of uint64_t.

> +};
> +
> +
>  typedef enum {
>      QEMU_PROCESS_EVENT_WATCHDOG = 0,
>      QEMU_PROCESS_EVENT_GUESTPANIC,
> @@ -487,6 +499,7 @@ typedef enum {
>      QEMU_PROCESS_EVENT_BLOCK_JOB,
>      QEMU_PROCESS_EVENT_MONITOR_EOF,
>      QEMU_PROCESS_EVENT_PR_DISCONNECT,
> +    QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED,
>  
>      QEMU_PROCESS_EVENT_LAST
>  } qemuProcessEventType;
> @@ -499,6 +512,8 @@ struct qemuProcessEvent {
>      void *data;
>  };
>  
> +void qemuMonitorEventRdmaGidStatusFree(qemuDomainRdmaGidStatusChangedPrivatePtr info);

If this function has prefix qemuMonitor then is should be in
qemu_monitor* file. Just like qemuMonitorEventPanicInfoFree() is.

> +
>  void qemuProcessEventFree(struct qemuProcessEvent *event);
>  
>  typedef struct _qemuDomainLogContext qemuDomainLogContext;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index a52e2495d5..dc088d844f 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -4788,6 +4788,43 @@ processPRDisconnectEvent(virDomainObjPtr vm)
>  }
>  
>  
> +static void
> +processRdmaGidStatusChangedEvent(virDomainObjPtr vm,
> +                                 qemuDomainRdmaGidStatusChangedPrivatePtr info)
> +{
> +    unsigned int prefix_len;
> +    virSocketAddr addr = {0};
> +    int rc;
> +
> +    if (!virDomainObjIsActive(vm))
> +        return;
> +
> +    VIR_DEBUG("netdev=%s,gid_status=%d,subnet_prefix=0x%lx,interface_id=0x%lx",
> +              info->netdev, info->gid_status, info->subnet_prefix,
> +              info->interface_id);
> +
> +    if (info->subnet_prefix) {
> +        prefix_len = 64;
> +        uint32_t ipv6[4];
> +        memcpy(&ipv6[0], &info->subnet_prefix, sizeof(info->subnet_prefix));
> +        memcpy(&ipv6[2], &info->interface_id, sizeof(info->subnet_prefix));

This should have been sizeof(info->interface_id). I know it doesn't
matter that much since they are both the same size, but still.

> +        virSocketAddrSetIPv6AddrNetOrder(&addr, ipv6);
> +    } else {
> +        prefix_len = 24;
> +        virSocketAddrSetIPv4AddrNetOrder(&addr, info->interface_id >> 32);
> +    }
> +
> +    if (info->gid_status)
> +        rc = virNetDevIPAddrAdd(info->netdev, &addr, NULL, prefix_len);
> +    else
> +        rc = virNetDevIPAddrDel(info->netdev, &addr, prefix_len);
> +
> +    if (rc)

if (rc < 0)

A negative retval denotes failure, a non-negative one means success. So
if virNetDevIPAddr*() would return +1 (even though it is not right now),
your condition would still trigger, which is not good.

> +        VIR_ERROR(_("Fail to update address 0x%lx to %s"), info->interface_id,
> +                  info->netdev);

VIR_WARN(). Also, since this is IP address, should we format it as such?

Frankly, I don't know about RDMA that much, but the qemu interface looks
weird to me. If it wants to send an IP address to us it's doing that in
a cryptic way.

> +}
> +
> +
>  static void qemuProcessEventHandler(void *data, void *opaque)
>  {
>      struct qemuProcessEvent *processEvent = data;
> @@ -4828,6 +4865,9 @@ static void qemuProcessEventHandler(void *data, void *opaque)
>      case QEMU_PROCESS_EVENT_PR_DISCONNECT:
>          processPRDisconnectEvent(vm);
>          break;
> +    case QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED:
> +        processRdmaGidStatusChangedEvent(vm, processEvent->data);
> +        break;
>      case QEMU_PROCESS_EVENT_LAST:
>          break;
>      }
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 7f7013e115..375ed7ceaf 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -1686,6 +1686,23 @@ qemuMonitorEmitPRManagerStatusChanged(qemuMonitorPtr mon,
>  }
>  
>  
> +int
> +qemuMonitorEmitRdmaGidStatusChanged(qemuMonitorPtr mon, const char *netdev,
> +                                    bool gid_status, uint64_t subnet_prefix,
> +                                    uint64_t interface_id)
> +{
> +    int ret = -1;
> +    VIR_DEBUG("mon=%p", mon);
> +    VIR_DEBUG("netdev='%s',gid_status=%d,subnet_prefix=0x%lx,interface_id=0x%lx",
> +              netdev, gid_status, subnet_prefix, interface_id);

No need for two separate VIR_DEBUG() calls. One would be sufficient.

> +
> +    QEMU_MONITOR_CALLBACK(mon, ret, domainRdmaGidStatusChanged, mon->vm, netdev,
> +                          gid_status, subnet_prefix, interface_id);
> +
> +    return ret;
> +}
> +
> +
>  int
>  qemuMonitorSetCapabilities(qemuMonitorPtr mon)
>  {
> @@ -4298,6 +4315,17 @@ qemuMonitorEventPanicInfoFree(qemuMonitorEventPanicInfoPtr info)
>  }
>  
>  
> +void
> +qemuMonitorEventRdmaGidStatusFree(qemuDomainRdmaGidStatusChangedPrivatePtr info)
> +{
> +    if (!info)
> +        return;
> +
> +    VIR_FREE(info->netdev);
> +    VIR_FREE(info);
> +}
> +
> +
>  int
>  qemuMonitorSetWatchdogAction(qemuMonitorPtr mon,
>                               const char *action)
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 48b142a4f4..f5affe615a 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -281,6 +281,14 @@ typedef int (*qemuMonitorDomainPRManagerStatusChangedCallback)(qemuMonitorPtr mo
>                                                                 bool connected,
>                                                                 void *opaque);
>  
> +typedef int (*qemuMonitorDomainRdmaGidStatusChangedCallback)(qemuMonitorPtr mon,
> +                                                             virDomainObjPtr vm,
> +                                                             const char *netdev,
> +                                                             bool gid_status,
> +                                                             uint64_t subnet_prefix,
> +                                                             uint64_t interface_id,
> +                                                             void *opaque);
> +
>  typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
>  typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
>  struct _qemuMonitorCallbacks {
> @@ -314,6 +322,7 @@ struct _qemuMonitorCallbacks {
>      qemuMonitorDomainBlockThresholdCallback domainBlockThreshold;
>      qemuMonitorDomainDumpCompletedCallback domainDumpCompleted;
>      qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged;
> +    qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged;
>  };
>  
>  char *qemuMonitorEscapeArg(const char *in);
> @@ -448,6 +457,10 @@ int qemuMonitorEmitPRManagerStatusChanged(qemuMonitorPtr mon,
>                                            const char *prManager,
>                                            bool connected);
>  
> +int qemuMonitorEmitRdmaGidStatusChanged(qemuMonitorPtr mon, const char *netdev,
> +                                        bool gid_status, uint64_t subnet_prefix,
> +                                        uint64_t interface_id);
> +
>  int qemuMonitorStartCPUs(qemuMonitorPtr mon);
>  int qemuMonitorStopCPUs(qemuMonitorPtr mon);
>  
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 3de298c9e2..8df9b426ba 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -91,6 +91,7 @@ static void qemuMonitorJSONHandleAcpiOstInfo(qemuMonitorPtr mon, virJSONValuePtr
>  static void qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
> +static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
>  
>  typedef struct {
>      const char *type;
> @@ -114,6 +115,7 @@ static qemuEventHandler eventHandlers[] = {
>      { "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, },
>      { "POWERDOWN", qemuMonitorJSONHandlePowerdown, },
>      { "PR_MANAGER_STATUS_CHANGED", qemuMonitorJSONHandlePRManagerStatusChanged, },
> +    { "RDMA_GID_STATUS_CHANGED", qemuMonitorJSONHandleRdmaGidStatusChanged, },
>      { "RESET", qemuMonitorJSONHandleReset, },
>      { "RESUME", qemuMonitorJSONHandleResume, },
>      { "RTC_CHANGE", qemuMonitorJSONHandleRTCChange, },
> @@ -1351,6 +1353,40 @@ static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon,
>  }
>  
>  
> +static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon,
> +                                                      virJSONValuePtr data)
> +{
> +    const char *netdev;
> +    bool gid_status;
> +    unsigned long long subnet_prefix, interface_id;
> +
> +    if (!(netdev = virJSONValueObjectGetString(data, "netdev"))) {
> +        VIR_WARN("missing netdev in GID_STATUS_CHANGED event");
> +        return;
> +    }
> +
> +    if (virJSONValueObjectGetBoolean(data, "gid-status", &gid_status)) {
> +        VIR_WARN("missing gid-status in GID_STATUS_CHANGED event");
> +        return;
> +    }
> +
> +    if (virJSONValueObjectGetNumberUlong(data, "subnet-prefix",
> +                                         &subnet_prefix)) {
> +        VIR_WARN("missing subnet-prefix in GID_STATUS_CHANGED event");
> +        return;
> +    }
> +
> +    if (virJSONValueObjectGetNumberUlong(data, "interface-id",
> +                                         &interface_id)) {
> +        VIR_WARN("missing interface-id in GID_STATUS_CHANGED event");
> +        return;
> +    }
> +
> +    qemuMonitorEmitRdmaGidStatusChanged(mon, netdev, gid_status, subnet_prefix,
> +                                        interface_id);
> +}
> +
> +
>  int
>  qemuMonitorJSONHumanCommandWithFd(qemuMonitorPtr mon,
>                                    const char *cmd_str,
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 9cf971808c..45fc155d31 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -1703,6 +1703,64 @@ qemuProcessHandlePRManagerStatusChanged(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
>  }
>  
>  
> +static int
> +qemuProcessHandleRdmaGidStatusChanged(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
> +                                      virDomainObjPtr vm, const char *netdev,
> +                                      bool gid_status, uint64_t subnet_prefix,
> +                                      uint64_t interface_id, void *opaque)
> +{
> +    virQEMUDriverPtr driver = opaque;
> +    qemuDomainObjPrivatePtr priv;
> +    struct qemuProcessEvent *processEvent = NULL;
> +    qemuDomainRdmaGidStatusChangedPrivatePtr rdmaGitStatusChangedPriv;
> +    int ret = -1;
> +
> +    virObjectLock(vm);
> +
> +    VIR_DEBUG("netdev=%s,gid_status=%d,subnet_prefix=0x%lx,interface_id=0x%lx",
> +              netdev, gid_status, subnet_prefix, interface_id);
> +
> +    priv = vm->privateData;
> +    priv->prDaemonRunning = false;

Oops, I don't think that RDMA GID has anything to do with PR daemon ;-)

> +
> +    if (VIR_ALLOC(rdmaGitStatusChangedPriv) < 0)
> +        goto out_unlock;

Our VIR_FREE() accepts NULL (despite some other public vir*Free() APIs
which don't). Anyway, there is no need to have one label per each
VIR_FREE() call.

> +
> +    if (VIR_STRDUP(rdmaGitStatusChangedPriv->netdev, netdev) < 0)
> +        goto out_free_rdma_priv;
> +
> +    rdmaGitStatusChangedPriv->gid_status = gid_status;
> +    rdmaGitStatusChangedPriv->subnet_prefix = subnet_prefix;
> +    rdmaGitStatusChangedPriv->interface_id = interface_id;
> +
> +    if (VIR_ALLOC(processEvent) < 0)
> +        goto out_free_rdma_priv_netdev;
> +
> +    processEvent->eventType = QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED;
> +    processEvent->vm = virObjectRef(vm);
> +    processEvent->data = rdmaGitStatusChangedPriv;
> +
> +    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
> +        qemuProcessEventFree(processEvent);
> +        virObjectUnref(vm);
> +        goto out_free_rdma_priv_netdev;
> +    }
> +
> +    ret = 0;
> +    goto out_unlock;
> +
> + out_free_rdma_priv_netdev:
> +    VIR_FREE(rdmaGitStatusChangedPriv->netdev);> +
> + out_free_rdma_priv:
> +    VIR_FREE(rdmaGitStatusChangedPriv);

How about calling qemuMonitorEventRdmaGidStatusFree() instead of these
VIR_FREE()? That way we don't have to care/change this area of the code
- updating the former would be just enough.

> +
> + out_unlock:
> +    virObjectUnlock(vm);
> +    return ret;
> +}
> +
> +
>  static qemuMonitorCallbacks monitorCallbacks = {
>      .eofNotify = qemuProcessHandleMonitorEOF,
>      .errorNotify = qemuProcessHandleMonitorError,
> @@ -1732,6 +1790,7 @@ static qemuMonitorCallbacks monitorCallbacks = {
>      .domainBlockThreshold = qemuProcessHandleBlockThreshold,
>      .domainDumpCompleted = qemuProcessHandleDumpCompleted,
>      .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged,
> +    .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged,
>  };
>  
>  static void
> 


Michal




More information about the libvir-list mailing list