[PATCH v2 1/4] API: introduce memory failure

zhenwei pi pizhenwei at bytedance.com
Mon Oct 12 12:31:56 UTC 2020


Introduce memory failure event. Libvirt should monitor domain's
event, then posts it to uplayer. According to the hardware memory
corrupted message, the cloud scheduler could migrate domain to another
health physical server.

Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
---
 include/libvirt/libvirt-domain.h    | 82 +++++++++++++++++++++++++++++++++++++
 src/conf/domain_event.c             | 80 ++++++++++++++++++++++++++++++++++++
 src/conf/domain_event.h             | 12 ++++++
 src/libvirt_private.syms            |  2 +
 src/remote/remote_daemon_dispatch.c | 32 +++++++++++++++
 src/remote/remote_driver.c          | 32 +++++++++++++++
 src/remote/remote_protocol.x        | 16 +++++++-
 src/remote_protocol-structs         |  8 ++++
 8 files changed, 263 insertions(+), 1 deletion(-)

diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 77f9116675..5138843a56 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -3196,6 +3196,64 @@ typedef enum {
 } virDomainEventCrashedDetailType;
 
 /**
+ * virDomainMemoryFailureRecipientType:
+ *
+ * Recipient of a memory failure event.
+ */
+typedef enum {
+    /* memory failure at hypersivor memory address space */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR = 0,
+
+    /* memory failure at guest memory address space */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST = 1,
+
+# ifdef VIR_ENUM_SENTINELS
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST
+# endif
+} virDomainMemoryFailureRecipientType;
+
+
+/**
+ * virDomainMemoryFailureActionType:
+ *
+ * Action of a memory failure event.
+ */
+typedef enum {
+    /* the memory failure could be ignored. This will only be the case for
+     * action-optional failures. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE = 0,
+
+    /* memory failure occurred in guest memory, the guest enabled MCE handling
+     * mechanism, and hypervisor could inject the MCE into the guest
+     * successfully. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT = 1,
+
+    /* the failure is unrecoverable.  This occurs for action-required failures
+     * if the recipient is the hypervisor; hypervisor will exit. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL = 2,
+
+    /* the failure is unrecoverable but confined to the guest. This occurs if
+     * the recipient is a guest which is not ready to handle memory failures. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET = 3,
+
+# ifdef VIR_ENUM_SENTINELS
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST
+# endif
+} virDomainMemoryFailureActionType;
+
+
+typedef enum {
+    /* whether a memory failure event is action-required or action-optional
+     * (e.g. a failure during memory scrub). */
+    VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED = (1 << 0),
+
+    /* whether the failure occurred while the previous failure was still in
+     * progress. */
+    VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE = (1 << 1),
+} virDomainMemoryFailureFlags;
+
+
+/**
  * virConnectDomainEventCallback:
  * @conn: virConnect connection
  * @dom: The domain on which the event occurred
@@ -4565,6 +4623,29 @@ typedef void (*virConnectDomainEventBlockThresholdCallback)(virConnectPtr conn,
                                                             void *opaque);
 
 /**
+ * virConnectDomainEventMemoryFailureCallback:
+ * @conn: connection object
+ * @dom: domain on which the event occurred
+ * @recipient: the recipient of hardware memory failure
+ * @action: the action of hardware memory failure
+ * @flags: the flags of hardware memory failure
+ * @opaque: application specified data
+ *
+ * The callback occurs when the hypervisor handles the hardware memory
+ * corrupted event.
+ *
+ * The callback signature to use when registering for an event of type
+ * VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE with virConnectDomainEventRegisterAny()
+ */
+typedef void (*virConnectDomainEventMemoryFailureCallback)(virConnectPtr conn,
+                                                           virDomainPtr dom,
+                                                           virDomainMemoryFailureRecipientType recipient,
+                                                           virDomainMemoryFailureActionType action,
+                                                           unsigned int flags,
+                                                           void *opaque);
+
+
+/**
  * VIR_DOMAIN_EVENT_CALLBACK:
  *
  * Used to cast the event specific callback into the generic one
@@ -4606,6 +4687,7 @@ typedef enum {
     VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED = 22, /* virConnectDomainEventDeviceRemovalFailedCallback */
     VIR_DOMAIN_EVENT_ID_METADATA_CHANGE = 23, /* virConnectDomainEventMetadataChangeCallback */
     VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD = 24, /* virConnectDomainEventBlockThresholdCallback */
+    VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE = 25,  /* virConnectDomainEventMemoryFailureCallback */
 
 # ifdef VIR_ENUM_SENTINELS
     VIR_DOMAIN_EVENT_ID_LAST
diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c
index a8bd9f1595..4a6051a6ab 100644
--- a/src/conf/domain_event.c
+++ b/src/conf/domain_event.c
@@ -57,6 +57,7 @@ static virClassPtr virDomainEventJobCompletedClass;
 static virClassPtr virDomainEventDeviceRemovalFailedClass;
 static virClassPtr virDomainEventMetadataChangeClass;
 static virClassPtr virDomainEventBlockThresholdClass;
+static virClassPtr virDomainEventMemoryFailureClass;
 
 static void virDomainEventDispose(void *obj);
 static void virDomainEventLifecycleDispose(void *obj);
@@ -79,6 +80,7 @@ static void virDomainEventJobCompletedDispose(void *obj);
 static void virDomainEventDeviceRemovalFailedDispose(void *obj);
 static void virDomainEventMetadataChangeDispose(void *obj);
 static void virDomainEventBlockThresholdDispose(void *obj);
+static void virDomainEventMemoryFailureDispose(void *obj);
 
 static void
 virDomainEventDispatchDefaultFunc(virConnectPtr conn,
@@ -287,6 +289,15 @@ struct _virDomainEventBlockThreshold {
 typedef struct _virDomainEventBlockThreshold virDomainEventBlockThreshold;
 typedef virDomainEventBlockThreshold *virDomainEventBlockThresholdPtr;
 
+struct _virDomainEventMemoryFailure {
+    virDomainEvent parent;
+
+    virDomainMemoryFailureRecipientType recipient;
+    virDomainMemoryFailureActionType action;
+    unsigned int flags;
+};
+typedef struct _virDomainEventMemoryFailure virDomainEventMemoryFailure;
+typedef virDomainEventMemoryFailure *virDomainEventMemoryFailurePtr;
 
 static int
 virDomainEventsOnceInit(void)
@@ -333,6 +344,8 @@ virDomainEventsOnceInit(void)
         return -1;
     if (!VIR_CLASS_NEW(virDomainEventBlockThreshold, virDomainEventClass))
         return -1;
+    if (!VIR_CLASS_NEW(virDomainEventMemoryFailure, virDomainEventClass))
+        return -1;
     return 0;
 }
 
@@ -542,6 +555,14 @@ virDomainEventBlockThresholdDispose(void *obj)
 }
 
 
+static void
+virDomainEventMemoryFailureDispose(void *obj)
+{
+    virDomainEventMemoryFailurePtr event = obj;
+    VIR_DEBUG("obj=%p", event);
+}
+
+
 static void *
 virDomainEventNew(virClassPtr klass,
                   int eventID,
@@ -1619,6 +1640,52 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom,
 }
 
 
+static virObjectEventPtr
+virDomainEventMemoryFailureNew(int id,
+                               const char *name,
+                               unsigned char *uuid,
+                               virDomainMemoryFailureRecipientType recipient,
+                               virDomainMemoryFailureActionType action,
+                               unsigned int flags)
+{
+    virDomainEventMemoryFailurePtr ev;
+
+    if (virDomainEventsInitialize() < 0)
+        return NULL;
+
+    if (!(ev = virDomainEventNew(virDomainEventMemoryFailureClass,
+                                 VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE,
+                                 id, name, uuid)))
+        return NULL;
+
+    ev->recipient = recipient;
+    ev->action = action;
+    ev->flags = flags;
+
+    return (virObjectEventPtr)ev;
+}
+
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      unsigned int flags)
+{
+    return virDomainEventMemoryFailureNew(obj->def->id, obj->def->name,
+                                          obj->def->uuid, recipient, action,
+                                          flags);
+}
+
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromDom(virDomainPtr dom,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      unsigned int flags)
+{
+    return virDomainEventMemoryFailureNew(dom->id, dom->name, dom->uuid,
+                                          recipient, action, flags);
+}
+
 static void
 virDomainEventDispatchDefaultFunc(virConnectPtr conn,
                                   virObjectEventPtr event,
@@ -1902,6 +1969,19 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
                                                               cbopaque);
             goto cleanup;
         }
+    case VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE:
+        {
+            virDomainEventMemoryFailurePtr memoryFailureEvent;
+
+            memoryFailureEvent = (virDomainEventMemoryFailurePtr)event;
+            ((virConnectDomainEventMemoryFailureCallback)cb)(conn, dom,
+                                                             memoryFailureEvent->recipient,
+                                                             memoryFailureEvent->action,
+                                                             memoryFailureEvent->flags,
+                                                             cbopaque);
+            goto cleanup;
+        }
+
     case VIR_DOMAIN_EVENT_ID_LAST:
         break;
     }
diff --git a/src/conf/domain_event.h b/src/conf/domain_event.h
index d1cfb81d62..1d001e164e 100644
--- a/src/conf/domain_event.h
+++ b/src/conf/domain_event.h
@@ -255,6 +255,18 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom,
                                        unsigned long long threshold,
                                        unsigned long long excess);
 
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      unsigned int flags);
+
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromDom(virDomainPtr dom,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      unsigned int flags);
+
 int
 virDomainEventStateRegister(virConnectPtr conn,
                             virObjectEventStatePtr state,
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 152083d220..927de5001a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -704,6 +704,8 @@ virDomainEventLifecycleNew;
 virDomainEventLifecycleNewFromDef;
 virDomainEventLifecycleNewFromDom;
 virDomainEventLifecycleNewFromObj;
+virDomainEventMemoryFailureNewFromDom;
+virDomainEventMemoryFailureNewFromObj;
 virDomainEventMetadataChangeNewFromDom;
 virDomainEventMetadataChangeNewFromObj;
 virDomainEventMigrationIterationNewFromDom;
diff --git a/src/remote/remote_daemon_dispatch.c b/src/remote/remote_daemon_dispatch.c
index 32ebcd8f36..078467f8da 100644
--- a/src/remote/remote_daemon_dispatch.c
+++ b/src/remote/remote_daemon_dispatch.c
@@ -1302,6 +1302,37 @@ remoteRelayDomainEventBlockThreshold(virConnectPtr conn,
 }
 
 
+static int
+remoteRelayDomainEventMemoryFailure(virConnectPtr conn,
+                                    virDomainPtr dom,
+                                    virDomainMemoryFailureRecipientType recipient,
+                                    virDomainMemoryFailureActionType action,
+                                    unsigned int flags,
+                                    void *opaque)
+{
+    daemonClientEventCallbackPtr callback = opaque;
+    remote_domain_event_memory_failure_msg data;
+
+    if (callback->callbackID < 0 ||
+        !remoteRelayDomainEventCheckACL(callback->client, conn, dom))
+        return -1;
+
+    /* build return data */
+    memset(&data, 0, sizeof(data));
+    data.callbackID = callback->callbackID;
+    data.recipient = recipient;
+    data.action = action;
+    data.flags = flags;
+    make_nonnull_domain(&data.dom, dom);
+
+    remoteDispatchObjectEventSend(callback->client, remoteProgram,
+                                  REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE,
+                                  (xdrproc_t)xdr_remote_domain_event_memory_failure_msg, &data);
+
+    return 0;
+}
+
+
 static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventLifecycle),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventReboot),
@@ -1328,6 +1359,7 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceRemovalFailed),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMetadataChange),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventBlockThreshold),
+    VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMemoryFailure),
 };
 
 G_STATIC_ASSERT(G_N_ELEMENTS(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST);
diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c
index d318224605..9cd2fd36ae 100644
--- a/src/remote/remote_driver.c
+++ b/src/remote/remote_driver.c
@@ -405,6 +405,11 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog,
                                      void *evdata, void *opaque);
 
 static void
+remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog,
+                                    virNetClientPtr client,
+                                    void *evdata, void *opaque);
+
+static void
 remoteConnectNotifyEventConnectionClosed(virNetClientProgramPtr prog G_GNUC_UNUSED,
                                          virNetClientPtr client G_GNUC_UNUSED,
                                          void *evdata, void *opaque);
@@ -615,6 +620,10 @@ static virNetClientProgramEvent remoteEvents[] = {
       remoteDomainBuildEventBlockThreshold,
       sizeof(remote_domain_event_block_threshold_msg),
       (xdrproc_t)xdr_remote_domain_event_block_threshold_msg },
+    { REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE,
+      remoteDomainBuildEventMemoryFailure,
+      sizeof(remote_domain_event_memory_failure_msg),
+      (xdrproc_t)xdr_remote_domain_event_memory_failure_msg },
 };
 
 static void
@@ -5440,6 +5449,29 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog G_GNUC_UNUSED,
 }
 
 
+static void
+remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog G_GNUC_UNUSED,
+                                    virNetClientPtr client G_GNUC_UNUSED,
+                                    void *evdata, void *opaque)
+{
+    virConnectPtr conn = opaque;
+    remote_domain_event_memory_failure_msg *msg = evdata;
+    struct private_data *priv = conn->privateData;
+    virDomainPtr dom;
+    virObjectEventPtr event = NULL;
+
+    if (!(dom = get_nonnull_domain(conn, msg->dom)))
+        return;
+
+    event = virDomainEventMemoryFailureNewFromDom(dom, msg->recipient,
+                                                  msg->action, msg->flags);
+
+    virObjectUnref(dom);
+
+    virObjectEventStateQueueRemote(priv->eventState, event, msg->callbackID);
+}
+
+
 static int
 remoteStreamSend(virStreamPtr st,
                  const char *data,
diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x
index f4d6147676..5e5e781e76 100644
--- a/src/remote/remote_protocol.x
+++ b/src/remote/remote_protocol.x
@@ -3469,6 +3469,14 @@ struct remote_domain_event_callback_metadata_change_msg {
     remote_string nsuri;
 };
 
+struct remote_domain_event_memory_failure_msg {
+    int callbackID;
+    remote_nonnull_domain dom;
+    int recipient;
+    int action;
+    unsigned int flags;
+};
+
 struct remote_connect_secret_event_register_any_args {
     int eventID;
     remote_secret secret;
@@ -6668,5 +6676,11 @@ enum remote_procedure {
      * @priority: high
      * @acl: domain:read
      */
-    REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422
+    REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
+
+    /**
+     * @generate: both
+     * @acl: none
+     */
+    REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423
 };
diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs
index bae0f0b545..c2ae411885 100644
--- a/src/remote_protocol-structs
+++ b/src/remote_protocol-structs
@@ -2862,6 +2862,13 @@ struct remote_domain_event_callback_metadata_change_msg {
         int                        type;
         remote_string              nsuri;
 };
+struct remote_domain_event_memory_failure_msg {
+        int                        callbackID;
+        remote_nonnull_domain      dom;
+        int                        recipient;
+        int                        action;
+        u_int                      flags;
+};
 struct remote_connect_secret_event_register_any_args {
         int                        eventID;
         remote_secret              secret;
@@ -3558,4 +3565,5 @@ enum remote_procedure {
         REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420,
         REMOTE_PROC_DOMAIN_BACKUP_BEGIN = 421,
         REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
+        REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423,
 };
-- 
2.11.0




More information about the libvir-list mailing list