[PATCH v3 2/2] qemu: implement memory failure event

zhenwei pi pizhenwei at bytedance.com
Wed Oct 14 10:37:51 UTC 2020


Since QEMU 5.2 (commit-77b285f7f6), QEMU supports 'memory failure'
event, posts event to monitor if hitting a hardware memory error.
Fully support this feature for QEMU.

Test with commit 'libvirt: support memory failure event', build a
little complex environment(nested KVM):
1, install newly built libvirt in L1, and start a L2 vm. run command
in L1:
 ~# virsh event l2 --event memory-failure

2, run command in L0 to inject MCE to L1:
 ~# virsh qemu-monitor-command l1 --hmp mce 0 9 0xbd000000000000c0 0xd 0x62000000 0x8c

Test result in l1(recipient hypervisor case):
event 'memory-failure' for domain l2:
recipient: hypervisor
action: ignore
flags:
        action required: 0
        recursive: 0

Test result in l1(recipient guest case):
event 'memory-failure' for domain l2:
recipient: guest
action: inject
flags:
        action required: 0
        recursive: 0

Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
---
 src/qemu/qemu_monitor.c      | 21 +++++++++++++++-
 src/qemu/qemu_monitor.h      | 39 +++++++++++++++++++++++++++++
 src/qemu/qemu_monitor_json.c | 49 ++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_process.c      | 59 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 8c991fefbb..189b789bb8 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void)
 
 VIR_ONCE_GLOBAL_INIT(qemuMonitor);
 
-
 VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
               QEMU_MONITOR_MIGRATION_STATUS_LAST,
               "inactive", "setup",
@@ -197,6 +196,14 @@ VIR_ENUM_IMPL(qemuMonitorDumpStatus,
               "none", "active", "completed", "failed",
 );
 
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureRecipient,
+              QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST,
+              "hypervisor", "guest");
+
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureAction,
+              QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST,
+              "ignore", "inject",
+              "fatal", "reset");
 
 #if DEBUG_RAW_IO
 static char *
@@ -1428,6 +1435,18 @@ qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon)
 
 
 int
+qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+                             qemuMonitorEventMemoryFailurePtr mfp)
+{
+    int ret = -1;
+
+    QEMU_MONITOR_CALLBACK(mon, ret, domainMemoryFailure, mon->vm, mfp);
+
+    return ret;
+}
+
+
+int
 qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
                                int status)
 {
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index a744c8975b..17ba006a2f 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -340,6 +340,40 @@ typedef int (*qemuMonitorDomainGuestCrashloadedCallback)(qemuMonitorPtr mon,
                                                          virDomainObjPtr vm,
                                                          void *opaque);
 
+typedef enum {
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR,
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST,
+
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST
+} qemuMonitorMemoryFailureRecipient;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureRecipient);
+
+typedef enum {
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE,
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT,
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL,
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET,
+
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST
+} qemuMonitorMemoryFailureAction;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureAction);
+
+typedef struct _qemuMonitorEventMemoryFailure qemuMonitorEventMemoryFailure;
+typedef qemuMonitorEventMemoryFailure *qemuMonitorEventMemoryFailurePtr;
+struct _qemuMonitorEventMemoryFailure {
+    qemuMonitorMemoryFailureRecipient recipient;
+    qemuMonitorMemoryFailureAction action;
+    bool action_required;
+    bool recursive;
+};
+
+typedef int (*qemuMonitorDomainMemoryFailureCallback)(qemuMonitorPtr mon,
+                                                      virDomainObjPtr vm,
+                                                      qemuMonitorEventMemoryFailurePtr mfp,
+                                                      void *opaque);
+
 typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
 typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
 struct _qemuMonitorCallbacks {
@@ -376,6 +410,7 @@ struct _qemuMonitorCallbacks {
     qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged;
     qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged;
     qemuMonitorDomainGuestCrashloadedCallback domainGuestCrashloaded;
+    qemuMonitorDomainMemoryFailureCallback domainMemoryFailure;
 };
 
 qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
@@ -475,6 +510,10 @@ int qemuMonitorEmitSerialChange(qemuMonitorPtr mon,
                                 const char *devAlias,
                                 bool connected);
 int qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon);
+
+int qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+                                 qemuMonitorEventMemoryFailurePtr mfp);
+
 int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
                                    int status);
 int qemuMonitorEmitMigrationPass(qemuMonitorPtr mon,
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 26ac499fc5..aa256727d6 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -112,6 +112,7 @@ static void qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon, virJSONValue
 static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr data);
 static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
 static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
+static void qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, virJSONValuePtr data);
 
 typedef struct {
     const char *type;
@@ -132,6 +133,7 @@ static qemuEventHandler eventHandlers[] = {
     { "GUEST_CRASHLOADED", qemuMonitorJSONHandleGuestCrashloaded, },
     { "GUEST_PANICKED", qemuMonitorJSONHandleGuestPanic, },
     { "JOB_STATUS_CHANGE", qemuMonitorJSONHandleJobStatusChange, },
+    { "MEMORY_FAILURE", qemuMonitorJSONHandleMemoryFailure, },
     { "MIGRATION", qemuMonitorJSONHandleMigrationStatus, },
     { "MIGRATION_PASS", qemuMonitorJSONHandleMigrationPass, },
     { "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, },
@@ -1336,6 +1338,53 @@ qemuMonitorJSONHandleSpiceMigrated(qemuMonitorPtr mon,
 
 
 static void
+qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
+                                   virJSONValuePtr data)
+{
+    virJSONValuePtr flagsjson = virJSONValueObjectGetObject(data, "flags");
+    const char *str;
+    int recipient;
+    int action;
+    bool ar = false;
+    bool recursive = false;
+    qemuMonitorEventMemoryFailure mf = {0};
+
+    if (!(str = virJSONValueObjectGetString(data, "recipient"))) {
+        VIR_WARN("missing recipient in memory failure event");
+        return;
+    }
+
+    recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
+    if (recipient == -1) {
+        VIR_WARN("unknown recipient '%s' in memory_failure event", str);
+        return;
+    }
+
+    if (!(str = virJSONValueObjectGetString(data, "action"))) {
+        VIR_WARN("missing action in memory failure event");
+        return;
+    }
+
+    action = qemuMonitorMemoryFailureActionTypeFromString(str);
+    if (action == -1) {
+        VIR_WARN("unknown action '%s' in memory_failure event", str);
+        return;
+    }
+
+    if (flagsjson) {
+        virJSONValueObjectGetBoolean(flagsjson, "action-required", &ar);
+        virJSONValueObjectGetBoolean(flagsjson, "recursive", &recursive);
+    }
+
+    mf.recipient = recipient;
+    mf.action = action;
+    mf.action_required = ar;
+    mf.recursive = recursive;
+    qemuMonitorEmitMemoryFailure(mon, &mf);
+}
+
+
+static void
 qemuMonitorJSONHandleMigrationStatus(qemuMonitorPtr mon,
                                      virJSONValuePtr data)
 {
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 5bc76a75e3..77a9573933 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1878,6 +1878,64 @@ qemuProcessHandleGuestCrashloaded(qemuMonitorPtr mon G_GNUC_UNUSED,
 }
 
 
+static int
+qemuProcessHandleMemoryFailure(qemuMonitorPtr mon G_GNUC_UNUSED,
+                               virDomainObjPtr vm,
+                               qemuMonitorEventMemoryFailurePtr mfp,
+                               void *opaque)
+{
+    virQEMUDriverPtr driver = opaque;
+    virObjectEventPtr event = NULL;
+    virDomainMemoryFailureRecipientType recipient;
+    virDomainMemoryFailureActionType action;
+    unsigned int flags = 0;
+
+    switch (mfp->recipient) {
+    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR:
+        recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST:
+        recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST:
+    default:
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("requested unknown memory failure recipient"));
+        return -1;
+    }
+
+    switch (mfp->action) {
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST:
+    default:
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("requested unknown memory failure action"));
+        return -1;
+    }
+
+    if (mfp->action_required)
+        flags |= VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED;
+    if (mfp->recursive)
+        flags |= VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE;
+
+    event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action, flags);
+    virObjectEventStateQueue(driver->domainEventState, event);
+
+    return 0;
+}
+
+
 static qemuMonitorCallbacks monitorCallbacks = {
     .eofNotify = qemuProcessHandleMonitorEOF,
     .errorNotify = qemuProcessHandleMonitorError,
@@ -1910,6 +1968,7 @@ static qemuMonitorCallbacks monitorCallbacks = {
     .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged,
     .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged,
     .domainGuestCrashloaded = qemuProcessHandleGuestCrashloaded,
+    .domainMemoryFailure = qemuProcessHandleMemoryFailure,
 };
 
 static void
-- 
2.11.0




More information about the libvir-list mailing list