[PATCH v3 2/2] qemu: implement memory failure event

Michal Privoznik mprivozn at redhat.com
Thu Oct 22 17:00:01 UTC 2020


On 10/14/20 12:37 PM, zhenwei pi wrote:
> Since QEMU 5.2 (commit-77b285f7f6), QEMU supports 'memory failure'
> event, posts event to monitor if hitting a hardware memory error.
> Fully support this feature for QEMU.
> 
> Test with commit 'libvirt: support memory failure event', build a
> little complex environment(nested KVM):
> 1, install newly built libvirt in L1, and start a L2 vm. run command
> in L1:
>   ~# virsh event l2 --event memory-failure
> 
> 2, run command in L0 to inject MCE to L1:
>   ~# virsh qemu-monitor-command l1 --hmp mce 0 9 0xbd000000000000c0 0xd 0x62000000 0x8c
> 
> Test result in l1(recipient hypervisor case):
> event 'memory-failure' for domain l2:
> recipient: hypervisor
> action: ignore
> flags:
>          action required: 0
>          recursive: 0
> 
> Test result in l1(recipient guest case):
> event 'memory-failure' for domain l2:
> recipient: guest
> action: inject
> flags:
>          action required: 0
>          recursive: 0
> 
> Signed-off-by: zhenwei pi <pizhenwei at bytedance.com>
> ---
>   src/qemu/qemu_monitor.c      | 21 +++++++++++++++-
>   src/qemu/qemu_monitor.h      | 39 +++++++++++++++++++++++++++++
>   src/qemu/qemu_monitor_json.c | 49 ++++++++++++++++++++++++++++++++++++
>   src/qemu/qemu_process.c      | 59 ++++++++++++++++++++++++++++++++++++++++++++
>   4 files changed, 167 insertions(+), 1 deletion(-)
> 
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 8c991fefbb..189b789bb8 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void)
>   
>   VIR_ONCE_GLOBAL_INIT(qemuMonitor);
>   
> -
>   VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
>                 QEMU_MONITOR_MIGRATION_STATUS_LAST,
>                 "inactive", "setup",
> @@ -197,6 +196,14 @@ VIR_ENUM_IMPL(qemuMonitorDumpStatus,
>                 "none", "active", "completed", "failed",
>   );
>   
> +VIR_ENUM_IMPL(qemuMonitorMemoryFailureRecipient,
> +              QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST,
> +              "hypervisor", "guest");
> +
> +VIR_ENUM_IMPL(qemuMonitorMemoryFailureAction,
> +              QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST,
> +              "ignore", "inject",
> +              "fatal", "reset");
>   
>   #if DEBUG_RAW_IO
>   static char *
> @@ -1428,6 +1435,18 @@ qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon)
>   
>   
>   int
> +qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
> +                             qemuMonitorEventMemoryFailurePtr mfp)
> +{
> +    int ret = -1;
> +
> +    QEMU_MONITOR_CALLBACK(mon, ret, domainMemoryFailure, mon->vm, mfp);
> +
> +    return ret;
> +}
> +
> +
> +int
>   qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
>                                  int status)
>   {
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index a744c8975b..17ba006a2f 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -340,6 +340,40 @@ typedef int (*qemuMonitorDomainGuestCrashloadedCallback)(qemuMonitorPtr mon,
>                                                            virDomainObjPtr vm,
>                                                            void *opaque);
>   
> +typedef enum {
> +    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR,
> +    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST,
> +
> +    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST
> +} qemuMonitorMemoryFailureRecipient;
> +
> +VIR_ENUM_DECL(qemuMonitorMemoryFailureRecipient);
> +
> +typedef enum {
> +    QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE,
> +    QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT,
> +    QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL,
> +    QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET,
> +
> +    QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST
> +} qemuMonitorMemoryFailureAction;
> +
> +VIR_ENUM_DECL(qemuMonitorMemoryFailureAction);
> +
> +typedef struct _qemuMonitorEventMemoryFailure qemuMonitorEventMemoryFailure;
> +typedef qemuMonitorEventMemoryFailure *qemuMonitorEventMemoryFailurePtr;
> +struct _qemuMonitorEventMemoryFailure {
> +    qemuMonitorMemoryFailureRecipient recipient;
> +    qemuMonitorMemoryFailureAction action;
> +    bool action_required;
> +    bool recursive;
> +};
> +
> +typedef int (*qemuMonitorDomainMemoryFailureCallback)(qemuMonitorPtr mon,
> +                                                      virDomainObjPtr vm,
> +                                                      qemuMonitorEventMemoryFailurePtr mfp,
> +                                                      void *opaque);
> +
>   typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
>   typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
>   struct _qemuMonitorCallbacks {
> @@ -376,6 +410,7 @@ struct _qemuMonitorCallbacks {
>       qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged;
>       qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged;
>       qemuMonitorDomainGuestCrashloadedCallback domainGuestCrashloaded;
> +    qemuMonitorDomainMemoryFailureCallback domainMemoryFailure;
>   };
>   
>   qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
> @@ -475,6 +510,10 @@ int qemuMonitorEmitSerialChange(qemuMonitorPtr mon,
>                                   const char *devAlias,
>                                   bool connected);
>   int qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon);
> +
> +int qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
> +                                 qemuMonitorEventMemoryFailurePtr mfp);
> +
>   int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
>                                      int status);
>   int qemuMonitorEmitMigrationPass(qemuMonitorPtr mon,
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 26ac499fc5..aa256727d6 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -112,6 +112,7 @@ static void qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon, virJSONValue
>   static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr data);
>   static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
>   static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
> +static void qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, virJSONValuePtr data);
>   
>   typedef struct {
>       const char *type;
> @@ -132,6 +133,7 @@ static qemuEventHandler eventHandlers[] = {
>       { "GUEST_CRASHLOADED", qemuMonitorJSONHandleGuestCrashloaded, },
>       { "GUEST_PANICKED", qemuMonitorJSONHandleGuestPanic, },
>       { "JOB_STATUS_CHANGE", qemuMonitorJSONHandleJobStatusChange, },
> +    { "MEMORY_FAILURE", qemuMonitorJSONHandleMemoryFailure, },
>       { "MIGRATION", qemuMonitorJSONHandleMigrationStatus, },
>       { "MIGRATION_PASS", qemuMonitorJSONHandleMigrationPass, },
>       { "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, },
> @@ -1336,6 +1338,53 @@ qemuMonitorJSONHandleSpiceMigrated(qemuMonitorPtr mon,
>   
>   
>   static void
> +qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
> +                                   virJSONValuePtr data)
> +{
> +    virJSONValuePtr flagsjson = virJSONValueObjectGetObject(data, "flags");
> +    const char *str;
> +    int recipient;
> +    int action;
> +    bool ar = false;
> +    bool recursive = false;
> +    qemuMonitorEventMemoryFailure mf = {0};
> +
> +    if (!(str = virJSONValueObjectGetString(data, "recipient"))) {
> +        VIR_WARN("missing recipient in memory failure event");
> +        return;
> +    }
> +
> +    recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
> +    if (recipient == -1) {

Here ^^

> +        VIR_WARN("unknown recipient '%s' in memory_failure event", str);
> +        return;
> +    }
> +
> +    if (!(str = virJSONValueObjectGetString(data, "action"))) {
> +        VIR_WARN("missing action in memory failure event");
> +        return;
> +    }
> +
> +    action = qemuMonitorMemoryFailureActionTypeFromString(str);
> +    if (action == -1) {

and here - I'd prefer if this was more robust check: < 0 because there's 
this common practice in libvirt: negative values represent an error 
state, positive and zero represent success. Now, it will probably never 
happen but if happened for other functions. If we need to introduce new 
error state, the function can return -2 to represent this new state. 
Now, all places that check for a negative value can stay as they are. 
Places which would check for exactly -1 need to be updated. This would 
produce unnecessary big change. Therefore I suggest the following change 
(again, no need to resend, I can do the change locally before pushing, I 
just want your confirmation):

diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index f4ea9ea782..cba9ec7b19 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -1355,7 +1355,7 @@ qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
      }

      recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
-    if (recipient == -1) {
+    if (recipient < 0) {
          VIR_WARN("unknown recipient '%s' in memory_failure event", str);
          return;
      }
@@ -1366,7 +1366,7 @@ qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
      }

      action = qemuMonitorMemoryFailureActionTypeFromString(str);
-    if (action == -1) {
+    if (action < 0) {
          VIR_WARN("unknown action '%s' in memory_failure event", str);
          return;
      }


Michal




More information about the libvir-list mailing list