[libvirt] [PATCH] Keep track of guest paused state after disk IO errors

Daniel Veillard veillard at redhat.com
Fri Mar 19 11:16:59 UTC 2010


On Tue, Mar 16, 2010 at 05:24:38PM +0000, Daniel P. Berrange wrote:
> With the QMP mode monitor, it is possible to get a notification
> when the guest is paused indirectly (e.g. as a result of a disk IO
> error). This patch enables such reporting and when receiving an
> error updates libvirt's view of the guest to indicate that it is
> now paused. It also emits an event
> 
>   VIR_DOMAIN_EVENT_SUSPENDED
> 
> with a detail of:
> 
>   VIR_DOMAIN_EVENT_SUSPENDED_IOERROR
> 
> NB this patch does not make the error behaviour configurable. It
> just copes with the pause transition, if QEMU is set up to do that
> by default (current upstream now works this way for disks).
> 
> * include/libvirt/libvirt.h.in: Add VIR_DOMAIN_EVENT_SUSPENDED_IOERROR
> * src/qemu/qemu_driver.c: Update VM state to paused when IO error
>   occurs
> * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h,
>   src/qemu/qemu_monitor_json.c: Wire up handlers for disk IO errors
> ---
>  include/libvirt/libvirt.h.in |    1 +
>  src/qemu/qemu_driver.c       |   42 ++++++++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_monitor.c      |   18 ++++++++++++++++++
>  src/qemu/qemu_monitor.h      |   16 ++++++++++++++++
>  src/qemu/qemu_monitor_json.c |   34 ++++++++++++++++++++++++++++++++++
>  5 files changed, 111 insertions(+), 0 deletions(-)
> 
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index 260505e..b7a6922 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -1361,6 +1361,7 @@ typedef enum {
>  typedef enum {
>      VIR_DOMAIN_EVENT_SUSPENDED_PAUSED = 0,   /* Normal suspend due to admin pause */
>      VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED = 1, /* Suspended for offline migration */
> +    VIR_DOMAIN_EVENT_SUSPENDED_IOERROR = 2,  /* Suspended due to a disk I/O error */
>  } virDomainEventSuspendedDetailType;
>  
>  /**
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 8766ca2..ae19097 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -824,9 +824,51 @@ cleanup:
>      return ret;
>  }
>  
> +
> +static int
> +qemuHandleDiskIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
> +                      virDomainObjPtr vm,
> +                      int action,
> +                      const char *diskalias ATTRIBUTE_UNUSED)
> +{
> +    struct qemud_driver *driver = qemu_driver;
> +    virDomainEventPtr event = NULL;
> +
> +    VIR_DEBUG("Received IO error on %p '%s': action=%d disk=%s", vm, vm->def->name, action, diskalias);
> +
> +    if (action != QEMU_MONITOR_DISK_IO_ERROR_STOP)
> +        return 0;
> +
> +    virDomainObjLock(vm);
> +
> +    if (action == QEMU_MONITOR_DISK_IO_ERROR_STOP &&
> +        vm->state == VIR_DOMAIN_RUNNING) {
> +        VIR_DEBUG("Transitioned guest %s to paused state", vm->def->name);
> +
> +        vm->state = VIR_DOMAIN_PAUSED;
> +        event = virDomainEventNewFromObj(vm,
> +                                         VIR_DOMAIN_EVENT_SUSPENDED,
> +                                         VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
> +
> +        if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
> +            VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
> +    }
> +
> +    virDomainObjUnlock(vm);
> +
> +    if (event) {
> +        qemuDriverLock(driver);
> +        qemuDomainEventQueue(driver, event);
> +        qemuDriverUnlock(driver);
> +    }
> +    return 0;
> +}
> +
> +
>  static qemuMonitorCallbacks monitorCallbacks = {
>      .eofNotify = qemuHandleMonitorEOF,
>      .diskSecretLookup = findVolumeQcowPassphrase,
> +    .diskIOError = qemuHandleDiskIOError,
>  };
>  
>  static int
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index b88532c..7205001 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -791,6 +791,24 @@ int qemuMonitorEmitStop(qemuMonitorPtr mon)
>  }
>  
>  
> +int qemuMonitorEmitDiskIOError(qemuMonitorPtr mon,
> +                               int action,
> +                               const char *deviceAlias)
> +{
> +    int ret = -1;
> +    VIR_DEBUG("mon=%p action=%d deviceAlias=%s", mon, action, deviceAlias);
> +
> +    qemuMonitorRef(mon);
> +    qemuMonitorUnlock(mon);
> +    if (mon->cb && mon->cb->diskIOError)
> +        ret = mon->cb->diskIOError(mon, mon->vm, action, deviceAlias);
> +    qemuMonitorLock(mon);
> +    qemuMonitorUnref(mon);
> +
> +    return ret;
> +}
> +
> +
>  int qemuMonitorSetCapabilities(qemuMonitorPtr mon)
>  {
>      int ret;
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 0ac3957..75a2853 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -60,6 +60,14 @@ struct _qemuMonitorMessage {
>      void *passwordOpaque;
>  };
>  
> +typedef enum {
> +    QEMU_MONITOR_DISK_IO_ERROR_STOP,
> +    QEMU_MONITOR_DISK_IO_ERROR_REPORT,
> +    QEMU_MONITOR_DISK_IO_ERROR_IGNORE,
> +
> +    QEMU_MONITOR_DISK_IO_ERROR_LAST
> +} qemuMonitorDiskIOErrorAction;
> +
>  typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
>  typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
>  struct _qemuMonitorCallbacks {
> @@ -86,6 +94,11 @@ struct _qemuMonitorCallbacks {
>                             virDomainObjPtr vm);
>      int (*domainStop)(qemuMonitorPtr mon,
>                        virDomainObjPtr vm);
> +
> +    int (*diskIOError)(qemuMonitorPtr mon,
> +                       virDomainObjPtr vm,
> +                       int action,
> +                       const char *diskAlias);
>  };
>  
>  
> @@ -122,6 +135,9 @@ int qemuMonitorEmitShutdown(qemuMonitorPtr mon);
>  int qemuMonitorEmitReset(qemuMonitorPtr mon);
>  int qemuMonitorEmitPowerdown(qemuMonitorPtr mon);
>  int qemuMonitorEmitStop(qemuMonitorPtr mon);
> +int qemuMonitorEmitDiskIOError(qemuMonitorPtr mon,
> +                               int action,
> +                               const char *deviceAlias);
>  
>  int qemuMonitorStartCPUs(qemuMonitorPtr mon,
>                           virConnectPtr conn);
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 7b45594..8b3cda1 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -49,6 +49,7 @@ static void qemuMonitorJSONHandleShutdown(qemuMonitorPtr mon, virJSONValuePtr da
>  static void qemuMonitorJSONHandleReset(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandlePowerdown(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandleStop(qemuMonitorPtr mon, virJSONValuePtr data);
> +static void qemuMonitorJSONHandleDiskIOError(qemuMonitorPtr mon, virJSONValuePtr data);
>  
>  struct {
>      const char *type;
> @@ -58,6 +59,7 @@ struct {
>      { "RESET", qemuMonitorJSONHandleReset, },
>      { "POWERDOWN", qemuMonitorJSONHandlePowerdown, },
>      { "STOP", qemuMonitorJSONHandleStop, },
> +    { "BLOCK_IO_ERROR", qemuMonitorJSONHandleDiskIOError, },
>  };
>  
>  
> @@ -496,6 +498,38 @@ static void qemuMonitorJSONHandleStop(qemuMonitorPtr mon, virJSONValuePtr data A
>  }
>  
>  
> +VIR_ENUM_DECL(qemuMonitorDiskIOErrorAction)
> +VIR_ENUM_IMPL(qemuMonitorDiskIOErrorAction, QEMU_MONITOR_DISK_IO_ERROR_LAST,
> +              "stop", "report", "ignore");
> +
> +
> +static void qemuMonitorJSONHandleDiskIOError(qemuMonitorPtr mon, virJSONValuePtr data)
> +{
> +    const char *device;
> +    const char *action;
> +    int actionID;
> +
> +    /* Throughout here we try our best to carry on upon errors,
> +       since it's important to get as much info as possible out
> +       to the application */
> +
> +    if ((action = virJSONValueObjectGetString(data, "action")) == NULL) {
> +        VIR_WARN0("Missing action in disk io error event");
> +        action = "ignore";
> +    }
> +
> +    if ((device = virJSONValueObjectGetString(data, "device")) == NULL) {
> +        VIR_WARN0("missing device in disk io error event");
> +    }
> +
> +    if ((actionID = qemuMonitorDiskIOErrorActionTypeFromString(action)) < 0) {
> +        VIR_WARN("unknown disk io error action '%s'", action);
> +        actionID = QEMU_MONITOR_DISK_IO_ERROR_IGNORE;
> +    }
> +
> +    qemuMonitorEmitDiskIOError(mon, actionID, device);
> +}
> +
>  int
>  qemuMonitorJSONSetCapabilities(qemuMonitorPtr mon)
>  {

  ACK,

important, please push :-)

Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel at veillard.com  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/




More information about the libvir-list mailing list