[PATCH v2] Add VM info to improve error log message for qemu monitor

Ani Sinha ani at anisinha.ca
Mon Jan 3 13:42:12 UTC 2022



On Wed, 22 Dec 2021, Rohit Kumar wrote:

> This patch is to determine the VM which had IO or socket hangup error.
> Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend()
> might leads to illegal access as we are out of 'vm' context and vm->def might
> not exist. Adding a field "domainName" inside mon object to access vm name
> and initialising it when creating mon object.

The commit message should desribe the purpose of the change, not how it is
done. That can be easily understood reading the code. As for why the
implementation follows a particular way, we can explain it in the code
comments.

This change adds the domain name in the error and debug logs during
monitor IO processing so that we may infer which VM experienced those
errors etc. This may help in debugging monitor IO errors. IMHO this
decription is enough.

>
> Signed-off-by: Rohit Kumar <rohit.kumar3 at nutanix.com>
> ---
>  diff to v1:
>   - Adding a field domainName inside _qemuMonitor struct for accessing vm name
>     instead of directly accessing mon->vm->def->name.
>   - Link to v1: https://listman.redhat.com/archives/libvir-list/2021-December/msg00217.html
>   - Talked with peter on RFC and he suggested me to add a field inside
>     monitor struct to get VM name.
>
>  src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 18 deletions(-)
>
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index dda6ae9796..c3a0227795 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -80,6 +80,7 @@ struct _qemuMonitor {
>      GSource *watch;
>
>      virDomainObj *vm;
> +    char *domainName;
>

Here we can explain why we are adding an extra member as opposed to
referencing it directly.

>      qemuMonitorCallbacks *cb;
>      void *callbackOpaque;
> @@ -243,6 +244,7 @@ qemuMonitorDispose(void *obj)
>      virCondDestroy(&mon->notify);
>      g_free(mon->buffer);
>      g_free(mon->balloonpath);
> +    g_free(mon->domainName);
>  }
>
>
> @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>      qemuMonitor *mon = opaque;
>      bool error = false;
>      bool hangup = false;
> +    virDomainObj *vm = NULL;
> +    char *vmName = NULL;
>

These initializations are redundant since ...

>      virObjectRef(mon);
>
> +    vm = mon->vm;
> +    vmName = mon->domainName;

you initialize them here anyway.

> +
>      /* lock access to the monitor and protect fd */
>      virObjectLock(mon);
>  #if DEBUG_IO
> -    VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond);
> +    VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName));
>  #endif
>      if (mon->fd == -1 || !mon->watch) {
>          virObjectUnlock(mon);
> @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>
>          if (!error && !mon->goteof &&
>              cond & G_IO_ERR) {
> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                           _("Invalid file descriptor while waiting for monitor"));
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           _("%s: Invalid file descriptor while waiting for monitor"), NULLSTR(vmName));

As per https://libvirt.org/coding-style.html#error-message-format, please
enclose %s within quotes:
_("'%s': Invalid file descriptor while waiting for monitor")

>              mon->goteof = true;
>          }
>      }
> @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>              virResetLastError();
>          } else {
>              if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
> -                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                               _("Error while processing monitor IO"));
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               _("%s: Error while processing monitor IO"), NULLSTR(vmName));

Ditto.

>              virCopyLastError(&mon->lastError);
>              virResetLastError();
>          }
>
> -        VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message));
> +        VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>          /* If IO process resulted in an error & we have a message,
>           * then wakeup that waiter */
>          if (mon->msg && !mon->msg->finished) {
> @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>       * will try to acquire the virDomainObj *mutex next */
>      if (mon->goteof) {
>          qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
> -        virDomainObj *vm = mon->vm;
>
>          /* Make sure anyone waiting wakes up now */
>          virCondSignal(&mon->notify);
>          virObjectUnlock(mon);
> -        VIR_DEBUG("Triggering EOF callback");
> +        VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
> +                  mon, vm, NULLSTR(vmName));
>          (eofNotify)(mon, vm, mon->callbackOpaque);
>          virObjectUnref(mon);
>      } else if (error) {
>          qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
> -        virDomainObj *vm = mon->vm;
>
>          /* Make sure anyone waiting wakes up now */
>          virCondSignal(&mon->notify);
>          virObjectUnlock(mon);
> -        VIR_DEBUG("Triggering error callback");
> +        VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
> +                  mon, vm, NULLSTR(vmName));
>          (errorNotify)(mon, vm, mon->callbackOpaque);
>          virObjectUnref(mon);
>      } else {
> @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
>      mon->fd = fd;
>      mon->context = g_main_context_ref(context);
>      mon->vm = virObjectRef(vm);
> +    mon->domainName = g_strdup(vm->def->name);

do not forget to g_free() this during cleanup in the same function.

>      mon->waitGreeting = true;
>      mon->cb = cb;
>      mon->callbackOpaque = opaque;
> @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
>                  qemuMonitorMessage *msg)
>  {
>      int ret = -1;
> +    virDomainObj *vm = mon->vm;
> +    char *vmName = mon->domainName;
>
>      /* Check whether qemu quit unexpectedly */
>      if (mon->lastError.code != VIR_ERR_OK) {
> -        VIR_DEBUG("Attempt to send command while error is set %s",
> -                  NULLSTR(mon->lastError.message));
> +        VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p name=%s",
> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>          virSetError(&mon->lastError);
>          return -1;
>      }
>      if (mon->goteof) {
> -        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                       _("End of file from qemu monitor"));
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("%s: End of file from qemu monitor"), NULLSTR(vmName));

ditto re error string format.

>          return -1;
>      }
>
> @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
>
>      while (!mon->msg->finished) {
>          if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                           _("Unable to wait on monitor condition"));
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           _("%s: Unable to wait on monitor condition"), NULLSTR(vmName));

ditto as above.

>              goto cleanup;
>          }
>      }
>
>      if (mon->lastError.code != VIR_ERR_OK) {
> -        VIR_DEBUG("Send command resulted in error %s",
> -                  NULLSTR(mon->lastError.message));
> +        VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p name=%s",
> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>          virSetError(&mon->lastError);
>          goto cleanup;
>      }
> --
> 2.25.1
>
>




More information about the libvir-list mailing list