[PATCH v2] Add VM info to improve error log message for qemu monitor

Rohit Kumar rohit.kumar3 at nutanix.com
Tue Jan 4 08:40:49 UTC 2022


On 03/01/22 10:12 pm, Peter Krempa wrote:
> On Wed, Dec 22, 2021 at 22:39:21 -0800, Rohit Kumar wrote:
>> This patch is to determine the VM which had IO or socket hangup error.
>> Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend()
>> might leads to illegal access as we are out of 'vm' context and vm->def might
>> not exist. Adding a field "domainName" inside mon object to access vm name
>> and initialising it when creating mon object.
>>
>> Signed-off-by: Rohit Kumar <rohit.kumar3 at nutanix.com>
>> ---
>>   diff to v1:
>>    - Adding a field domainName inside _qemuMonitor struct for accessing vm name
>>      instead of directly accessing mon->vm->def->name.
>>    - Link to v1: https://urldefense.proofpoint.com/v2/url?u=https-3A__listman.redhat.com_archives_libvir-2Dlist_2021-2DDecember_msg00217.html&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=Zx9EFUnjB-Vnos3Ycje0GLa92yVGi-GQA7I5kFT9ll8jJUujvx-k2cEpTTb40KZW&s=KjRU2euJOCStdmi2A-a_axYFby40T-oNyr6kApd0nYI&e=
>>    - Talked with peter on RFC and he suggested me to add a field inside
>>      monitor struct to get VM name.
>>   
>>   src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
>>   1 file changed, 29 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
>> index dda6ae9796..c3a0227795 100644
>> --- a/src/qemu/qemu_monitor.c
>> +++ b/src/qemu/qemu_monitor.c
> [...]
>
>> @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>>       qemuMonitor *mon = opaque;
>>       bool error = false;
>>       bool hangup = false;
>> +    virDomainObj *vm = NULL;
>> +    char *vmName = NULL;
> These local variables are not very useful, you can access 'mon' directly
> without the need for this alias ...
Thanks for the review Peter. I will update it in the next patch.
>
>>   
>>       virObjectRef(mon);
>>   
>> +    vm = mon->vm;
>> +    vmName = mon->domainName;
>> +
>>       /* lock access to the monitor and protect fd */
>>       virObjectLock(mon);
>>   #if DEBUG_IO
>> -    VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond);
>> +    VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName));
>>   #endif
> There's no much point in adding to thhese debug statements (inside
> DEBUG_IO) as they are not compiled by default. In fact I'll propose a
> removal of those.
Okay. I will remove these from here.
>
>>       if (mon->fd == -1 || !mon->watch) {
>>           virObjectUnlock(mon);
>> @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>>   
>>           if (!error && !mon->goteof &&
>>               cond & G_IO_ERR) {
>> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> -                           _("Invalid file descriptor while waiting for monitor"));
>> +            virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                           _("%s: Invalid file descriptor while waiting for monitor"), NULLSTR(vmName));
> Do not prefix the error message by the VM name, but rather put it at the
> end or inside:
>
>   Invalid file descriptor while waiting for monitor (vm='%s')
>
> or smilar.
Acknowledge. Will update in next patch. Thanks.
>
>>               mon->goteof = true;
>>           }
>>       }
>> @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>>               virResetLastError();
>>           } else {
>>               if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
>> -                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> -                               _("Error while processing monitor IO"));
>> +                virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                               _("%s: Error while processing monitor IO"), NULLSTR(vmName));
> Same here. Additionally did you ever get to a situation where vmName
> would be NULL?
There might be a situation when g_strdup() fails to allocate vm name, 
for e.g. if host ran out of memory ?
Let me know your thoughts on this, I would be happy to remove NULLSTR if 
it's not the case.
>
>>               virCopyLastError(&mon->lastError);
>>               virResetLastError();
>>           }
>>   
>> -        VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message));
>> +        VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
>> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
> This is a good example.
Okay.
>
>>           /* If IO process resulted in an error & we have a message,
>>            * then wakeup that waiter */
>>           if (mon->msg && !mon->msg->finished) {
>> @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>>        * will try to acquire the virDomainObj *mutex next */
>>       if (mon->goteof) {
>>           qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
>> -        virDomainObj *vm = mon->vm;
> You can remove this local when you are accessing it directly.
Sure. Ack.
>
>>   
>>           /* Make sure anyone waiting wakes up now */
>>           virCondSignal(&mon->notify);
>>           virObjectUnlock(mon);
>> -        VIR_DEBUG("Triggering EOF callback");
>> +        VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
>> +                  mon, vm, NULLSTR(vmName));
>>           (eofNotify)(mon, vm, mon->callbackOpaque);
>>           virObjectUnref(mon);
>>       } else if (error) {
>>           qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
>> -        virDomainObj *vm = mon->vm;
>>   
>>           /* Make sure anyone waiting wakes up now */
>>           virCondSignal(&mon->notify);
>>           virObjectUnlock(mon);
>> -        VIR_DEBUG("Triggering error callback");
>> +        VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
>> +                  mon, vm, NULLSTR(vmName));
>>           (errorNotify)(mon, vm, mon->callbackOpaque);
>>           virObjectUnref(mon);
>>       } else {
>> @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
>>       mon->fd = fd;
>>       mon->context = g_main_context_ref(context);
>>       mon->vm = virObjectRef(vm);
>> +    mon->domainName = g_strdup(vm->def->name);
> As noted above, did you ever run into a situation where this would be
> NULL? That would seem weird.
My concern was with g_strdup() allocation failing. Let me know if that 
isn't the case.
>
>>       mon->waitGreeting = true;
>>       mon->cb = cb;
>>       mon->callbackOpaque = opaque;
>> @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
>>                   qemuMonitorMessage *msg)
>>   {
>>       int ret = -1;
>> +    virDomainObj *vm = mon->vm;
>> +    char *vmName = mon->domainName;
> Avoid these.
Ack.
>
>>   
>>       /* Check whether qemu quit unexpectedly */
>>       if (mon->lastError.code != VIR_ERR_OK) {
>> -        VIR_DEBUG("Attempt to send command while error is set %s",
>> -                  NULLSTR(mon->lastError.message));
>> +        VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p name=%s",
>> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>>           virSetError(&mon->lastError);
>>           return -1;
>>       }
>>       if (mon->goteof) {
>> -        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> -                       _("End of file from qemu monitor"));
>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                       _("%s: End of file from qemu monitor"), NULLSTR(vmName));
> No prefixing.
Ack.
>
>>           return -1;
>>       }
>>   
>> @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
>>   
>>       while (!mon->msg->finished) {
>>           if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
>> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> -                           _("Unable to wait on monitor condition"));
>> +            virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                           _("%s: Unable to wait on monitor condition"), NULLSTR(vmName));
> Same here.
Ack.
Thanks, I will update these in the next patch.
>
>>               goto cleanup;
>>           }
>>       }
>>   
>>       if (mon->lastError.code != VIR_ERR_OK) {
>> -        VIR_DEBUG("Send command resulted in error %s",
>> -                  NULLSTR(mon->lastError.message));
>> +        VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p name=%s",
>> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>>           virSetError(&mon->lastError);
>>           goto cleanup;
>>       }
>> -- 
>> 2.25.1
>>




More information about the libvir-list mailing list