[PATCH v2] Add VM info to improve error log message for qemu monitor
Rohit Kumar
rohit.kumar3 at nutanix.com
Tue Jan 4 08:31:34 UTC 2022
On 03/01/22 7:12 pm, Ani Sinha wrote:
>
> On Wed, 22 Dec 2021, Rohit Kumar wrote:
>
>> This patch is to determine the VM which had IO or socket hangup error.
>> Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend()
>> might leads to illegal access as we are out of 'vm' context and vm->def might
>> not exist. Adding a field "domainName" inside mon object to access vm name
>> and initialising it when creating mon object.
> The commit message should desribe the purpose of the change, not how it is
> done. That can be easily understood reading the code. As for why the
> implementation follows a particular way, we can explain it in the code
> comments.
>
> This change adds the domain name in the error and debug logs during
> monitor IO processing so that we may infer which VM experienced those
> errors etc. This may help in debugging monitor IO errors. IMHO this
> decription is enough.
Thanks Ani, I will update the commit message in the next patch.
>
>> Signed-off-by: Rohit Kumar <rohit.kumar3 at nutanix.com>
>> ---
>> diff to v1:
>> - Adding a field domainName inside _qemuMonitor struct for accessing vm name
>> instead of directly accessing mon->vm->def->name.
>> - Link to v1: https://urldefense.proofpoint.com/v2/url?u=https-3A__listman.redhat.com_archives_libvir-2Dlist_2021-2DDecember_msg00217.html&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=W2jEgk2s_xjLOLC-j8lPBnwVgteHS0-yYEGMqin16nmfzndzNgBE9mEvtbvyVzQo&s=Sa2q9edb4cvXo-EUriIPwlJxNo-itpp6--ZxY7rla2U&e=
>> - Talked with peter on RFC and he suggested me to add a field inside
>> monitor struct to get VM name.
>>
>> src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
>> 1 file changed, 29 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
>> index dda6ae9796..c3a0227795 100644
>> --- a/src/qemu/qemu_monitor.c
>> +++ b/src/qemu/qemu_monitor.c
>> @@ -80,6 +80,7 @@ struct _qemuMonitor {
>> GSource *watch;
>>
>> virDomainObj *vm;
>> + char *domainName;
>>
> Here we can explain why we are adding an extra member as opposed to
> referencing it directly.
Right, we can access it directly.
>
>> qemuMonitorCallbacks *cb;
>> void *callbackOpaque;
>> @@ -243,6 +244,7 @@ qemuMonitorDispose(void *obj)
>> virCondDestroy(&mon->notify);
>> g_free(mon->buffer);
>> g_free(mon->balloonpath);
>> + g_free(mon->domainName);
>> }
>>
>>
>> @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>> qemuMonitor *mon = opaque;
>> bool error = false;
>> bool hangup = false;
>> + virDomainObj *vm = NULL;
>> + char *vmName = NULL;
>>
> These initializations are redundant since ...
Acknowledged.
>> virObjectRef(mon);
>>
>> + vm = mon->vm;
>> + vmName = mon->domainName;
> you initialize them here anyway.
I will remove these in next patch.
>
>> +
>> /* lock access to the monitor and protect fd */
>> virObjectLock(mon);
>> #if DEBUG_IO
>> - VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond);
>> + VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName));
>> #endif
>> if (mon->fd == -1 || !mon->watch) {
>> virObjectUnlock(mon);
>> @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>>
>> if (!error && !mon->goteof &&
>> cond & G_IO_ERR) {
>> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> - _("Invalid file descriptor while waiting for monitor"));
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("%s: Invalid file descriptor while waiting for monitor"), NULLSTR(vmName));
> As per https://urldefense.proofpoint.com/v2/url?u=https-3A__libvirt.org_coding-2Dstyle.html-23error-2Dmessage-2Dformat&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=W2jEgk2s_xjLOLC-j8lPBnwVgteHS0-yYEGMqin16nmfzndzNgBE9mEvtbvyVzQo&s=zU-TBR-xTsKRXEW5gdgL0WRRYBg7B7cbLluu65_PfkI&e= , please
> enclose %s within quotes:
> _("'%s': Invalid file descriptor while waiting for monitor")
Ack. Thanks.
>> mon->goteof = true;
>> }
>> }
>> @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>> virResetLastError();
>> } else {
>> if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
>> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> - _("Error while processing monitor IO"));
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("%s: Error while processing monitor IO"), NULLSTR(vmName));
> Ditto.
Ack.
>> virCopyLastError(&mon->lastError);
>> virResetLastError();
>> }
>>
>> - VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message));
>> + VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
>> + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>> /* If IO process resulted in an error & we have a message,
>> * then wakeup that waiter */
>> if (mon->msg && !mon->msg->finished) {
>> @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>> * will try to acquire the virDomainObj *mutex next */
>> if (mon->goteof) {
>> qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
>> - virDomainObj *vm = mon->vm;
>>
>> /* Make sure anyone waiting wakes up now */
>> virCondSignal(&mon->notify);
>> virObjectUnlock(mon);
>> - VIR_DEBUG("Triggering EOF callback");
>> + VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
>> + mon, vm, NULLSTR(vmName));
>> (eofNotify)(mon, vm, mon->callbackOpaque);
>> virObjectUnref(mon);
>> } else if (error) {
>> qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
>> - virDomainObj *vm = mon->vm;
>>
>> /* Make sure anyone waiting wakes up now */
>> virCondSignal(&mon->notify);
>> virObjectUnlock(mon);
>> - VIR_DEBUG("Triggering error callback");
>> + VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
>> + mon, vm, NULLSTR(vmName));
>> (errorNotify)(mon, vm, mon->callbackOpaque);
>> virObjectUnref(mon);
>> } else {
>> @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
>> mon->fd = fd;
>> mon->context = g_main_context_ref(context);
>> mon->vm = virObjectRef(vm);
>> + mon->domainName = g_strdup(vm->def->name);
> do not forget to g_free() this during cleanup in the same function.
So, in cleanup: qemuMonitorDispose is called. And there I have added
g_free() to clean mon->domainName.
>
>> mon->waitGreeting = true;
>> mon->cb = cb;
>> mon->callbackOpaque = opaque;
>> @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
>> qemuMonitorMessage *msg)
>> {
>> int ret = -1;
>> + virDomainObj *vm = mon->vm;
>> + char *vmName = mon->domainName;
>>
>> /* Check whether qemu quit unexpectedly */
>> if (mon->lastError.code != VIR_ERR_OK) {
>> - VIR_DEBUG("Attempt to send command while error is set %s",
>> - NULLSTR(mon->lastError.message));
>> + VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p name=%s",
>> + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>> virSetError(&mon->lastError);
>> return -1;
>> }
>> if (mon->goteof) {
>> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> - _("End of file from qemu monitor"));
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("%s: End of file from qemu monitor"), NULLSTR(vmName));
> ditto re error string format.
Ack.
>
>> return -1;
>> }
>>
>> @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
>>
>> while (!mon->msg->finished) {
>> if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
>> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> - _("Unable to wait on monitor condition"));
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("%s: Unable to wait on monitor condition"), NULLSTR(vmName));
> ditto as above.
Ack.
>> goto cleanup;
>> }
>> }
>>
>> if (mon->lastError.code != VIR_ERR_OK) {
>> - VIR_DEBUG("Send command resulted in error %s",
>> - NULLSTR(mon->lastError.message));
>> + VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p name=%s",
>> + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>> virSetError(&mon->lastError);
>> goto cleanup;
>> }
>> --
>> 2.25.1
>>
>>
More information about the libvir-list
mailing list