[RFC PATCH] qemu: Do not report eof when processing monitor IO

Jim Fehlig jfehlig at suse.com
Tue Oct 12 20:26:16 UTC 2021


On 10/8/21 15:37, Jim Fehlig wrote:
> There have been countless reports from users concerned about the following
> error reported by libvirtd when qemu domains are shutdown
> 
> internal error: End of file from qemu monitor
> 
> While the error is harmless, users often mistaken it for real problem with
> their deployments. EOF from the monitor can't be entirely ignored since
> other threads may be using the monitor and must be able to detect the EOF
> condition.
> 
> One potential fix is to delay reporting EOF until the monitor is used
> after EOF is detected. This patch adds a 'goteof' member to the
> qemuMonitor structure, which is set when EOF is detected on the monitor
> socket. If another thread later tries to send data on the monitor, the
> EOF error is reported.
> 
> Signed-off-by: Jim Fehlig <jfehlig at suse.com>
> ---
> 
> An RFC patch to squelch qemu monitor EOF error messages on VM shutdown.
> Previous discussions and information on testing of the patch can be
> found in this thread
> 
> https://listman.redhat.com/archives/libvir-list/2021-September/msg00949.html

FYI, the tests mentioned in that thread ran for 2376 iterations with this patch 
before I had to use the machine for other purposes.

Any comments on this approach to squelching the error message? Or other ideas 
for achieving the same?

Regards,
Jim

>   src/qemu/qemu_monitor.c | 29 ++++++++++++++++-------------
>   1 file changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 5fc23f13d3..751ec8ba6c 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -113,6 +113,7 @@ struct _qemuMonitor {
>   
>       /* true if qemu no longer wants 'props' sub-object of object-add */
>       bool objectAddNoWrap;
> +    bool goteof;
>   };
>   
>   /**
> @@ -526,10 +527,10 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>   {
>       qemuMonitor *mon = opaque;
>       bool error = false;
> -    bool eof = false;
>       bool hangup = false;
>   
>       virObjectRef(mon);
> +    mon->goteof = false;
>   
>       /* lock access to the monitor and protect fd */
>       virObjectLock(mon);
> @@ -544,7 +545,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>   
>       if (mon->lastError.code != VIR_ERR_OK) {
>           if (cond & (G_IO_HUP | G_IO_ERR))
> -            eof = true;
> +            mon->goteof = true;
>           error = true;
>       } else {
>           if (cond & G_IO_OUT) {
> @@ -562,7 +563,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>                   if (errno == ECONNRESET)
>                       hangup = true;
>               } else if (got == 0) {
> -                eof = true;
> +                mon->goteof = true;
>               } else {
>                   /* Ignore hangup/error cond if we read some data, to
>                    * give time for that data to be consumed */
> @@ -575,22 +576,19 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>   
>           if (cond & G_IO_HUP) {
>               hangup = true;
> -            if (!error) {
> -                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                               _("End of file from qemu monitor"));
> -                eof = true;
> -            }
> +            if (!error)
> +                mon->goteof = true;
>           }
>   
> -        if (!error && !eof &&
> +        if (!error && !mon->goteof &&
>               cond & G_IO_ERR) {
>               virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>                              _("Invalid file descriptor while waiting for monitor"));
> -            eof = true;
> +            mon->goteof = true;
>           }
>       }
>   
> -    if (error || eof) {
> +    if (error || mon->goteof) {
>           if (hangup && mon->logFunc != NULL) {
>               /* Check if an error message from qemu is available and if so, use
>                * it to overwrite the actual message. It's done only in early
> @@ -609,7 +607,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>               /* Already have an error, so clear any new error */
>               virResetLastError();
>           } else {
> -            if (virGetLastErrorCode() == VIR_ERR_OK)
> +            if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
>                   virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>                                  _("Error while processing monitor IO"));
>               virCopyLastError(&mon->lastError);
> @@ -630,7 +628,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>       /* We have to unlock to avoid deadlock against command thread,
>        * but is this safe ?  I think it is, because the callback
>        * will try to acquire the virDomainObj *mutex next */
> -    if (eof) {
> +    if (mon->goteof) {
>           qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
>           virDomainObj *vm = mon->vm;
>   
> @@ -949,6 +947,11 @@ qemuMonitorSend(qemuMonitor *mon,
>           virSetError(&mon->lastError);
>           return -1;
>       }
> +    if (mon->goteof) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("End of file from qemu monitor"));
> +        return -1;
> +    }
>   
>       mon->msg = msg;
>       qemuMonitorUpdateWatch(mon);
> 




More information about the libvir-list mailing list