[RFC PATCH] qemu: Do not report eof when processing monitor IO
Jim Fehlig
jfehlig at suse.com
Tue Oct 12 20:26:16 UTC 2021
On 10/8/21 15:37, Jim Fehlig wrote:
> There have been countless reports from users concerned about the following
> error reported by libvirtd when qemu domains are shutdown
>
> internal error: End of file from qemu monitor
>
> While the error is harmless, users often mistaken it for real problem with
> their deployments. EOF from the monitor can't be entirely ignored since
> other threads may be using the monitor and must be able to detect the EOF
> condition.
>
> One potential fix is to delay reporting EOF until the monitor is used
> after EOF is detected. This patch adds a 'goteof' member to the
> qemuMonitor structure, which is set when EOF is detected on the monitor
> socket. If another thread later tries to send data on the monitor, the
> EOF error is reported.
>
> Signed-off-by: Jim Fehlig <jfehlig at suse.com>
> ---
>
> An RFC patch to squelch qemu monitor EOF error messages on VM shutdown.
> Previous discussions and information on testing of the patch can be
> found in this thread
>
> https://listman.redhat.com/archives/libvir-list/2021-September/msg00949.html
FYI, the tests mentioned in that thread ran for 2376 iterations with this patch
before I had to use the machine for other purposes.
Any comments on this approach to squelching the error message? Or other ideas
for achieving the same?
Regards,
Jim
> src/qemu/qemu_monitor.c | 29 ++++++++++++++++-------------
> 1 file changed, 16 insertions(+), 13 deletions(-)
>
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 5fc23f13d3..751ec8ba6c 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -113,6 +113,7 @@ struct _qemuMonitor {
>
> /* true if qemu no longer wants 'props' sub-object of object-add */
> bool objectAddNoWrap;
> + bool goteof;
> };
>
> /**
> @@ -526,10 +527,10 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> {
> qemuMonitor *mon = opaque;
> bool error = false;
> - bool eof = false;
> bool hangup = false;
>
> virObjectRef(mon);
> + mon->goteof = false;
>
> /* lock access to the monitor and protect fd */
> virObjectLock(mon);
> @@ -544,7 +545,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>
> if (mon->lastError.code != VIR_ERR_OK) {
> if (cond & (G_IO_HUP | G_IO_ERR))
> - eof = true;
> + mon->goteof = true;
> error = true;
> } else {
> if (cond & G_IO_OUT) {
> @@ -562,7 +563,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> if (errno == ECONNRESET)
> hangup = true;
> } else if (got == 0) {
> - eof = true;
> + mon->goteof = true;
> } else {
> /* Ignore hangup/error cond if we read some data, to
> * give time for that data to be consumed */
> @@ -575,22 +576,19 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>
> if (cond & G_IO_HUP) {
> hangup = true;
> - if (!error) {
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("End of file from qemu monitor"));
> - eof = true;
> - }
> + if (!error)
> + mon->goteof = true;
> }
>
> - if (!error && !eof &&
> + if (!error && !mon->goteof &&
> cond & G_IO_ERR) {
> virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> _("Invalid file descriptor while waiting for monitor"));
> - eof = true;
> + mon->goteof = true;
> }
> }
>
> - if (error || eof) {
> + if (error || mon->goteof) {
> if (hangup && mon->logFunc != NULL) {
> /* Check if an error message from qemu is available and if so, use
> * it to overwrite the actual message. It's done only in early
> @@ -609,7 +607,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> /* Already have an error, so clear any new error */
> virResetLastError();
> } else {
> - if (virGetLastErrorCode() == VIR_ERR_OK)
> + if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
> virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> _("Error while processing monitor IO"));
> virCopyLastError(&mon->lastError);
> @@ -630,7 +628,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> /* We have to unlock to avoid deadlock against command thread,
> * but is this safe ? I think it is, because the callback
> * will try to acquire the virDomainObj *mutex next */
> - if (eof) {
> + if (mon->goteof) {
> qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
> virDomainObj *vm = mon->vm;
>
> @@ -949,6 +947,11 @@ qemuMonitorSend(qemuMonitor *mon,
> virSetError(&mon->lastError);
> return -1;
> }
> + if (mon->goteof) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("End of file from qemu monitor"));
> + return -1;
> + }
>
> mon->msg = msg;
> qemuMonitorUpdateWatch(mon);
>
More information about the libvir-list
mailing list