[libvirt] [PATCH v3] Add API to change qemu agent response timeout
Michal Prívozník
mprivozn at redhat.com
Thu Nov 14 18:16:50 UTC 2019
On 11/13/19 11:06 PM, Jonathon Jongsma wrote:
> Some layered products such as oVirt have requested a way to avoid being
> blocked by guest agent commands when querying a loaded vm. For example,
> many guest agent commands are polled periodically to monitor changes,
> and rather than blocking the calling process, they'd prefer to simply
> time out when an agent query is taking too long.
>
> This patch adds a way for the user to specify a custom agent timeout
> that is applied to all agent commands.
>
> One special case to note here is the 'guest-sync' command. 'guest-sync'
> is issued internally prior to calling any other command. (For example,
> when libvirt wants to call 'guest-get-fsinfo', we first call
> 'guest-sync' and then call 'guest-get-fsinfo').
>
> Previously, the 'guest-sync' command used a 5-second timeout
> (VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command that
> followed always blocked indefinitely
> (VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a
> custom timeout is specified that is shorter than
> 5 seconds, this new timeout is also used for 'guest-sync'. If there is
> no custom timeout or if the custom timeout is longer than 5 seconds, we
> will continue to use the 5-second timeout.
>
> Signed-off-by: Jonathon Jongsma <jjongsma at redhat.com>
> ---
> Changes in v3:
> - Changed enum names per Daniel's suggestion
> - incorporated Michal's review and fixup patches
> - As discussed on the mailing list, this patch does not acquire any jobs, but
> simply uses mutex locking for data integrity
> - libvirt release versions updated
> - format and parse private agentTimeout data in status xml
>
> include/libvirt/libvirt-domain.h | 10 +++
> include/libvirt/libvirt-qemu.h | 8 +--
> src/driver-hypervisor.h | 6 ++
> src/libvirt-domain.c | 49 +++++++++++++
> src/libvirt_public.syms | 5 ++
> src/qemu/qemu_agent.c | 70 ++++++++++---------
> src/qemu/qemu_agent.h | 3 +
> src/qemu/qemu_domain.c | 6 ++
> src/qemu/qemu_domain.h | 1 +
> src/qemu/qemu_driver.c | 47 +++++++++++++
> src/remote/remote_driver.c | 1 +
> src/remote/remote_protocol.x | 18 ++++-
> src/remote_protocol-structs | 9 +++
> .../blockjob-blockdev-in.xml | 1 +
> .../blockjob-mirror-in.xml | 1 +
> .../disk-secinfo-upgrade-out.xml | 1 +
> .../migration-in-params-in.xml | 1 +
> .../migration-out-nbd-out.xml | 1 +
> .../migration-out-nbd-tls-out.xml | 1 +
> .../migration-out-params-in.xml | 1 +
> tests/qemustatusxml2xmldata/modern-in.xml | 1 +
> .../qemustatusxml2xmldata/vcpus-multi-in.xml | 1 +
> tools/virsh-domain.c | 52 ++++++++++++++
> 23 files changed, 257 insertions(+), 37 deletions(-)
Close enough :-)
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 54dde34f15..86358341d5 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -2093,6 +2093,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
> if (!(priv->dbusVMStates = virHashCreate(5, dbusVMStateHashFree)))
> goto error;
>
> + /* agent commands block by default, user can choose different behavior */
> + priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
> priv->driver = opaque;
>
> @@ -2875,6 +2877,8 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf,
> if (qemuDomainObjPrivateXMLFormatSlirp(buf, vm) < 0)
> return -1;
>
> + virBufferAsprintf(buf, "<agentTimeout>%i</agentTimeout>\n", priv->agentTimeout);
> +
> return 0;
> }
>
> @@ -3672,6 +3676,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt,
>
> priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) == 1;
>
> + virXPathInt("string(./agentTimeout)", ctxt, &priv->agentTimeout);
This is not safe. If the value in status XML is not valid number,
ignoring the error is not good.
> +
> return 0;
>
> error:
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index ab00b25789..98a9540275 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -293,6 +293,7 @@ struct _qemuDomainObjPrivate {
> virDomainChrSourceDefPtr monConfig;
> bool monError;
> unsigned long long monStart;
> + int agentTimeout;
>
> qemuAgentPtr agent;
> bool agentError;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index c969a3d463..77a26ccf2e 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -22666,6 +22666,52 @@ qemuDomainGetGuestInfo(virDomainPtr dom,
> return ret;
> }
>
> +
> +static int
> +qemuDomainAgentSetResponseTimeout(virDomainPtr dom,
> + int timeout,
> + unsigned int flags)
> +{
> + virDomainObjPtr vm = NULL;
> + int ret = -1;
> +
> + virCheckFlags(0, -1);
> +
> + if (timeout < VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN) {
> + virReportError(VIR_ERR_INVALID_ARG,
> + _("guest agent timeout '%d' is "
> + "less than the minimum '%d'"),
> + timeout, VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN);
> + return -1;
> + }
> +
> + if (!(vm = qemuDomainObjFromDomain(dom)))
> + return -1;
> +
> + if (virDomainAgentSetResponseTimeoutEnsureACL(dom->conn, vm->def) < 0)
> + goto cleanup;
> +
> + /* If domain has an agent, change its timeout. Otherwise just save the
> + * request so that we can set the timeout when the agent appears */
> + if (qemuDomainAgentAvailable(vm, false)) {
> + /* We don't need to acquire a job since we're not interacting with the
> + * agent or the qemu monitor. We're only setting a struct member, so
> + * just acquire the mutex lock. Worst case, any in-process agent
> + * commands will use the newly-set agent timeout. */
> + virObjectLock(QEMU_DOMAIN_PRIVATE(vm)->agent);
> + qemuAgentSetResponseTimeout(QEMU_DOMAIN_PRIVATE(vm)->agent, timeout);
> + virObjectUnlock(QEMU_DOMAIN_PRIVATE(vm)->agent);
> + }
> +
> + QEMU_DOMAIN_PRIVATE(vm)->agentTimeout = timeout;
This is the point where we need to save status XML if the domain is running.
> + ret = 0;
> +
> + cleanup:
> + virDomainObjEndAPI(&vm);
> + return ret;
> +}
> +
> +
> diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
> index 0feb21ef17..def4107d27 100644
> --- a/tools/virsh-domain.c
> +++ b/tools/virsh-domain.c
> @@ -9832,6 +9832,52 @@ cmdQemuAgentCommand(vshControl *ctl, const vshCmd *cmd)
>
> return ret;
> }
> +/*
> + * "guest-agent-timeout" command
> + */
> +static const vshCmdInfo info_guest_agent_timeout[] = {
> + {.name = "help",
> + .data = N_("Set the guest agent timeout")
> + },
> + {.name = "desc",
> + .data = N_("Set the number of seconds to wait for a response from the guest agent.")
> + },
> + {.name = NULL}
> +};
> +
> +static const vshCmdOptDef opts_guest_agent_timeout[] = {
> + VIRSH_COMMON_OPT_DOMAIN_FULL(0),
> + {.name = "timeout",
> + .type = VSH_OT_INT,
> + .flags = VSH_OFLAG_REQ_OPT,
> + .help = N_("timeout seconds.")
> + },
> + {.name = NULL}
> +};
> +
> +static bool
> +cmdGuestAgentTimeout(vshControl *ctl, const vshCmd *cmd)
> +{
> + virDomainPtr dom = NULL;
> + int timeout;
> + const unsigned int flags = 0;
> + bool ret = false;
> +
> + dom = virshCommandOptDomain(ctl, cmd, NULL);
> + if (dom == NULL)
> + goto cleanup;
> +
> + if (vshCommandOptInt(ctl, cmd, "timeout", &timeout) < 0)
> + goto cleanup;
> +
> + if (virDomainAgentSetResponseTimeout(dom, timeout, flags) < 0)
> + goto cleanup;
> +
> + ret = true;
> + cleanup:
> + virshDomainFree(dom);
> + return ret;
> +}
>
> /*
> * "lxc-enter-namespace" namespace
> @@ -14492,6 +14538,12 @@ const vshCmdDef domManagementCmds[] = {
> .info = info_qemu_agent_command,
> .flags = 0
> },
> + {.name = "guest-agent-timeout",
> + .handler = cmdGuestAgentTimeout,
> + .opts = opts_guest_agent_timeout,
> + .info = info_guest_agent_timeout,
> + .flags = 0
> + },
> {.name = "reboot",
> .handler = cmdReboot,
> .opts = opts_reboot,
>
We need to document new virsh command.
I'm fixing all the isues I've raised, ACKing and pushing. Can you please
post a follow up patch that updates news.xml? I thinkg it's worth
recording in the release notes.
Reviewed-by: Michal Privoznik <mprivozn at redhat.com>
Michal
More information about the libvir-list
mailing list