[libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled

Jiri Denemark jdenemar at redhat.com
Tue Nov 27 07:58:14 UTC 2018


On Mon, Nov 26, 2018 at 18:38:06 -0500, John Ferlan wrote:
> Support for nested KVM is handled via a kernel module configuration
> adjustment which if done after libvirtd is started and/or the last
> QEMU capabilities adjustment can result in the inability to start a
> guest and use nested KVM until the capabilities cache is invalidated.
> This is because without knowing, the CPU settings for a guest may not
> add the vmx=on to/for the guest config.
> 
> Thus, let's fetch and save the setting during initialization and then
> when the capabilities are checked for various host related adjustments
> that could affect whether the capabilities cache is updated add a check
> whether the nested value was set for Intel, AMD, or s390 to force a
> refetch of the capabilities.
> 
> Signed-off-by: John Ferlan <jferlan at redhat.com>
> ---
>  v1 was part of an RFC:
> 
>  https://www.redhat.com/archives/libvir-list/2018-November/msg00494.html
> 
>  This patch alters that code slightly to add the check Marc Hartmayer
>  requested for S390 and to use "kvm" in the API names and variables to
>  make it clearer that it's not CapsIsNested but CapsKVMIsNested.
> 
>  If it's felt the new check should slide down further in virQEMUCapsIsValid
>  then that's fine - just let me know what it should follow.
> 
>  src/qemu/qemu_capabilities.c | 45 ++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_capspriv.h     |  2 ++
>  tests/qemucapabilitiestest.c |  3 +++
>  3 files changed, 50 insertions(+)
> 
> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> index fde27010e4..c377733fe6 100644
> --- a/src/qemu/qemu_capabilities.c
> +++ b/src/qemu/qemu_capabilities.c
> @@ -40,6 +40,7 @@
>  #include "virnodesuspend.h"
>  #include "virnuma.h"
>  #include "virhostcpu.h"
> +#include "virkmod.h"
>  #include "qemu_monitor.h"
>  #include "virstring.h"
>  #include "qemu_hostdev.h"
> @@ -557,6 +558,7 @@ struct _virQEMUCaps {
>      virObject parent;
>  
>      bool usedQMP;
> +    bool kvmIsNested;
>  
>      char *binary;
>      time_t ctime;
> @@ -1528,6 +1530,7 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps)
>          return NULL;
>  
>      ret->usedQMP = qemuCaps->usedQMP;
> +    ret->kvmIsNested = qemuCaps->kvmIsNested;
>  
>      if (VIR_STRDUP(ret->binary, qemuCaps->binary) < 0)
>          goto error;
> @@ -3587,6 +3590,9 @@ virQEMUCapsLoadCache(virArch hostArch,
>      virQEMUCapsInitHostCPUModel(qemuCaps, hostArch, VIR_DOMAIN_VIRT_KVM);
>      virQEMUCapsInitHostCPUModel(qemuCaps, hostArch, VIR_DOMAIN_VIRT_QEMU);
>  
> +    qemuCaps->kvmIsNested = virXPathBoolean("count(./kvmIsNested) > 0",

I think the XPath expression could be as simple as "./kvmIsNested" or
maybe "boolean(./kvmIsNested)", but it doesn't really matter.

> +                                            ctxt) > 0;
> +
>      ret = 0;
>   cleanup:
>      VIR_FREE(str);
> @@ -3806,6 +3812,9 @@ virQEMUCapsFormatCache(virQEMUCapsPtr qemuCaps)
>      if (qemuCaps->sevCapabilities)
>          virQEMUCapsFormatSEVInfo(qemuCaps, &buf);
>  
> +    if (qemuCaps->kvmIsNested)
> +        virBufferAddLit(&buf, "<kvmIsNested/>\n");
> +
>      virBufferAdjustIndent(&buf, -2);
>      virBufferAddLit(&buf, "</qemuCaps>\n");
>  
> @@ -3846,6 +3855,30 @@ virQEMUCapsSaveFile(void *data,
>  }
>  
>  
> +static bool
> +virQEMUCapsKVMIsNested(void)
> +{
> +    VIR_AUTOFREE(char *) kConfig = NULL;
> +
> +    /* Intel, AMD, and s390 related checks */
> +    if ((kConfig = virKModConfig()) &&
> +        (strstr(kConfig, "kvm_intel nested=1") ||
> +         strstr(kConfig, "kvm_amd nested=1") ||
> +         strstr(kConfig, "kvm nested=1")))
> +        return true;
> +    return false;
> +}
> +
> +
> +void
> +virQEMUCapsClearKVMIsNested(virQEMUCapsPtr qemuCaps)
> +{
> +    /* For qemucapabilitiestest to avoid printing the </kvmIsNested> on
> +     * hosts with nested set in the kernel */
> +    qemuCaps->kvmIsNested = false;
> +}

I don't see why this function should be needed (see below for more
details).

> +
> +
>  static bool
>  virQEMUCapsIsValid(void *data,
>                     void *privData)
> @@ -3854,6 +3887,7 @@ virQEMUCapsIsValid(void *data,
>      virQEMUCapsCachePrivPtr priv = privData;
>      bool kvmUsable;
>      struct stat sb;
> +    bool kvmIsNested;
>  
>      if (!qemuCaps->binary)
>          return true;
> @@ -3886,6 +3920,15 @@ virQEMUCapsIsValid(void *data,
>          return false;
>      }
>  
> +    /* Check if someone changed the nested={0|1} value for the kernel from
> +     * the previous time we checked. If so, then refresh the capabilities. */
> +    kvmIsNested = virQEMUCapsKVMIsNested();
> +    if (kvmIsNested != qemuCaps->kvmIsNested) {
> +        VIR_WARN("changed kernel nested kvm value was %d", qemuCaps->kvmIsNested);
> +        qemuCaps->kvmIsNested = kvmIsNested;
> +        return false;
> +    }
> +
>      if (!virQEMUCapsGuestIsNative(priv->hostArch, qemuCaps->arch)) {
>          VIR_DEBUG("Guest arch (%s) is not native to host arch (%s), "
>                    "skipping KVM-related checks",
> @@ -4472,6 +4515,8 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
>      if (virQEMUCapsInitQMPMonitor(qemuCaps, cmd->mon) < 0)
>          goto cleanup;
>  
> +    qemuCaps->kvmIsNested = virQEMUCapsKVMIsNested();
> +

This assignment should be done in the caller, i.e., in
virQEMUCapsNewForBinaryInternal, probably somewhere close to where
microcodeVersion is set, since both should be done only if KVM is enabled.

>      if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM)) {
>          virQEMUCapsInitQMPCommandAbort(cmd);
>          if ((rc = virQEMUCapsInitQMPCommandRun(cmd, true)) != 0) {
> diff --git a/src/qemu/qemu_capspriv.h b/src/qemu/qemu_capspriv.h
> index 8d1a40fe74..edfe2cd6f6 100644
> --- a/src/qemu/qemu_capspriv.h
> +++ b/src/qemu/qemu_capspriv.h
> @@ -48,6 +48,8 @@ int
>  virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps,
>                            qemuMonitorPtr mon);
>  
> +void virQEMUCapsClearKVMIsNested(virQEMUCapsPtr qemuCaps);
> +
>  int
>  virQEMUCapsInitQMPMonitorTCG(virQEMUCapsPtr qemuCaps,
>                               qemuMonitorPtr mon);
> diff --git a/tests/qemucapabilitiestest.c b/tests/qemucapabilitiestest.c
> index 8fe5a55e1d..90942c6fce 100644
> --- a/tests/qemucapabilitiestest.c
> +++ b/tests/qemucapabilitiestest.c
> @@ -63,6 +63,9 @@ testQemuCaps(const void *opaque)
>                                    qemuMonitorTestGetMonitor(mon)) < 0)
>          goto cleanup;
>  
> +    /* Don't apply what the host has... force clear for testing purposes */
> +    virQEMUCapsClearKVMIsNested(capsActual);
> +

We call virQEMUCapsInitQMPMonitor to parse the QEMU reply files via a
fake monitor. Your code sets kvmIsNested inside virQEMUCapsInitQMP
(which is one level above virQEMUCapsInitQMPMonitor in the call stack),
so in this test it is never set according to the host environment. That
is even more true once kvmIsNested is set properly in
virQEMUCapsNewForBinaryInternal.

>      if (virQEMUCapsGet(capsActual, QEMU_CAPS_KVM)) {
>          qemuMonitorResetCommandID(qemuMonitorTestGetMonitor(mon));
>          if (virQEMUCapsInitQMPMonitorTCG(capsActual,

Jirka




More information about the libvir-list mailing list