[PATCH v2 20/27] conf: Introduce virtio-mem <memory/> model

Peter Krempa pkrempa at redhat.com
Fri Dec 4 12:17:52 UTC 2020


On Thu, Dec 03, 2020 at 13:36:23 +0100, Michal Privoznik wrote:
> QEMU gained this new virtio-mem model. It's similar to pc-dimm
> in a sense that guest uses it as memory, but in a sense very
> different from it as it can dynamically change allocation,
> without need for hotplug. More specifically, the device has two
> attributes more (it has more of course, but these two are
> important here):
> 
>  1) block-size - the granularity of the device. You can imagine
>     the device being divided into blocks, each 'block-size' long.
> 
>  2) requested-size - the portion of the device that is in use by
>     the guest.
> 
> And it all works like this: at guest startup/hotplug both
> block-size and requested-size are specified. When sysadmin wants
> to give some more memory to the guest, or take some back, they
> change the 'requested-size' attribute which is propagated to the
> guest where virtio-mem module takes corresponding action.
> This means, that 'requested-size' must be a whole number product
> of 'block-size' and of course has to be in rage [0, max-size]
> (including). The 'max-size' is yet another attribute but if not
> set it's "inherited" from corresponding memory-backend-* object.
> 
> Therefore, two new elements are introduced under <target/>, to
> reflect these attributes:
> 
>   <memory model='virtio'>
>     <target>
>       <size unit='KiB'>4194304</size>
>       <node>0</node>
>       <block unit='KiB'>2048</block>
>       <requested unit='KiB'>524288</requested>
>     </target>
>     <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
>   </memory>
> 
> The intent here is that <requested/> will be allowed to change
> via virDomainUpdateDeviceFlags() API.
> 
> Note, QEMU does inform us about success of allocation via an
> event - this is covered in next patches.
> 
> Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
> ---
>  docs/formatdomain.rst                         |  22 ++++
>  docs/schemas/domaincommon.rng                 |  10 ++
>  src/conf/domain_conf.c                        | 103 ++++++++++++++++--
>  src/conf/domain_conf.h                        |   5 +
>  .../memory-hotplug-virtio-mem.xml             |  78 +++++++++++++
>  ...emory-hotplug-virtio-mem.x86_64-latest.xml |   1 +
>  tests/qemuxml2xmltest.c                       |   1 +
>  7 files changed, 213 insertions(+), 7 deletions(-)
>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml
>  create mode 120000 tests/qemuxml2xmloutdata/memory-hotplug-virtio-mem.x86_64-latest.xml
> 
> diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
> index ca6bc0432e..3990728939 100644
> --- a/docs/formatdomain.rst
> +++ b/docs/formatdomain.rst
> @@ -7187,6 +7187,17 @@ Example: usage of the memory devices
>           </label>
>         </target>
>       </memory>
> +     <memory model='virtio'>
> +       <source>
> +         <path>/tmp/virtio_mem</path>
> +       </source>
> +       <target>
> +         <size unit='KiB'>1048576</size>
> +         <node>0</node>
> +         <block unit='KiB'>2048</block>
> +         <requested unit='KiB'>524288</requested>
> +       </target>
> +     </memory>
>       <memory model='virtio' access='shared'>
>         <source>
>           <path>/tmp/virtio_pmem</path>
> @@ -7300,6 +7311,17 @@ Example: usage of the memory devices
>        so other backend types should use the ``readonly`` element. :since:`Since
>        5.0.0`
>  
> +   ``block``
> +     The size of an individual block, granularity of division of memory module.
> +     Must be power of two and at least equal to size of a transparent hugepage
> +     (2MiB on x84_64). The default is hypervisor dependant. This is valid for
> +     ``virtio`` model only and mutually exclusive with ``pmem``.
> +
> +   ``requested``
> +     The total size of blocks exposed to the guest. Must respect ``block``
> +     granularity. This is valid for ``virtio`` model only and mutually
> +     exclusive with ``pmem``.

Docs don't mention interactions of <size> and <requested>. Is size the
actual size? In such case you'll need to clear it on startup and
populate it with the actual size later.

The issue with <pmem> was mentioned earlier.

[...]

> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 935bea1804..0551f6f266 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -6764,10 +6764,23 @@ virDomainMemoryDefValidate(const virDomainMemoryDef *mem,
>                  return -1;
>              }
>          } else {
> -            /* TODO: plain virtio-mem behaves differently then virtio-pmem */
> -            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> -                           _("virtio-mem is not supported yet. <pmem/> is required"));
> -            return -1;
> +            if (mem->requestedsize > mem->size) {
> +                virReportError(VIR_ERR_XML_DETAIL, "%s",
> +                               _("requested size must be smaller than @size"));
> +                return -1;
> +            }
> +
> +            if (!VIR_IS_POW2(mem->blocksize)) {
> +                virReportError(VIR_ERR_XML_DETAIL, "%s",
> +                               _("block size must be a power of two"));
> +                return -1;
> +            }

Docs state that blocksize must be also a multiple of page size.

> +
> +            if (mem->requestedsize % mem->blocksize != 0) {
> +                virReportError(VIR_ERR_XML_DETAIL, "%s",
> +                               _("requested size must be an integer multiple of block size"));
> +                return -1;
> +            }
>          }
>          break;
>  
> @@ -16774,9 +16787,25 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node,
>      case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
>          def->s.virtio.path = virXPathString("string(./path)", ctxt);
>  
> -        if (virXPathBoolean("boolean(./pmem)", ctxt))
> +        if (virXPathBoolean("boolean(./pmem)", ctxt)) {
>              def->s.virtio.pmem = true;
> +        } else {
> +            if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt,
> +                                     &def->s.virtio.pagesize, false, false) < 0)
> +                return -1;
>  
> +            if ((nodemask = virXPathString("string(./nodemask)", ctxt))) {
> +                if (virBitmapParse(nodemask, &def->s.virtio.sourceNodes,
> +                                   VIR_DOMAIN_CPUMASK_LEN) < 0)
> +                    return -1;
> +
> +                if (virBitmapIsAllClear(def->s.virtio.sourceNodes)) {
> +                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> +                                   _("Invalid value of 'nodemask': %s"), nodemask);
> +                    return -1;

Move this to virDomainMemoryDefValidate too.

> +                }
> +            }
> +        }
>          break;
>  
>      case VIR_DOMAIN_MEMORY_MODEL_NONE:

[...]


> @@ -16831,6 +16861,27 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node,
>  
>          if (virXPathBoolean("boolean(./readonly)", ctxt))
>              def->readonly = true;
> +
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +        if (!def->s.virtio.pmem) {
> +            if (virDomainParseMemory("./block", "./block/@unit", ctxt,
> +                                     &def->blocksize, false, false) < 0)
> +                return -1;
> +
> +            if (virDomainParseMemory("./requested", "./requested/@unit", ctxt,
> +                                     &def->requestedsize, false, false) < 0)
> +                return -1;
> +        }

If size is current size it should be optional.

> +
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +    case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +    case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +        /* nada */
> +        break;
>      }
>  
>      return 0;
> @@ -18649,7 +18700,9 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def,
>          /* target info -> always present */
>          if (tmp->model != mem->model ||
>              tmp->targetNode != mem->targetNode ||
> -            tmp->size != mem->size)
> +            tmp->size != mem->size ||

If size is the current size and being actively updated, this lookup
might not actually work if it's updated between checking and updating.

> +            tmp->blocksize != mem->blocksize ||
> +            tmp->requestedsize != mem->requestedsize)
>              continue;
>  
>          switch (mem->model) {

[...]

> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index efaa4c5473..f16dc0a029 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -2327,8 +2327,11 @@ struct _virDomainMemoryDef {
>              bool pmem;
>          } nvdimm; /* VIR_DOMAIN_MEMORY_MODEL_NVDIMM */
>          struct {
> +            // nodemask + hugepages + no prealloc

Leftovers?

>              char *path; /* Required for pmem, otherwise optional */
>              bool pmem;
> +            virBitmapPtr sourceNodes;
> +            unsigned long long pagesize; /* kibibytes */
>          } virtio; /* VIR_DOMAIN_MEMORY_MODEL_VIRTIO */
>      } s;
>  




More information about the libvir-list mailing list