[PATCH v2 20/27] conf: Introduce virtio-mem <memory/> model
Peter Krempa
pkrempa at redhat.com
Fri Dec 4 12:17:52 UTC 2020
On Thu, Dec 03, 2020 at 13:36:23 +0100, Michal Privoznik wrote:
> QEMU gained this new virtio-mem model. It's similar to pc-dimm
> in a sense that guest uses it as memory, but in a sense very
> different from it as it can dynamically change allocation,
> without need for hotplug. More specifically, the device has two
> attributes more (it has more of course, but these two are
> important here):
>
> 1) block-size - the granularity of the device. You can imagine
> the device being divided into blocks, each 'block-size' long.
>
> 2) requested-size - the portion of the device that is in use by
> the guest.
>
> And it all works like this: at guest startup/hotplug both
> block-size and requested-size are specified. When sysadmin wants
> to give some more memory to the guest, or take some back, they
> change the 'requested-size' attribute which is propagated to the
> guest where virtio-mem module takes corresponding action.
> This means, that 'requested-size' must be a whole number product
> of 'block-size' and of course has to be in rage [0, max-size]
> (including). The 'max-size' is yet another attribute but if not
> set it's "inherited" from corresponding memory-backend-* object.
>
> Therefore, two new elements are introduced under <target/>, to
> reflect these attributes:
>
> <memory model='virtio'>
> <target>
> <size unit='KiB'>4194304</size>
> <node>0</node>
> <block unit='KiB'>2048</block>
> <requested unit='KiB'>524288</requested>
> </target>
> <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
> </memory>
>
> The intent here is that <requested/> will be allowed to change
> via virDomainUpdateDeviceFlags() API.
>
> Note, QEMU does inform us about success of allocation via an
> event - this is covered in next patches.
>
> Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
> ---
> docs/formatdomain.rst | 22 ++++
> docs/schemas/domaincommon.rng | 10 ++
> src/conf/domain_conf.c | 103 ++++++++++++++++--
> src/conf/domain_conf.h | 5 +
> .../memory-hotplug-virtio-mem.xml | 78 +++++++++++++
> ...emory-hotplug-virtio-mem.x86_64-latest.xml | 1 +
> tests/qemuxml2xmltest.c | 1 +
> 7 files changed, 213 insertions(+), 7 deletions(-)
> create mode 100644 tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml
> create mode 120000 tests/qemuxml2xmloutdata/memory-hotplug-virtio-mem.x86_64-latest.xml
>
> diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
> index ca6bc0432e..3990728939 100644
> --- a/docs/formatdomain.rst
> +++ b/docs/formatdomain.rst
> @@ -7187,6 +7187,17 @@ Example: usage of the memory devices
> </label>
> </target>
> </memory>
> + <memory model='virtio'>
> + <source>
> + <path>/tmp/virtio_mem</path>
> + </source>
> + <target>
> + <size unit='KiB'>1048576</size>
> + <node>0</node>
> + <block unit='KiB'>2048</block>
> + <requested unit='KiB'>524288</requested>
> + </target>
> + </memory>
> <memory model='virtio' access='shared'>
> <source>
> <path>/tmp/virtio_pmem</path>
> @@ -7300,6 +7311,17 @@ Example: usage of the memory devices
> so other backend types should use the ``readonly`` element. :since:`Since
> 5.0.0`
>
> + ``block``
> + The size of an individual block, granularity of division of memory module.
> + Must be power of two and at least equal to size of a transparent hugepage
> + (2MiB on x84_64). The default is hypervisor dependant. This is valid for
> + ``virtio`` model only and mutually exclusive with ``pmem``.
> +
> + ``requested``
> + The total size of blocks exposed to the guest. Must respect ``block``
> + granularity. This is valid for ``virtio`` model only and mutually
> + exclusive with ``pmem``.
Docs don't mention interactions of <size> and <requested>. Is size the
actual size? In such case you'll need to clear it on startup and
populate it with the actual size later.
The issue with <pmem> was mentioned earlier.
[...]
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 935bea1804..0551f6f266 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -6764,10 +6764,23 @@ virDomainMemoryDefValidate(const virDomainMemoryDef *mem,
> return -1;
> }
> } else {
> - /* TODO: plain virtio-mem behaves differently then virtio-pmem */
> - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> - _("virtio-mem is not supported yet. <pmem/> is required"));
> - return -1;
> + if (mem->requestedsize > mem->size) {
> + virReportError(VIR_ERR_XML_DETAIL, "%s",
> + _("requested size must be smaller than @size"));
> + return -1;
> + }
> +
> + if (!VIR_IS_POW2(mem->blocksize)) {
> + virReportError(VIR_ERR_XML_DETAIL, "%s",
> + _("block size must be a power of two"));
> + return -1;
> + }
Docs state that blocksize must be also a multiple of page size.
> +
> + if (mem->requestedsize % mem->blocksize != 0) {
> + virReportError(VIR_ERR_XML_DETAIL, "%s",
> + _("requested size must be an integer multiple of block size"));
> + return -1;
> + }
> }
> break;
>
> @@ -16774,9 +16787,25 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node,
> case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> def->s.virtio.path = virXPathString("string(./path)", ctxt);
>
> - if (virXPathBoolean("boolean(./pmem)", ctxt))
> + if (virXPathBoolean("boolean(./pmem)", ctxt)) {
> def->s.virtio.pmem = true;
> + } else {
> + if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt,
> + &def->s.virtio.pagesize, false, false) < 0)
> + return -1;
>
> + if ((nodemask = virXPathString("string(./nodemask)", ctxt))) {
> + if (virBitmapParse(nodemask, &def->s.virtio.sourceNodes,
> + VIR_DOMAIN_CPUMASK_LEN) < 0)
> + return -1;
> +
> + if (virBitmapIsAllClear(def->s.virtio.sourceNodes)) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Invalid value of 'nodemask': %s"), nodemask);
> + return -1;
Move this to virDomainMemoryDefValidate too.
> + }
> + }
> + }
> break;
>
> case VIR_DOMAIN_MEMORY_MODEL_NONE:
[...]
> @@ -16831,6 +16861,27 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node,
>
> if (virXPathBoolean("boolean(./readonly)", ctxt))
> def->readonly = true;
> +
> + break;
> +
> + case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> + if (!def->s.virtio.pmem) {
> + if (virDomainParseMemory("./block", "./block/@unit", ctxt,
> + &def->blocksize, false, false) < 0)
> + return -1;
> +
> + if (virDomainParseMemory("./requested", "./requested/@unit", ctxt,
> + &def->requestedsize, false, false) < 0)
> + return -1;
> + }
If size is current size it should be optional.
> +
> + break;
> +
> + case VIR_DOMAIN_MEMORY_MODEL_NONE:
> + case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> + case VIR_DOMAIN_MEMORY_MODEL_LAST:
> + /* nada */
> + break;
> }
>
> return 0;
> @@ -18649,7 +18700,9 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def,
> /* target info -> always present */
> if (tmp->model != mem->model ||
> tmp->targetNode != mem->targetNode ||
> - tmp->size != mem->size)
> + tmp->size != mem->size ||
If size is the current size and being actively updated, this lookup
might not actually work if it's updated between checking and updating.
> + tmp->blocksize != mem->blocksize ||
> + tmp->requestedsize != mem->requestedsize)
> continue;
>
> switch (mem->model) {
[...]
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index efaa4c5473..f16dc0a029 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -2327,8 +2327,11 @@ struct _virDomainMemoryDef {
> bool pmem;
> } nvdimm; /* VIR_DOMAIN_MEMORY_MODEL_NVDIMM */
> struct {
> + // nodemask + hugepages + no prealloc
Leftovers?
> char *path; /* Required for pmem, otherwise optional */
> bool pmem;
> + virBitmapPtr sourceNodes;
> + unsigned long long pagesize; /* kibibytes */
> } virtio; /* VIR_DOMAIN_MEMORY_MODEL_VIRTIO */
> } s;
>
More information about the libvir-list
mailing list