[PATCH v2 12/27] conf: Introduce virtio-pmem <memory/> model

Peter Krempa pkrempa at redhat.com
Fri Dec 4 08:29:47 UTC 2020


On Thu, Dec 03, 2020 at 13:36:15 +0100, Michal Privoznik wrote:
> The virtio-pmem is a virtio variant of NVDIMM and just like
> NVDIMM virtio-pmem also allows accessing host pages bypassing
> guest page cache. The difference is that if a regular file is
> used to back guest's NVDIMM (model='nvdimm') the persistence of
> guest writes might not be guaranteed while with virtio-pmem it
> is.
> 
> To express this new model at domain XML level, I've chose the
> following:
> 
>   <memory model='virtio' access='shared'>
>     <source>
>       <path>/tmp/virtio_pmem</path>
>       <pmem/>
>     </source>
>     <target>
>       <size unit='KiB'>524288</size>
>     </target>
>     <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
>   </memory>
> 
> The <source/> looks like for model='nvdimm', except <pmem/> is
> required - this is to allow for future expansion when the
> model='virtio' without <pmem/> will describe virtio-mem (which is
> a different device).

This is not a good design. You use a property of <source> which
describes the backend to influence the model of the device, which is
definitely a frontend property as witnessed also by the ABI stability
check:

[I'm moving some hunks around for locality of my point]

> @@ -24223,6 +24292,21 @@ virDomainMemoryDefCheckABIStability(virDomainMemoryDefPtr src,
>                             _("Target NVDIMM UUID doesn't match source NVDIMM"));
>              return false;
>          }
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +        if (src->s.virtio.pmem != dst->s.virtio.pmem) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                           _("Target NVDIMM pmem flag doesn't match "
> +                             "source NVDIMM pmem flag"));

(also please error messages with no line breaks)

> +            return false;
> +        }
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +    case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +    case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +        break;
>      }
>  
>      return virDomainDeviceInfoCheckABIStability(&src->info, &dst->info);

> @@ -6721,6 +6742,39 @@ virDomainMemoryDefValidate(const virDomainMemoryDef *mem,
>                             _("label size is required for NVDIMM device"));
>              return -1;
>          }
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +        if (mem->s.virtio.pmem) {
> +            if (!mem->s.virtio.path) {
> +                virReportError(VIR_ERR_XML_DETAIL, "%s",
> +                               _("path is required for model 'virtio'"));
> +                return -1;
> +            }
> +
> +            if (mem->access != VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
> +                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                               _("shared access mode required for virtio-pmem device"));
> +                return -1;
> +            }
> +
> +            if (mem->targetNode != -1) {
> +                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                               _("virtio-pmem does not support NUMA nodes"));
> +                return -1;
> +            }
> +        } else {
> +            /* TODO: plain virtio-mem behaves differently then virtio-pmem */
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                           _("virtio-mem is not supported yet. <pmem/> is required"));
> +            return -1;

This also doesn't seem to inspire confidence in the design.

I think we'll need to use 'virtio-mem' and 'virtio-pmem' for the models.

> +        }
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +    case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +    case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +        break;
>      }
>  
>      return 0;

[...]

> Another difference between NVDIMM and virtio-pmem is that while
> the former supports NUMA node locality the latter doesn't. And
> also, the latter goes onto PCI bus and not into a DIMM module.

I think you've used enough arguments to avoid starting the commit
message that it's a variant of nvdimm :). It's similar in usage, but
definitely very dissimilar in implementation.

[...]

> @@ -5373,15 +5377,31 @@ static int
>  virDomainMemoryDefPostParse(virDomainMemoryDefPtr mem,
>                              const virDomainDef *def)
>  {
> -    /* Although only the QEMU driver implements PPC64 support, this
> -     * code is related to the platform specification (PAPR), i.e. it
> -     * is hypervisor agnostic, and any future PPC64 hypervisor driver
> -     * will have the same restriction.
> -     */
> -    if (ARCH_IS_PPC64(def->os.arch) &&
> -        mem->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
> -        virDomainNVDimmAlignSizePseries(mem) < 0)
> -        return -1;
> +    switch (mem->model) {
> +    case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
> +        /* Although only the QEMU driver implements PPC64 support, this
> +         * code is related to the platform specification (PAPR), i.e. it
> +         * is hypervisor agnostic, and any future PPC64 hypervisor driver
> +         * will have the same restriction.
> +         */
> +        if (ARCH_IS_PPC64(def->os.arch) &&
> +            virDomainNVDimmAlignSizePseries(mem) < 0)
> +            return -1;
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +        /* Virtio-pmem mandates shared access so that guest writes get
> +         * reflected in the underlying file. */

I'd expect a check that 'access' is not
'VIR_DOMAIN_MEMORY_ACCESS_PRIVATE' explicitly if it's mandatory to use
shared access.

> +        if (mem->s.virtio.pmem &&
> +            mem->access == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)
> +            mem->access = VIR_DOMAIN_MEMORY_ACCESS_SHARED;
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +    case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +    case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +        break;
> +    }
>  
>      return 0;
>  }

[...]





[...]


> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index e705e8d8d5..ab7938a355 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -8485,6 +8485,8 @@ static int
>  qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem,
>                                           const virDomainDef *def)
>  {
> +    bool needsNuma = true;
> +
>      switch (mem->model) {
>      case VIR_DOMAIN_MEMORY_MODEL_DIMM:
>      case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
> @@ -8520,11 +8522,35 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem,
>          }
>          break;
>  
> +    case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +        if (mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI &&
> +            mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                           _("only 'pci' addresses are supported for the "
> +                             "virtio-pmem device"));
> +            return -1;
> +        }
> +
> +        /* virtio-pmem doesn't have .node attribute. */
> +        if (mem->s.virtio.pmem)
> +            needsNuma = false;
> +        break;
> +
>      case VIR_DOMAIN_MEMORY_MODEL_NONE:
>      case VIR_DOMAIN_MEMORY_MODEL_LAST:
>          return -1;
>      }
>  
> +    if (needsNuma &&
> +        virDomainNumaGetNodeCount(def->numa) != 0) {
> +        if (mem->targetNode == -1) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                           _("target NUMA node needs to be specified for "
> +                             "memory device"));
> +            return -1;
> +        }
> +    }
> +
>      return 0;
>  }
>  
> diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c
> index d872f75b38..bcf6724da3 100644
> --- a/src/qemu/qemu_domain_address.c
> +++ b/src/qemu/qemu_domain_address.c
> @@ -313,11 +313,11 @@ qemuDomainPrimeVirtioDeviceAddresses(virDomainDefPtr def,
>                                       virDomainDeviceAddressType type)
>  {
>      /*
> -       declare address-less virtio devices to be of address type 'type'
> -       disks, networks, videos, consoles, controllers, memballoon and rng
> -       in this order
> -       if type is ccw filesystem and vsock devices are declared to be of
> -       address type ccw
> +       Declare address-less virtio devices to be of address type 'type'
> +       disks, networks, videos, consoles, controllers, hostdevs, memballoon,
> +       rngs and memories in this order.
> +       If type is ccw filesystem and vsock devices are declared to be of
> +       address type ccw.
>      */
>      size_t i;
>  
> @@ -379,6 +379,12 @@ qemuDomainPrimeVirtioDeviceAddresses(virDomainDefPtr def,
>              def->rngs[i]->info.type = type;
>      }
>  
> +    for (i = 0; i < def->nmems; i++) {
> +        if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_VIRTIO &&
> +            def->mems[i]->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
> +            def->mems[i]->info.type = type;
> +    }
> +
>      if (type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_CCW) {
>          for (i = 0; i < def->nfss; i++) {
>              if (def->fss[i]->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
> @@ -1040,11 +1046,23 @@ qemuDomainDeviceCalculatePCIConnectFlags(virDomainDeviceDefPtr dev,
>          }
>          break;
>  
> +    case VIR_DOMAIN_DEVICE_MEMORY:
> +        switch (dev->data.memory->model) {
> +        case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +            return virtioFlags;
> +
> +        case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +        case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +        case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
> +        case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +            return 0;
> +        }
> +        break;
> +
>          /* These devices don't ever connect with PCI */
>      case VIR_DOMAIN_DEVICE_NVRAM:
>      case VIR_DOMAIN_DEVICE_TPM:
>      case VIR_DOMAIN_DEVICE_PANIC:
> -    case VIR_DOMAIN_DEVICE_MEMORY:
>      case VIR_DOMAIN_DEVICE_HUB:
>      case VIR_DOMAIN_DEVICE_REDIRDEV:
>      case VIR_DOMAIN_DEVICE_SMARTCARD:
> @@ -2455,6 +2473,17 @@ qemuDomainAssignDevicePCISlots(virDomainDefPtr def,
>              return -1;
>      }
>  
> +    for (i = 0; i < def->nmems; i++) {
> +        virDomainMemoryDefPtr mem = def->mems[i];
> +
> +        if (mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO ||
> +            !virDeviceInfoPCIAddressIsWanted(&mem->info))
> +            continue;
> +
> +        if (qemuDomainPCIAddressReserveNextAddr(addrs, &mem->info) < 0)
> +            return -1;
> +    }
> +
>      return 0;
>  }
>  
> @@ -3102,19 +3131,32 @@ qemuAssignMemoryDeviceSlot(virDomainMemoryDefPtr mem,
>  
>  
>  int
> -qemuDomainAssignMemoryDeviceSlot(virDomainDefPtr def,
> +qemuDomainAssignMemoryDeviceSlot(virQEMUDriverPtr driver,
> +                                 virDomainObjPtr vm,
>                                   virDomainMemoryDefPtr mem)
>  {
> -    virBitmapPtr slotmap = NULL;
> -    int ret;
> +    g_autoptr(virBitmap) slotmap = NULL;
> +    virDomainDeviceDef dev = {.type = VIR_DOMAIN_DEVICE_MEMORY, .data.memory = mem};
>  
> -    if (!(slotmap = qemuDomainGetMemorySlotMap(def)))
> -        return -1;
> +    switch (mem->model) {
> +    case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +    case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
> +        if (!(slotmap = qemuDomainGetMemorySlotMap(vm->def)))
> +            return -1;
>  
> -    ret = qemuAssignMemoryDeviceSlot(mem, slotmap);
> +        return qemuAssignMemoryDeviceSlot(mem, slotmap);
> +        break;
>  
> -    virBitmapFree(slotmap);
> -    return ret;
> +    case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +        return qemuDomainEnsurePCIAddress(vm, &dev, driver);
> +        break;
> +
> +    case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +    case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +        break;
> +    }
> +
> +    return 0;
>  }
>  
>  
> @@ -3132,8 +3174,22 @@ qemuDomainAssignMemorySlots(virDomainDefPtr def)
>          return -1;
>  
>      for (i = 0; i < def->nmems; i++) {
> -        if (qemuAssignMemoryDeviceSlot(def->mems[i], slotmap) < 0)
> -            goto cleanup;
> +        virDomainMemoryDefPtr mem = def->mems[i];
> +
> +        switch (mem->model) {
> +        case VIR_DOMAIN_MEMORY_MODEL_DIMM:
> +        case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
> +            if (qemuAssignMemoryDeviceSlot(def->mems[i], slotmap) < 0)
> +                goto cleanup;
> +            break;
> +
> +        case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
> +            /* handled in qemuDomainAssignPCIAddresses() */
> +            break;
> +        case VIR_DOMAIN_MEMORY_MODEL_NONE:
> +        case VIR_DOMAIN_MEMORY_MODEL_LAST:
> +            break;
> +        }
>      }

Preferably this commit will not introduce actual code to the qemu
driver. Please do this refactor separately, before introduction of the
new model. This applies to all of the above hunks.




More information about the libvir-list mailing list