[libvirt] [PATCH v4 8/9] qemu: Add support to Add/Delete IOThreads

John Ferlan jferlan at redhat.com
Thu Apr 23 15:22:17 UTC 2015



On 04/23/2015 09:22 AM, Peter Krempa wrote:
> On Tue, Apr 21, 2015 at 19:31:29 -0400, John Ferlan wrote:
>> Add qemuDomainAddIOThread and qemuDomainDelIOThread in order to add or
>> remove an IOThread to/from the host either for live or config optoins
>>
>> The implementation for the 'live' option will use the iothreadpids list
>> in order to make decision, while the 'config' option will use the
>> iothreadids list.  Additionally, for deletion each may have to adjust
>> the iothreadpin list.
>>
>> IOThreads are implemented by qmp objects, the code makes use of the existing
>> qemuMonitorAddObject or qemuMonitorDelObject APIs.
>>
>> Signed-off-by: John Ferlan <jferlan at redhat.com>
>> ---
>>  src/conf/domain_audit.c  |   9 ++
>>  src/conf/domain_audit.h  |   6 +
>>  src/libvirt_private.syms |   1 +
>>  src/qemu/qemu_driver.c   | 401 +++++++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 417 insertions(+)
>>
> 
> ...
> 
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index ee13d08..c34abc9 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -6137,6 +6137,405 @@ qemuDomainPinIOThread(virDomainPtr dom,
>>      return ret;
>>  }
>>  
>> +static int
>> +qemuDomainHotplugAddIOThread(virQEMUDriverPtr driver,
>> +                             virDomainObjPtr vm,
>> +                             unsigned int iothread_id)
>> +{
>> +    qemuDomainObjPrivatePtr priv = vm->privateData;
>> +    char *alias = NULL;
>> +    size_t idx;
>> +    int rc = -1;
>> +    int ret = -1;
>> +    unsigned int orig_niothreads = vm->def->iothreads;
>> +    unsigned int exp_niothreads = vm->def->iothreads;
>> +    int new_niothreads = 0;
>> +    qemuMonitorIOThreadInfoPtr *new_iothreads = NULL;
>> +    virCgroupPtr cgroup_iothread = NULL;
>> +    char *mem_mask = NULL;
>> +    virDomainIOThreadIDDefPtr iothrid;
>> +
>> +    if (virDomainIOThreadIDFind(vm->def, iothread_id)) {
>> +        virReportError(VIR_ERR_INVALID_ARG,
>> +                       _("an IOThread is already using iothread_id '%u'"),
>> +                       iothread_id);
>> +        goto cleanup;
>> +    }
>> +
>> +    if (virAsprintf(&alias, "iothread%u", iothread_id) < 0)
>> +        return -1;
>> +
>> +    qemuDomainObjEnterMonitor(driver, vm);
>> +
>> +    rc = qemuMonitorAddObject(priv->mon, "iothread", alias, NULL);
>> +    exp_niothreads++;
>> +    if (rc < 0)
>> +        goto exit_monitor;
>> +
>> +    /* After hotplugging the IOThreads we need to re-detect the
>> +     * IOThreads thread_id's, adjust the cgroups, thread affinity,
>> +     * and add the thread_id to the vm->def->iothreadids list.
>> +     */
>> +    if ((new_niothreads = qemuMonitorGetIOThreads(priv->mon,
>> +                                                  &new_iothreads)) < 0)
>> +        goto exit_monitor;
>> +
>> +    if (qemuDomainObjExitMonitor(driver, vm) < 0)
>> +        goto cleanup;
>> +
>> +    if (new_niothreads != exp_niothreads) {
>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                       _("got wrong number of IOThread ids from QEMU monitor. "
>> +                         "got %d, wanted %d"),
>> +                       new_niothreads, exp_niothreads);
>> +        vm->def->iothreads = new_niothreads;
>> +        goto cleanup;
>> +    }
>> +    vm->def->iothreads = exp_niothreads;
>> +
>> +    if (virDomainNumatuneGetMode(vm->def->numa, -1) ==
>> +        VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
>> +        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
>> +                                            priv->autoNodeset,
>> +                                            &mem_mask, -1) < 0)
>> +        goto cleanup;
>> +
>> +
>> +    /*
>> +     * If we've successfully added an IOThread, find out where we added it
>> +     * in the QEMU IOThread list, so we can add it to our iothreadids list
>> +     */
>> +    for (idx = 0; idx < new_niothreads; idx++) {
>> +        if (STREQ(new_iothreads[idx]->name, alias))
>> +            break;
>> +    }
>> +
>> +    if (idx == new_niothreads) {
>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                       _("cannot find new IOThread '%u' in QEMU monitor."),
>> +                       iothread_id);
>> +        goto cleanup;
>> +    }
>> +
>> +    if (!(iothrid = virDomainIOThreadIDAdd(vm->def, iothread_id)))
>> +        goto cleanup;
>> +
>> +    iothrid->thread_id = new_iothreads[idx]->thread_id;
>> +
>> +    /* Add IOThread to cgroup if present */
>> +    if (priv->cgroup) {
>> +        cgroup_iothread =
>> +            qemuDomainAddCgroupForThread(priv->cgroup,
>> +                                         VIR_CGROUP_THREAD_IOTHREAD,
>> +                                         iothread_id, mem_mask,
>> +                                         iothrid->thread_id);
>> +        if (!cgroup_iothread)
>> +            goto cleanup;
>> +    }
>> +
>> +    /* Inherit def->cpuset */
>> +    if (vm->def->cpumask) {
>> +        if (!(iothrid->cpumask = virBitmapNewCopy(vm->def->cpumask)))
>> +            goto cleanup;
> 
> Copying the existing default mask is not necessary here. You only have
> to set it now. It will be set automatically the next time the VM will
> start.
> 
> Also you still didn't add the case where automatic numa placement is
> used.
> 

Perhaps because I'm not clear what you're asking for.  Obviously I'm
comparing to HotplugVcpus


>> +        vm->def->cputune.niothreadspin++;
> 
> As said, this shouldn't be necessary.
> 

It's been removed

>> +
>> +        if (qemuDomainHotplugPinThread(iothrid->cpumask, iothread_id,
>> +                                       iothrid->thread_id, cgroup_iothread) < 0)
>> +            goto cleanup;
>> +
>> +        if (qemuProcessSetSchedParams(iothread_id, iothrid->thread_id,
>> +                                      vm->def->cputune.niothreadsched,
>> +                                      vm->def->cputune.iothreadsched) < 0)
>> +            goto cleanup;
> 
> As I've pointed out in my last review:
> <cite>
> qemuProcessSetSchedParams won't do anything since the new thread doesn't
> have any scheduler assigned.
> </cite>
> 
> For your reply to that comment:
> So it's wrong in qemuDomainHotplugVcpus ? Is someone else fixing that?
> 
> Well, yes. This isn't qemuDomainHotplugVcpus though so why to add the
> same mistake? Existing code is no golden standard for how things should
> be done.
> 

So if we know existing code is broken, shouldn't it be fixed? That way
it prevents someone from doing something wrong. While existing code
isn't a "golden standard" - it does provide a working example for others
to follow if they're implementing something very similar to what
currently exists.

I don't mind removing it - fine, whatever, but is someone going to fix
the HotplugVcpus as well?  Or should I add a patch 10 that does that?

>> +    }
>> +
>> +    ret = 0;
>> +
>> + cleanup:
>> +    if (new_iothreads) {
>> +        for (idx = 0; idx < new_niothreads; idx++)
>> +            qemuMonitorIOThreadInfoFree(new_iothreads[idx]);
>> +        VIR_FREE(new_iothreads);
>> +    }
>> +    VIR_FREE(mem_mask);
>> +    virDomainAuditIOThread(vm, orig_niothreads, new_niothreads,
>> +                           "update", rc == 0);
>> +    virCgroupFree(&cgroup_iothread);
>> +    VIR_FREE(alias);
>> +    return ret;
>> +
>> + exit_monitor:
>> +    ignore_value(qemuDomainObjExitMonitor(driver, vm));
>> +    goto cleanup;
>> +}
>> +
>> +static int
>> +qemuDomainHotplugDelIOThread(virQEMUDriverPtr driver,
>> +                             virDomainObjPtr vm,
>> +                             unsigned int iothread_id)
>> +{
>> +    qemuDomainObjPrivatePtr priv = vm->privateData;
>> +    size_t idx;
>> +    char *alias = NULL;
>> +    int rc = -1;
>> +    int ret = -1;
>> +    unsigned int orig_niothreads = vm->def->iothreads;
>> +    unsigned int exp_niothreads = vm->def->iothreads;
>> +    int new_niothreads = 0;
>> +    qemuMonitorIOThreadInfoPtr *new_iothreads = NULL;
>> +
>> +    /* Normally would use virDomainIOThreadIDFind, but we need the index
>> +     * from whence to delete for later...
>> +     */
>> +    for (idx = 0; idx < vm->def->niothreadids; idx++) {
>> +        if (iothread_id == vm->def->iothreadids[idx]->iothread_id)
>> +            break;
>> +    }
>> +
>> +    if (idx == vm->def->niothreadids) {
>> +        virReportError(VIR_ERR_INVALID_ARG,
>> +                       _("cannot find IOThread '%u' in iothreadids list"),
>> +                       iothread_id);
>> +        return -1;
>> +    }
>> +
>> +    if (virAsprintf(&alias, "iothread%u", iothread_id) < 0)
>> +        return -1;
>> +
>> +    qemuDomainObjEnterMonitor(driver, vm);
>> +
>> +    rc = qemuMonitorDelObject(priv->mon, alias);
>> +    exp_niothreads--;
>> +    if (rc < 0)
>> +        goto exit_monitor;
>> +
>> +    if ((new_niothreads = qemuMonitorGetIOThreads(priv->mon,
>> +                                                  &new_iothreads)) < 0)
>> +        goto exit_monitor;
>> +
>> +    if (qemuDomainObjExitMonitor(driver, vm) < 0)
>> +        goto cleanup;
>> +
>> +    if (new_niothreads != exp_niothreads) {
>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                       _("got wrong number of IOThread ids from QEMU monitor. "
>> +                         "got %d, wanted %d"),
>> +                       new_niothreads, exp_niothreads);
>> +        vm->def->iothreads = new_niothreads;
>> +        goto cleanup;
>> +    }
>> +    vm->def->iothreads = exp_niothreads;
>> +
>> +    if (vm->def->iothreadids[idx]->cpumask)
>> +        vm->def->cputune.niothreadspin--;
> 
> This shouldn't be necessary after you fix the check for formating
> cputune.
> 

Right

>> +    virDomainIOThreadIDDel(vm->def, iothread_id);
>> +
>> +    if (qemuDomainDelCgroupForThread(priv->cgroup,
>> +                                     VIR_CGROUP_THREAD_IOTHREAD,
>> +                                     iothread_id) < 0)
>> +        goto cleanup;
> 
> Here you also should remove the corresponding bit from the iothread
> scheduler list since you are removign the bit. Ideally you merge that
> info here too. Note that it won't be that easy since it's stored in an
> orthogonal fashion, which terribly sucks, but it would be great to have
> all the data in single place.
> 

The design of *sched is less than optimal to say the least - it made a
lot of assumptions and should have a way to remove bits as necessary...
Since vcpu's cannot removed "yet" it hasn't mattered. I'll figure
something out - it'll probably require a new API for domain_conf.c since
the code will need to be used for both active and config

> 
>> +
>> +    ret = 0;
>> +
>> + cleanup:
>> +    if (new_iothreads) {
>> +        for (idx = 0; idx < new_niothreads; idx++)
>> +            qemuMonitorIOThreadInfoFree(new_iothreads[idx]);
>> +        VIR_FREE(new_iothreads);
>> +    }
>> +    virDomainAuditIOThread(vm, orig_niothreads, new_niothreads,
>> +                           "update", rc == 0);
>> +    VIR_FREE(alias);
>> +    return ret;
>> +
>> + exit_monitor:
>> +    ignore_value(qemuDomainObjExitMonitor(driver, vm));
>> +    goto cleanup;
>> +}
>> +
>> +static int
>> +qemuDomainChgIOThread(virQEMUDriverPtr driver,
>> +                      virDomainObjPtr vm,
>> +                      unsigned int iothread_id,
>> +                      bool add,
>> +                      unsigned int flags)
>> +{
>> +    virQEMUDriverConfigPtr cfg = NULL;
>> +    virCapsPtr caps = NULL;
>> +    qemuDomainObjPrivatePtr priv;
>> +    virCgroupPtr cgroup_temp = NULL;
>> +    virBitmapPtr all_nodes = NULL;
>> +    char *all_nodes_str = NULL;
>> +    char *mem_mask = NULL;
>> +    virDomainDefPtr persistentDef;
>> +    int ret = -1;
>> +
>> +    cfg = virQEMUDriverGetConfig(driver);
>> +
>> +    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
>> +        goto cleanup;
>> +
>> +    priv = vm->privateData;
>> +
>> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
>> +        goto cleanup;
>> +
>> +    if (virDomainLiveConfigHelperMethod(caps, driver->xmlopt, vm, &flags,
>> +                                        &persistentDef) < 0)
>> +        goto endjob;
>> +
>> +    if (flags & VIR_DOMAIN_AFFECT_LIVE) {
>> +        if (!virDomainObjIsActive(vm)) {
>> +            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
>> +                           _("cannot change IOThreads for an inactive domain"));
>> +            goto endjob;
>> +        }
>> +
>> +        if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
>> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>> +                           _("IOThreads not supported with this binary"));
>> +            goto endjob;
>> +        }
>> +
>> +        if (virNumaIsAvailable()) {
>> +            if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
>> +                                   false, &cgroup_temp) < 0)
>> +                goto endjob;
>> +
>> +            if (!(all_nodes = virNumaGetHostNodeset()))
>> +                goto endjob;
>> +
>> +            if (!(all_nodes_str = virBitmapFormat(all_nodes)))
>> +                goto endjob;
>> +
>> +            if (virCgroupGetCpusetMems(cgroup_temp, &mem_mask) < 0 ||
>> +                virCgroupSetCpusetMems(cgroup_temp, all_nodes_str) < 0)
>> +                goto endjob;
> 
> One more example that blatant copying of the code is a terrible idea.
> This whole block was added to fix the following problem:

Hmm... perhaps a case where a few extra comments in the vCPU code would
have helped the reader understand what was being done without ferreting
around git log history to understand it.

I'll remove it from the patch.

John
> 
> commit e3435caf6af41748204e542dee13ede8441d88c0
> Author: Martin Kletzander <mkletzan at redhat.com>
> Date:   Fri Dec 12 15:36:45 2014 +0100
> 
>     qemu: Fix hotplugging cpus with strict memory pinning
>     
>     When hot-plugging a VCPU into the guest, kvm needs to allocate some data
>     from the DMA zone, which might be in a memory node that's not allowed in
>     cpuset.mems.  Basically the same problem as there was with starting the
>     domain and due to which commit 7e72ac787848b7434c9359a57c1e2789d92350f8
>     exists.  This patch just extends it to hotplugging as well.
>     
>     Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1161540
>     
>     Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
> 
> 
> Since it is apparently not necessary when doing iothread hotplug please
> remove it.
> 
>> +        }
>> +
>> +        if (add) {
>> +            if (qemuDomainHotplugAddIOThread(driver, vm, iothread_id) < 0)
>> +                goto endjob;
>> +        } else {
>> +            if (qemuDomainHotplugDelIOThread(driver, vm, iothread_id) < 0)
>> +                goto endjob;
>> +        }
>> +
>> +        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0)
>> +            goto endjob;
>> +    }
>> +
>> +    if (flags & VIR_DOMAIN_AFFECT_CONFIG) {
>> +        if (add) {
>> +            if (!virDomainIOThreadIDAdd(persistentDef, iothread_id))
>> +                goto endjob;
>> +
>> +            persistentDef->iothreads++;
>> +        } else {
>> +            virDomainIOThreadIDDefPtr iothrid;
>> +            if (!(iothrid = virDomainIOThreadIDFind(persistentDef,
>> +                                                    iothread_id))) {
>> +                virReportError(VIR_ERR_INVALID_ARG,
>> +                               _("cannot find IOThread '%u' in persistent "
>> +                                 "iothreadids"),
>> +                               iothread_id);
>> +                goto cleanup;
>> +            }
>> +
>> +            if (iothrid->cpumask)
>> +                persistentDef->cputune.niothreadspin--;
>> +            virDomainIOThreadIDDel(persistentDef, iothread_id);
>> +            persistentDef->iothreads--;
>> +        }
>> +
>> +        if (virDomainSaveConfig(cfg->configDir, persistentDef) < 0)
>> +            goto endjob;
>> +    }
>> +
>> +    ret = 0;
>> +
>> + endjob:
>> +    if (mem_mask) {
>> +        virErrorPtr err = virSaveLastError();
>> +        virCgroupSetCpusetMems(cgroup_temp, mem_mask);
>> +        virSetError(err);
>> +        virFreeError(err);
>> +    }
>> +    qemuDomainObjEndJob(driver, vm);
>> +
>> + cleanup:
>> +    VIR_FREE(mem_mask);
>> +    VIR_FREE(all_nodes_str);
>> +    virBitmapFree(all_nodes);
>> +    virCgroupFree(&cgroup_temp);
>> +    virObjectUnref(caps);
>> +    virObjectUnref(cfg);
>> +    return ret;
>> +}
>> +
> 
> Peter
> 




More information about the libvir-list mailing list