[libvirt] [PATCH v2 2/3] docs, schema, conf: Add support for setting scheduler parameters of guest threads
Michal Privoznik
mprivozn at redhat.com
Wed Feb 11 14:04:34 UTC 2015
On 10.02.2015 16:35, Martin Kletzander wrote:
> In order for QEMU vCPU (and other) threads to run with RT scheduler,
> libvirt needs to take care of that so QEMU doesn't have to run privileged.
>
> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986
>
> Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
> ---
> docs/formatdomain.html.in | 16 ++
> docs/schemas/domaincommon.rng | 44 +++++
> src/conf/domain_conf.c | 183 ++++++++++++++++++++-
> src/conf/domain_conf.h | 24 +++
> .../qemuxml2argv-cputune-iothreadsched-toomuch.xml | 38 +++++
> .../qemuxml2argv-cputune-iothreadsched.xml | 39 +++++
> .../qemuxml2argv-cputune-vcpusched-overlap.xml | 38 +++++
> tests/qemuxml2argvtest.c | 2 +
> .../qemuxml2xmlout-cputune-iothreadsched.xml | 39 +++++
> tests/qemuxml2xmltest.c | 1 +
> 10 files changed, 423 insertions(+), 1 deletion(-)
> create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml
> create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml
> create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml
> create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml
>
> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
> index d8144ea..3381dfe 100644
> --- a/docs/formatdomain.html.in
> +++ b/docs/formatdomain.html.in
> @@ -550,6 +550,8 @@
> <quota>-1</quota>
> <emulator_period>1000000</emulator_period>
> <emulator_quota>-1</emulator_quota>
> + <vcpusched vcpus='0-4,^3' scheduler='fifo' priority='1'/>
> + <iothreadsched iothreads='2' scheduler='batch'/>
> </cputune>
> ...
> </domain>
> @@ -652,6 +654,20 @@
> <span class="since">Only QEMU driver support since 0.10.0</span>
> </dd>
>
> + <dt><code>vcpusched</code> and <code>iothreadsched</code></dt>
> + <dd>
> + The optional <code>vcpusched</code> elements specifie the scheduler
s/specifie/specifies/
And maybe s/scheduler/scheduler type/?
> + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>,
> + <code>rr</code>) for particular vCPU/IOThread threads (based on
> + <code>vcpus</code> and <code>iothreads</code>, leaving out
> + <code>vcpus</code>/<code>iothreads</code> sets the default).
> + For real-time schedulers (<code>fifo</code>, <code>rr</code>),
> + priority must be specified as well (and is ignored for
> + non-real-time ones). The value range for the priority depends
> + on the host kernel (usually 1-99).
> + <span class="since">Since 1.2.12</span>
> + </dd>
> +
> </dl>
>
>
> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
> index d467dce..98766dc 100644
> --- a/docs/schemas/domaincommon.rng
> +++ b/docs/schemas/domaincommon.rng
> @@ -815,10 +815,54 @@
> </attribute>
> </element>
> </zeroOrMore>
> + <zeroOrMore>
> + <element name="vcpusched">
> + <optional>
> + <attribute name="vcpus">
> + <ref name='cpuset'/>
> + </attribute>
> + </optional>
> + <ref name="schedparam"/>
> + </element>
> + </zeroOrMore>
> + <zeroOrMore>
> + <element name="iothreadsched">
> + <optional>
> + <attribute name="iothreads">
> + <ref name='cpuset'/>
> + </attribute>
> + </optional>
> + <ref name="schedparam"/>
> + </element>
> + </zeroOrMore>
> </interleave>
> </element>
> </define>
>
> + <define name="schedparam">
> + <choice>
> + <group>
> + <attribute name="scheduler">
> + <choice>
> + <value>batch</value>
> + <value>idle</value>
> + </choice>
> + </attribute>
> + </group>
> + <group>
> + <attribute name="scheduler">
> + <choice>
> + <value>fifo</value>
> + <value>rr</value>
> + </choice>
> + </attribute>
> + <attribute name="priority">
> + <ref name="unsignedShort"/>
> + </attribute>
> + </group>
> + </choice>
> + </define>
> +
Have we returned to the rule where each new XML extension requires ACK
from at least one of Daniels?
> <!-- All the NUMA related tunables would go in the numatune -->
> <define name="numatune">
> <element name="numatune">
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 77319dc..3fb68b3 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -772,6 +772,13 @@ VIR_ENUM_IMPL(virDomainLoader,
> "rom",
> "pflash")
>
> +VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST,
> + "other", /* default */
> + "batch",
> + "idle",
> + "fifo",
> + "rr")
> +
> /* Internal mapping: subset of block job types that can be present in
> * <mirror> XML (remaining types are not two-phase). */
> VIR_ENUM_DECL(virDomainBlockJob)
> @@ -2234,6 +2241,14 @@ void virDomainDefFree(virDomainDefPtr def)
> virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin,
> def->cputune.niothreadspin);
>
> + for (i = 0; i < def->cputune.nvcpusched; i++)
> + virBitmapFree(def->cputune.vcpusched[i].ids);
> + VIR_FREE(def->cputune.vcpusched);
> +
> + for (i = 0; i < def->cputune.niothreadsched; i++)
> + virBitmapFree(def->cputune.iothreadsched[i].ids);
> + VIR_FREE(def->cputune.iothreadsched);
> +
> virDomainNumatuneFree(def->numatune);
>
> virSysinfoDefFree(def->sysinfo);
> @@ -12595,6 +12610,70 @@ virDomainLoaderDefParseXML(xmlNodePtr node,
> return ret;
> }
>
> +static int
> +virDomainThreadSchedParse(xmlNodePtr node,
> + unsigned int minid,
> + unsigned int maxid,
> + const char *name,
> + virDomainThreadSchedParamPtr sp)
> +{
> + char *tmp = NULL;
> + int sched = 0;
> +
> + tmp = virXMLPropString(node, name);
> + if (!tmp) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Missing attribute '%s' in element '%sched'"),
> + name, name);
> + goto error;
> + }
> +
> + if (!virBitmapParse(tmp, 0, &sp->ids,
> + VIR_DOMAIN_CPUMASK_LEN) ||
> + virBitmapIsAllClear(sp->ids) ||
> + virBitmapNextSetBit(sp->ids, -1) < minid ||
> + virBitmapLastSetBit(sp->ids) > maxid) {
> +
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Invalid value of '%s': %s"),
> + name, tmp);
> + goto error;
> + }
> + VIR_FREE(tmp);
> +
> + tmp = virXMLPropString(node, "scheduler");
> + if (tmp) {
> + if ((sched = virDomainThreadSchedTypeFromString(tmp)) <= 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Invalid scheduler attribute: '%s'"),
> + tmp);
> + goto error;
> + }
> + sp->scheduler = sched;
> +
> + VIR_FREE(tmp);
> + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) {
> + tmp = virXMLPropString(node, "priority");
> + if (!tmp) {
> + virReportError(VIR_ERR_XML_ERROR, "%s",
> + _("Missing scheduler priority"));
> + goto error;
> + }
> + if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) {
> + virReportError(VIR_ERR_XML_ERROR, "%s",
> + _("Invalid value for element priority"));
> + goto error;
> + }
> + VIR_FREE(tmp);
> + }
> + }
> +
> + return 0;
> +
> + error:
> + VIR_FREE(tmp);
> + return -1;
> +}
>
> static virDomainDefPtr
> virDomainDefParseXML(xmlDocPtr xml,
> @@ -13139,6 +13218,77 @@ virDomainDefParseXML(xmlDocPtr xml,
> }
> VIR_FREE(nodes);
>
> + if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("cannot extract vcpusched nodes"));
> + goto error;
> + }
> + if (n) {
> + if (n > def->maxvcpus) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("too many vcpusched nodes in cputune"));
> + goto error;
> + }
> +
> + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0)
> + goto error;
> + def->cputune.nvcpusched = n;
> +
> + for (i = 0; i < def->cputune.nvcpusched; i++) {
> + if (virDomainThreadSchedParse(nodes[i],
> + 0, def->maxvcpus - 1,
> + "vcpus",
> + &def->cputune.vcpusched[i]) < 0)
> + goto error;
> +
> + for (j = 0; j < i; j++) {
> + if (virBitmapOverlaps(def->cputune.vcpusched[i].ids,
> + def->cputune.vcpusched[j].ids)) {
> + virReportError(VIR_ERR_XML_DETAIL, "%s",
> + _("vcpusched attributes 'vcpus' "
> + "must not overlap"));
> + goto error;
> + }
> + }
> + }
> + }
> + VIR_FREE(nodes);
> +
> + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("cannot extract iothreadsched nodes"));
> + goto error;
> + }
> + if (n) {
> + if (n > def->iothreads) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("too many iothreadsched nodes in cputune"));
> + goto error;
> + }
> +
> + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0)
> + goto error;
> + def->cputune.niothreadsched = n;
> +
> + for (i = 0; i < def->cputune.niothreadsched; i++) {
> + if (virDomainThreadSchedParse(nodes[i],
> + 1, def->iothreads,
> + "iothreads",
> + &def->cputune.iothreadsched[i]) < 0)
> + goto error;
> +
> + for (j = 0; j < i; j++) {
> + if (virBitmapOverlaps(def->cputune.iothreadsched[i].ids,
> + def->cputune.iothreadsched[j].ids)) {
> + virReportError(VIR_ERR_XML_DETAIL, "%s",
> + _("iothreadsched attributes 'iothreads' "
> + "must not overlap"));
> + goto error;
> + }
> + }
> + }
> + }
> + VIR_FREE(nodes);
Once parsed, do we also need a check in virDomainDefCheckABIStability()?
Or can the sched parameters change on migration? I guess they can, but
seems like you dig more into this area than me recently.
>
> /* analysis of cpu handling */
> if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
> @@ -19434,7 +19584,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
> def->cputune.period || def->cputune.quota ||
> def->cputune.emulatorpin ||
> def->cputune.emulator_period || def->cputune.emulator_quota ||
> - def->cputune.niothreadspin) {
> + def->cputune.niothreadspin ||
> + def->cputune.vcpusched || def->cputune.iothreadsched) {
> virBufferAddLit(buf, "<cputune>\n");
> cputune = true;
> }
> @@ -19503,6 +19654,36 @@ virDomainDefFormatInternal(virDomainDefPtr def,
> VIR_FREE(cpumask);
> }
>
> + for (i = 0; i < def->cputune.nvcpusched; i++) {
> + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i];
> + char *ids = NULL;
> +
> + if (!(ids = virBitmapFormat(sp->ids)))
> + goto error;
> + virBufferAsprintf(buf, "<vcpusched vcpus='%s' scheduler='%s'",
> + ids, virDomainThreadSchedTypeToString(sp->scheduler));
> + VIR_FREE(ids);
> +
> + if (sp->priority)
> + virBufferAsprintf(buf, " priority='%d'", sp->priority);
> + virBufferAddLit(buf, "/>\n");
> + }
> +
> + for (i = 0; i < def->cputune.niothreadsched; i++) {
> + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i];
> + char *ids = NULL;
> +
> + if (!(ids = virBitmapFormat(sp->ids)))
> + goto error;
> + virBufferAsprintf(buf, "<iothreadsched iothreads='%s' scheduler='%s'",
> + ids, virDomainThreadSchedTypeToString(sp->scheduler));
> + VIR_FREE(ids);
> +
> + if (sp->priority)
> + virBufferAsprintf(buf, " priority='%d'", sp->priority);
> + virBufferAddLit(buf, "/>\n");
> + }
> +
> virBufferAdjustIndent(buf, -2);
> if (cputune)
> virBufferAddLit(buf, "</cputune>\n");
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 93f2314..6be70bf 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -1810,6 +1810,24 @@ typedef enum {
> VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST
> } virDomainCpuPlacementMode;
>
> +typedef enum {
> + VIR_DOMAIN_THREAD_SCHED_OTHER = 0,
> + VIR_DOMAIN_THREAD_SCHED_BATCH,
> + VIR_DOMAIN_THREAD_SCHED_IDLE,
> + VIR_DOMAIN_THREAD_SCHED_FIFO,
> + VIR_DOMAIN_THREAD_SCHED_RR,
> +
> + VIR_DOMAIN_THREAD_SCHED_LAST
> +} virDomainThreadSched;
> +
> +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam;
> +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr;
> +struct _virDomainThreadSchedParam {
> + virBitmapPtr ids;
> + virDomainThreadSched scheduler;
> + int priority;
> +};
> +
> typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef;
> typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr;
> struct _virDomainTimerCatchupDef {
> @@ -1997,6 +2015,11 @@ struct _virDomainCputune {
> virDomainVcpuPinDefPtr emulatorpin;
> size_t niothreadspin;
> virDomainVcpuPinDefPtr *iothreadspin;
> +
> + size_t nvcpusched;
> + virDomainThreadSchedParamPtr vcpusched;
> + size_t niothreadsched;
> + virDomainThreadSchedParamPtr iothreadsched;
> };
>
> typedef struct _virDomainBlkiotune virDomainBlkiotune;
> @@ -2844,6 +2867,7 @@ VIR_ENUM_DECL(virDomainRNGModel)
> VIR_ENUM_DECL(virDomainRNGBackend)
> VIR_ENUM_DECL(virDomainTPMModel)
> VIR_ENUM_DECL(virDomainTPMBackend)
> +VIR_ENUM_DECL(virDomainThreadSched)
Yet another ENUM_DECL without corresponding libvirt_private.syms change.
> /* from libvirt.h */
> VIR_ENUM_DECL(virDomainState)
> VIR_ENUM_DECL(virDomainNostateReason)
ACK with all of that fixed.
Michal
More information about the libvir-list
mailing list