[PATCH 2/3] virprocess: add the SCHED_DEADLINE scheduling policy

Michal Prívozník mprivozn at redhat.com
Wed Aug 10 08:44:17 UTC 2022


On 8/1/22 19:11, Sasha Algisi wrote:
> Tasks associated to virtual CPUs, IO Threads and Emulator processes
> can be created with the SCHED_DEADLINE policy. The policy is described
> in details here: https://docs.kernel.org/scheduler/sched-deadline.html
> 
> It requires the following parameters (all in nanoseconds):
> 1) runtime
> 2) deadline
> 3) period
> 
> It must always holds that: runtime <= deadline <= period.
> 
> The kernel enforces that the values stay within [1024, 2^63-1].
> Note, however, that a smaller range could be set (or be already set
> by  default) via sysctl (see kernel.sched_deadline_period_max_us and
> kernel.sched_deadline_period_min_us).
> 
> All the three parameters are mandatory but period can be set to 0,
> in which case it will set to the same value of deadline.
> 
> Signed-off-by: Sasha Algisi <sasha.algisi at edu.unito.it>
> Signed-off-by: Dario Faggioli <dfaggioli at suse.com>
> ---
>  src/ch/ch_process.c     |  3 +-
>  src/conf/domain_conf.h  |  3 ++
>  src/qemu/qemu_process.c |  8 +++-
>  src/util/virprocess.c   | 84 +++++++++++++++++++++++++++++++++++++++--
>  src/util/virprocess.h   |  6 ++-
>  5 files changed, 97 insertions(+), 7 deletions(-)
> 
> diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> index 77f55e777b..a40d188aac 100644
> --- a/src/ch/ch_process.c
> +++ b/src/ch/ch_process.c
> @@ -293,7 +293,8 @@ virCHProcessSetupPid(virDomainObj *vm,
>      /* Set scheduler type and priority, but not for the main thread. */
>      if (sched &&
>          nameval != VIR_CGROUP_THREAD_EMULATOR &&
> -        virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
> +        virProcessSetScheduler(pid, sched->policy, sched->priority,
> +                               sched->runtime, sched->deadline, sched->period) < 0)
>          goto cleanup;
>  
>      ret = 0;
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 060c395943..c3d1a1b65d 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -2454,6 +2454,9 @@ typedef enum {
>  struct _virDomainThreadSchedParam {
>      virProcessSchedPolicy policy;
>      int priority;
> +    uint64_t runtime;
> +    uint64_t deadline;
> +    uint64_t period;

Or just unsigned long long. Here we don't face kernel just yet, and can
use 'more generic' types which also allows you to drop plenty of
typecasts when using internal helpers.

>  };
>  
>  struct _virDomainTimerCatchupDef {
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 137dcf5cf4..7586e0538a 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -2728,7 +2728,8 @@ qemuProcessSetupPid(virDomainObj *vm,
>      /* Set scheduler type and priority, but not for the main thread. */
>      if (sched &&
>          nameval != VIR_CGROUP_THREAD_EMULATOR &&
> -        virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
> +        virProcessSetScheduler(pid, sched->policy, sched->priority,
> +                               sched->runtime, sched->deadline, sched->period) < 0)
>          goto cleanup;
>  
>      ret = 0;
> @@ -7813,7 +7814,10 @@ qemuProcessLaunch(virConnectPtr conn,
>      if (vm->def->cputune.emulatorsched &&
>          virProcessSetScheduler(vm->pid,
>                                 vm->def->cputune.emulatorsched->policy,
> -                               vm->def->cputune.emulatorsched->priority) < 0)
> +                               vm->def->cputune.emulatorsched->priority,
> +                               vm->def->cputune.emulatorsched->runtime,
> +                               vm->def->cputune.emulatorsched->deadline,
> +                               vm->def->cputune.emulatorsched->period) < 0)
>          goto cleanup;
>  
>      VIR_DEBUG("Setting any required VM passwords");
> diff --git a/src/util/virprocess.c b/src/util/virprocess.c
> index a8f86784e1..c96bfc45fd 100644
> --- a/src/util/virprocess.c
> +++ b/src/util/virprocess.c
> @@ -84,6 +84,7 @@ VIR_ENUM_IMPL(virProcessSchedPolicy,
>                "idle",
>                "fifo",
>                "rr",
> +              "deadline",
>  );
>  
>  
> @@ -1610,6 +1611,13 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy)
>      case VIR_PROC_POLICY_RR:
>          return SCHED_RR;
>  
> +    case VIR_PROC_POLICY_DEADLINE:
> +# ifdef SCHED_DEADLINE
> +        return SCHED_DEADLINE;
> +# else
> +        return -1;
> +# endif
> +
>      case VIR_PROC_POLICY_LAST:
>          /* nada */
>          break;
> @@ -1621,13 +1629,20 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy)
>  int
>  virProcessSetScheduler(pid_t pid,
>                         virProcessSchedPolicy policy,
> -                       int priority)
> +                       int priority,
> +                       uint64_t runtime G_GNUC_UNUSED,
> +                       uint64_t deadline G_GNUC_UNUSED,
> +                       uint64_t period G_GNUC_UNUSED)

The !WITH_SCHED_SETSCHEDULER case stub is not updated correspondingly.
And these new arguments don't need the unused annotation ...
>  {
>      struct sched_param param = {0};
>      int pol = virProcessSchedTranslatePolicy(policy);
>  
> -    VIR_DEBUG("pid=%lld, policy=%d, priority=%u",
> -              (long long) pid, policy, priority);
> +    VIR_DEBUG("pid=%lld, policy=%d, priority=%u, "
> +              "runtime=%llu, deadline=%llu, period=%llu",
> +              (long long) pid, policy, priority,
> +              (unsigned long long) runtime,
> +              (unsigned long long) deadline,
> +              (unsigned long long) period);

.. as they are used here. Also, arguments can be ull type because it's
only later in this function that we need to convert them to uint64_t.

>  
>      if (!policy)
>          return 0;
> @@ -1667,6 +1682,69 @@ virProcessSetScheduler(pid_t pid,
>          param.sched_priority = priority;
>      }
>  
> +# ifdef SCHED_DEADLINE
> +    if (pol == SCHED_DEADLINE) {
> +        struct sched_attr attr = {0};
> +        /*
> +         * The range is enforced in the kernel.
> +         * See: https://man7.org/linux/man-pages/man7/sched.7.html
> +         */
> +        uint64_t min_value = 1024;
> +        uint64_t max_value = (1ULL << 63) - 1;
> +
> +        if (runtime < min_value || runtime > max_value) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> +                           _("Scheduler runtime %llu out of range "
> +                             "[%llu, %llu]"),
> +                           (unsigned long long) runtime,
> +                           (unsigned long long) min_value,
> +                           (unsigned long long) max_value);
> +            return -1;
> +        }
> +
> +        if (deadline < runtime || deadline > max_value) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> +                           _("Scheduler deadline %llu out of range "
> +                             "[%llu, %llu]"),
> +                           (unsigned long long) deadline,
> +                           (unsigned long long) runtime,
> +                           (unsigned long long) max_value);
> +            return -1;
> +        }
> +
> +        if ((period < deadline || period > max_value) && period != 0) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> +                           _("Invalid scheduler period %llu, "
> +                             "possible correct values are 0 "
> +                             "or [%llu, %llu]"),
> +                           (unsigned long long) period,
> +                           (unsigned long long) deadline,
> +                           (unsigned long long) max_value);
> +            return -1;
> +        }
> +
> +        attr.size = sizeof(attr);
> +        attr.sched_policy = pol;
> +        /*
> +         * Setting reset-on-fork is necessary as SCHED_DEADLINE
> +         * tasks cannot fork. See:
> +         * https://docs.kernel.org/scheduler/sched-deadline.html#default-behavior
> +         */
> +        attr.sched_flags = SCHED_FLAG_RESET_ON_FORK;
> +        attr.sched_runtime = runtime;
> +        attr.sched_deadline = deadline;
> +        attr.sched_period = period;
> +
> +        if (sched_setattr(pid, &attr, 0) == 0) {
> +            return 0;
> +        } else {
> +            virReportSystemError(errno,
> +                                 _("Cannot set scheduler parameters for pid %lld"),
> +                                 (long long) pid);
> +            return -1;
> +        }
> +    }
> +# endif
>      if (sched_setscheduler(pid, pol, &param) < 0) {

So what happens when the deadline policy is requested (e.g.
virProcessSetScheduler(policy=VIR_PROC_POLICY_DEADLINE) but
SCHED_DEADLINE is not available? Does this sched_setscheduler() here
error out?

>          virReportSystemError(errno,
>                               _("Cannot set scheduler parameters for pid %lld"),
> diff --git a/src/util/virprocess.h b/src/util/virprocess.h
> index 30b6981c73..84fdf00fdf 100644
> --- a/src/util/virprocess.h
> +++ b/src/util/virprocess.h
> @@ -33,6 +33,7 @@ typedef enum {
>      VIR_PROC_POLICY_IDLE,
>      VIR_PROC_POLICY_FIFO,
>      VIR_PROC_POLICY_RR,
> +    VIR_PROC_POLICY_DEADLINE,

This fails to compile, because of a switch() inside
virDomainSchedulerFormat() that does not handle this new case. I think
you can work around this by reordering things a bit. For instance, the
first patch can introduce XML parsing/formatting and this new enum
member. Then, follow up patch(-es) can extend virProcessSetScheduler.

>  
>      VIR_PROC_POLICY_LAST
>  } virProcessSchedPolicy;
> @@ -116,7 +117,10 @@ int virProcessSetupPrivateMountNS(void);
>  
>  int virProcessSetScheduler(pid_t pid,
>                             virProcessSchedPolicy policy,
> -                           int priority);
> +                           int priority,
> +                           uint64_t runtime,
> +                           uint64_t deadline,
> +                           uint64_t period);
>  
>  GStrv virProcessGetStat(pid_t pid, pid_t tid);
>  

Michal



More information about the libvir-list mailing list