Introduce the data structures, constants and symbols needed for the
SCHED_DEADLINE implementation.

The core data structures of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking whether a task belongs to the new policy
are also added where they are needed.

Signed-off-by: Dario Faggioli
---
Some illustrative userspace sketches of how the new interface is expected
to be used follow below the diff.

 include/linux/sched.h |   68 ++++++++++++++++++++++++++++++++++++++++++-
 kernel/hrtimer.c      |    2 +-
 kernel/sched.c        |   78 ++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 135 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cf20084..c72a132 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,7 @@
 #define SCHED_BATCH		3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE		5
+#define SCHED_DEADLINE		6
 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 #define SCHED_RESET_ON_FORK     0x40000000
@@ -136,6 +137,10 @@ struct sched_param {
  * Given this task model, there are a multiplicity of scheduling algorithms
  * and policies, that can be used to ensure all the tasks will make their
  * timing constraints.
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm is
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param_ex {
 	int sched_priority;
@@ -1089,6 +1094,7 @@ struct sched_domain;
 #define ENQUEUE_WAKEUP		1
 #define ENQUEUE_WAKING		2
 #define ENQUEUE_HEAD		4
+#define ENQUEUE_REPLENISH	8
 
 #define DEQUEUE_SLEEP		1
@@ -1222,6 +1228,47 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+	struct rb_node	rb_node;
+	int nr_cpus_allowed;
+
+	/*
+	 * Original scheduling parameters. Copied here from sched_param_ex
+	 * during sched_setscheduler_ex(), they will remain the same until
+	 * the next sched_setscheduler_ex().
+	 */
+	u64 dl_runtime;		/* maximum runtime for each instance	*/
+	u64 dl_deadline;	/* relative deadline of each instance	*/
+
+	/*
+	 * Actual scheduling parameters. Initialized with the values above,
+	 * they are continuously updated during task execution. Note that
+	 * the remaining runtime could be < 0 in case we are in overrun.
+	 */
+	s64 runtime;		/* remaining runtime for this instance	*/
+	u64 deadline;		/* absolute deadline for this instance	*/
+	unsigned int flags;	/* specifying the scheduler behaviour	*/
+
+	/*
+	 * Some bool flags:
+	 *
+	 * @dl_throttled tells if we exhausted the runtime. If so, the
+	 * task has to wait for a replenishment to be performed at the
+	 * next firing of dl_timer.
+	 *
+	 * @dl_new tells if a new instance arrived. If so we must
+	 * start executing it with full runtime and reset its absolute
+	 * deadline.
+	 */
+	int dl_throttled, dl_new;
+
+	/*
+	 * Bandwidth enforcement timer. Each -deadline task has its
+	 * own bandwidth to be enforced, thus we need one timer per task.
+	 */
+	struct hrtimer dl_timer;
+};
+
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -1251,6 +1298,7 @@ struct task_struct {
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
+	struct sched_dl_entity dl;
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* list of struct preempt_notifier: */
@@ -1580,6 +1628,10 @@ struct task_struct {
  * user-space. This allows kernel threads to set their
  * priority to a value higher than any user task. Note:
  * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ *
+ * SCHED_DEADLINE tasks have negative priorities, reflecting
+ * the fact that any of them has higher prio than RT and
+ * NORMAL/BATCH tasks.
  */
 
 #define MAX_USER_RT_PRIO	100
@@ -1588,9 +1640,23 @@ struct task_struct {
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
 #define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
 
+#define MAX_DL_PRIO		0
+
+static inline int dl_prio(int prio)
+{
+	if (unlikely(prio < MAX_DL_PRIO))
+		return 1;
+	return 0;
+}
+
+static inline int dl_task(struct task_struct *p)
+{
+	return dl_prio(p->prio);
+}
+
 static inline int rt_prio(int prio)
 {
-	if (unlikely(prio < MAX_RT_PRIO))
+	if (unlikely(prio >= MAX_DL_PRIO && prio < MAX_RT_PRIO))
 		return 1;
 	return 0;
 }
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 72206cf..9cd8564 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1574,7 +1574,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		unsigned long slack;
 
 		slack = current->timer_slack_ns;
-		if (rt_task(current))
+		if (dl_task(current) || rt_task(current))
 			slack = 0;
 
 		hrtimer_init_on_stack(&t.timer, clockid, mode);
diff --git a/kernel/sched.c b/kernel/sched.c
index 76f1bc6..d157358 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -128,11 +128,23 @@ static inline int rt_policy(int policy)
 	return 0;
 }
 
+static inline int dl_policy(int policy)
+{
+	if (unlikely(policy == SCHED_DEADLINE))
+		return 1;
+	return 0;
+}
+
 static inline int task_has_rt_policy(struct task_struct *p)
 {
 	return rt_policy(p->policy);
 }
 
+static inline int task_has_dl_policy(struct task_struct *p)
+{
+	return dl_policy(p->policy);
+}
+
 /*
  * This is the priority-queue data structure of the RT scheduling class:
  */
@@ -405,6 +417,15 @@ struct rt_rq {
 #endif
 };
 
+/* Deadline class' related fields in a runqueue */
+struct dl_rq {
+	/* runqueue is an rbtree, ordered by deadline */
+	struct rb_root rb_root;
+	struct rb_node *rb_leftmost;
+
+	unsigned long dl_nr_running;
+};
+
 #ifdef CONFIG_SMP
 
 /*
@@ -469,6 +490,7 @@ struct rq {
 	struct cfs_rq cfs;
 	struct rt_rq rt;
+	struct dl_rq dl;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
@@ -1852,8 +1874,6 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 #endif
 }
 
-static const struct sched_class rt_sched_class;
-
 #define sched_class_highest (&stop_sched_class)
 #define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)
@@ -2070,7 +2090,9 @@ static inline int normal_prio(struct task_struct *p)
 {
 	int prio;
 
-	if (task_has_rt_policy(p))
+	if (task_has_dl_policy(p))
+		prio = MAX_DL_PRIO-1;
+	else if (task_has_rt_policy(p))
 		prio = MAX_RT_PRIO-1 - p->rt_priority;
 	else
 		prio = __normal_prio(p);
@@ -2634,6 +2656,12 @@ static void __sched_fork(struct task_struct *p)
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
+	RB_CLEAR_NODE(&p->dl.rb_node);
+	hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	p->dl.dl_runtime = p->dl.runtime = 0;
+	p->dl.dl_deadline = p->dl.deadline = 0;
+	p->dl.flags = 0;
+
 	INIT_LIST_HEAD(&p->rt.run_list);
 	p->se.on_rq = 0;
 	INIT_LIST_HEAD(&p->se.group_node);
@@ -2662,7 +2690,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
+		if (p->policy == SCHED_DEADLINE ||
+		    p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
 			p->policy = SCHED_NORMAL;
 			p->normal_prio = p->static_prio;
 		}
@@ -4464,6 +4493,8 @@ long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
 }
 EXPORT_SYMBOL(sleep_on_timeout);
 
+static const struct sched_class dl_sched_class;
+
 #ifdef CONFIG_RT_MUTEXES
 
 /*
@@ -4497,7 +4528,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
-	if (rt_prio(prio))
+	if (dl_prio(prio))
+		p->sched_class = &dl_sched_class;
+	else if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
@@ -4533,9 +4566,9 @@ void set_user_nice(struct task_struct *p, long nice)
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
 	 * it wont have any effect on scheduling until the task is
-	 * SCHED_FIFO/SCHED_RR:
+	 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
 	 */
-	if (task_has_rt_policy(p)) {
+	if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
@@ -4680,7 +4713,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	p->normal_prio = normal_prio(p);
 	/* we are holding p->pi_lock already */
 	p->prio = rt_mutex_getprio(p);
-	if (rt_prio(p->prio))
+	if (dl_prio(p->prio))
+		p->sched_class = &dl_sched_class;
+	else if (rt_prio(p->prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
@@ -4688,6 +4723,19 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 }
 
 /*
+ * This function validates the new parameters of a -deadline task.
+ * We ask for the deadline not to be zero, and to be greater than
+ * or equal to the runtime.
+ */
+static bool
+__checkparam_dl(const struct sched_param_ex *prm)
+{
+	return prm && timespec_to_ns(&prm->sched_deadline) != 0 &&
+	       timespec_compare(&prm->sched_deadline,
+				&prm->sched_runtime) >= 0;
+}
+
+/*
  * check the target process has a UID that matches the current process's
  */
 static bool check_same_owner(struct task_struct *p)
@@ -4725,7 +4773,8 @@ recheck:
 		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
 		policy &= ~SCHED_RESET_ON_FORK;
 
-		if (policy != SCHED_FIFO && policy != SCHED_RR &&
+		if (policy != SCHED_DEADLINE &&
+		    policy != SCHED_FIFO && policy != SCHED_RR &&
 				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 				policy != SCHED_IDLE)
 			return -EINVAL;
@@ -4740,7 +4789,8 @@ recheck:
 	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if (rt_policy(policy) != (param->sched_priority != 0))
+	if ((dl_policy(policy) && !__checkparam_dl(param_ex)) ||
+	    (rt_policy(policy) != (param->sched_priority != 0)))
 		return -EINVAL;
 
 	/*
@@ -7980,6 +8030,11 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 #endif
 }
 
+static void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
+{
+	dl_rq->rb_root = RB_ROOT;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 				struct sched_entity *se, int cpu, int add,
@@ -8111,6 +8166,7 @@ void __init sched_init(void)
 		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
+		init_dl_rq(&rq->dl, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		init_task_group.shares = init_task_group_load;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -8301,7 +8357,7 @@ void normalize_rt_tasks(void)
 		p->se.statistics.block_start	= 0;
 #endif
 
-		if (!rt_task(p)) {
+		if (!dl_task(p) && !rt_task(p)) {
 			/*
 			 * Renice negative nice level userspace
 			 * tasks back to 0:
-- 
1.7.2.3


-- 
<> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)
http://blog.linux.it/raistlin / raistlin@ekiga.net /
dario.faggioli@jabber.org
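
Illustrative sketch 1: filling in the new parameters from userspace. This is
a self-contained demo program, not part of the patch. Only the sched_runtime
and sched_deadline fields and the "deadline != 0 && deadline >= runtime" rule
come from __checkparam_dl() above; the local struct mirror, the helper names
and the 10ms/100ms figures are assumptions made for the example. A real task
would hand the parameters to the kernel through sched_setscheduler_ex(), whose
userspace plumbing is introduced later in the series.

/* Illustrative only -- mirrors the admission check of __checkparam_dl(). */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define SCHED_DEADLINE	6

/* Local mirror of the sched_param_ex fields this patch actually uses. */
struct sched_param_ex_demo {
	int		sched_priority;		/* unused by -deadline tasks */
	struct timespec	sched_runtime;		/* becomes dl_runtime        */
	struct timespec	sched_deadline;		/* becomes dl_deadline       */
};

static int64_t ts_to_ns(const struct timespec *ts)
{
	return (int64_t)ts->tv_sec * 1000000000LL + ts->tv_nsec;
}

/* Same rule as __checkparam_dl(): deadline != 0 and deadline >= runtime. */
static int checkparam_dl_demo(const struct sched_param_ex_demo *prm)
{
	return prm && ts_to_ns(&prm->sched_deadline) != 0 &&
	       ts_to_ns(&prm->sched_deadline) >= ts_to_ns(&prm->sched_runtime);
}

int main(void)
{
	/* e.g. 10ms of runtime within a 100ms relative deadline */
	struct sched_param_ex_demo p = {
		.sched_runtime  = { 0, 10 * 1000 * 1000 },
		.sched_deadline = { 0, 100 * 1000 * 1000 },
	};

	printf("parameters %s be accepted\n",
	       checkparam_dl_demo(&p) ? "would" : "would not");

	/* a real task would now pass these to sched_setscheduler_ex() */
	return 0;
}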
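
Illustrative sketch 2: the prio mapping. Again a standalone demo, not part of
the patch; it just replays dl_prio()/rt_prio() and the values normal_prio()
now computes. -deadline tasks live below 0 in the unified prio space
(MAX_DL_PRIO-1 == -1), RT tasks in [0, MAX_RT_PRIO), NORMAL/BATCH tasks above
that, so any -deadline task beats both RT and NORMAL/BATCH tasks.

/* Illustrative only -- mirrors dl_prio()/rt_prio() from the patch. */
#include <stdio.h>

#define MAX_DL_PRIO	0
#define MAX_RT_PRIO	100

static int dl_prio(int prio) { return prio < MAX_DL_PRIO; }
static int rt_prio(int prio) { return prio >= MAX_DL_PRIO && prio < MAX_RT_PRIO; }

int main(void)
{
	/* prio values as normal_prio() would compute them */
	int deadline_prio = MAX_DL_PRIO - 1;		/* any -deadline task         */
	int fifo99_prio   = MAX_RT_PRIO - 1 - 99;	/* SCHED_FIFO, rt_priority 99 */
	int nice0_prio    = MAX_RT_PRIO + 20;		/* SCHED_NORMAL, nice 0       */

	printf("deadline: dl=%d rt=%d (prio %d)\n",
	       dl_prio(deadline_prio), rt_prio(deadline_prio), deadline_prio);
	printf("fifo/99:  dl=%d rt=%d (prio %d)\n",
	       dl_prio(fifo99_prio), rt_prio(fifo99_prio), fifo99_prio);
	printf("nice 0:   dl=%d rt=%d (prio %d)\n",
	       dl_prio(nice0_prio), rt_prio(nice0_prio), nice0_prio);

	/* lower value == higher priority: -1 < 0 < 120 */
	return 0;
}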
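
Illustrative sketch 3: why dl_rq is an rbtree ordered by deadline. Keeping
-deadline tasks sorted by absolute deadline, with rb_leftmost cached, makes
picking the next task a matter of taking the leftmost node, i.e. Earliest
Deadline First. The sketch below shows the same selection rule over a plain
array; it is conceptual only, the enqueue/pick code that operates on the
actual rbtree arrives in later patches of the series.

/* Illustrative only -- EDF selection over a toy "runqueue". */
#include <stdio.h>
#include <stdint.h>

struct demo_dl_entity {
	const char *name;
	uint64_t deadline;	/* absolute deadline, like sched_dl_entity.deadline */
};

/*
 * EDF: the runnable task with the earliest absolute deadline runs next.
 * In the kernel this is the leftmost node of dl_rq->rb_root (cached in
 * dl_rq->rb_leftmost); here we simply scan an array.
 */
static const struct demo_dl_entity *
pick_next_demo(const struct demo_dl_entity *v, int n)
{
	const struct demo_dl_entity *best = NULL;
	int i;

	for (i = 0; i < n; i++)
		if (!best || v[i].deadline < best->deadline)
			best = &v[i];
	return best;
}

int main(void)
{
	struct demo_dl_entity rq[] = {
		{ "A", 300 }, { "B", 150 }, { "C", 220 },
	};

	printf("next task: %s\n", pick_next_demo(rq, 3)->name);	/* prints B */
	return 0;
}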