All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luca Abeni <luca.abeni@unitn.it>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Juri Lelli <juri.lelli@arm.com>,
	Claudio Scordino <claudio@evidence.eu.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Luca Abeni <luca.abeni@unitn.it>
Subject: [RFC v3 2/6] Improve the tracking of active utilisation
Date: Mon, 24 Oct 2016 16:06:34 +0200	[thread overview]
Message-ID: <1477317998-7487-3-git-send-email-luca.abeni@unitn.it> (raw)
In-Reply-To: <1477317998-7487-1-git-send-email-luca.abeni@unitn.it>

This patch implements a more theoretically sound algorithm for
tracking the active utilisation: instead of decreasing it when a
task blocks, use a timer (the "inactive timer", named after the
"Inactive" task state of the GRUB algorithm) to decrease the
active utilisation at the so-called "0-lag time".

Signed-off-by: Luca Abeni <luca.abeni@unitn.it>
---
 include/linux/sched.h   |   1 +
 kernel/sched/core.c     |   1 +
 kernel/sched/deadline.c | 139 ++++++++++++++++++++++++++++++++++++++++++------
 kernel/sched/sched.h    |   1 +
 4 files changed, 126 insertions(+), 16 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b..22543c6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1433,6 +1433,7 @@ struct sched_dl_entity {
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer dl_timer;
+	struct hrtimer inactive_timer;
 };
 
 union rcu_special {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1..664c618 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2217,6 +2217,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
+	init_inactive_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3d95c1d..80d1541 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -47,6 +47,7 @@ static void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	u64 se_bw = dl_se->dl_bw;
 
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
 	dl_rq->running_bw += se_bw;
 }
 
@@ -54,11 +55,52 @@ static void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	u64 se_bw = dl_se->dl_bw;
 
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
 	dl_rq->running_bw -= se_bw;
 	if (WARN_ON(dl_rq->running_bw < 0))
 		dl_rq->running_bw = 0;
 }
 
+static void task_go_inactive(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+	struct hrtimer *timer = &dl_se->inactive_timer;
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq);
+	s64 zerolag_time;
+
+	WARN_ON(dl_se->dl_runtime == 0);
+
+	/* If the inactive timer is already armed, return immediately */
+	if (hrtimer_active(&dl_se->inactive_timer))
+		return;
+
+	zerolag_time = dl_se->deadline -
+		 div64_long((dl_se->runtime * dl_se->dl_period),
+			dl_se->dl_runtime);
+
+	/*
+	 * Using relative times instead of the absolute "0-lag time"
+	 * simplifies the code
+	 */
+	zerolag_time -= rq_clock(rq);
+
+	/*
+	 * If the "0-lag time" already passed, decrease the active
+	 * utilization now, instead of starting a timer
+	 */
+	if (zerolag_time < 0) {
+		sub_running_bw(dl_se, dl_rq);
+		if (!dl_task(p))
+			__dl_clear_params(p);
+
+		return;
+	}
+
+	get_task_struct(p);
+	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -514,7 +556,20 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	add_running_bw(dl_se, dl_rq);
+	if (hrtimer_is_queued(&dl_se->inactive_timer)) {
+		hrtimer_try_to_cancel(&dl_se->inactive_timer);
+		WARN_ON(dl_task_of(dl_se)->nr_cpus_allowed > 1);
+	} else {
+		/*
+		 * The "inactive timer" has been cancelled in
+		 * select_task_rq_dl() (and the active utilisation has
+		 * been decreased). So, increase the active utilisation.
+		 * If select_task_rq_dl() could not cancel the timer,
+		 * inactive_task_timer() will find the task state as
+		 * TASK_RUNNING, and will do nothing, so we are still safe.
+		 */
+		add_running_bw(dl_se, dl_rq);
+	}
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
@@ -602,14 +657,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 
 	rq = task_rq_lock(p, &rf);
 
-	/*
-	 * The task might have changed its scheduling policy to something
-	 * different than SCHED_DEADLINE (through switched_fromd_dl()).
-	 */
-	if (!dl_task(p)) {
-		__dl_clear_params(p);
+	if (!dl_task(p))
 		goto unlock;
-	}
 
 	/*
 	 * The task might have been boosted by someone else and might be in the
@@ -796,6 +845,44 @@ static void update_curr_dl(struct rq *rq)
 	}
 }
 
+static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+{
+	struct sched_dl_entity *dl_se = container_of(timer,
+						     struct sched_dl_entity,
+						     inactive_timer);
+	struct task_struct *p = dl_task_of(dl_se);
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+
+	if (!dl_task(p)) {
+		__dl_clear_params(p);
+
+		goto unlock;
+	}
+	if (p->state == TASK_RUNNING)
+		goto unlock;
+
+	sched_clock_tick();
+	update_rq_clock(rq);
+
+	sub_running_bw(dl_se, &rq->dl);
+unlock:
+	task_rq_unlock(rq, p, &rf);
+	put_task_struct(p);
+
+	return HRTIMER_NORESTART;
+}
+
+void init_inactive_task_timer(struct sched_dl_entity *dl_se)
+{
+	struct hrtimer *timer = &dl_se->inactive_timer;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = inactive_task_timer;
+}
+
 #ifdef CONFIG_SMP
 
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -1000,7 +1087,7 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		sub_running_bw(&p->dl, &rq->dl);
 
 	if (flags & DEQUEUE_SLEEP)
-		sub_running_bw(&p->dl, &rq->dl);
+		task_go_inactive(p);
 }
 
 /*
@@ -1074,6 +1161,14 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	}
 	rcu_read_unlock();
 
+	rq = task_rq(p);
+	raw_spin_lock(&rq->lock);
+	if (hrtimer_active(&p->dl.inactive_timer)) {
+		sub_running_bw(&p->dl, &rq->dl);
+		hrtimer_try_to_cancel(&p->dl.inactive_timer);
+	}
+	raw_spin_unlock(&rq->lock);
+
 out:
 	return cpu;
 }
@@ -1244,6 +1339,11 @@ static void task_dead_dl(struct task_struct *p)
 	/* XXX we should retain the bw until 0-lag */
 	dl_b->total_bw -= p->dl.dl_bw;
 	raw_spin_unlock_irq(&dl_b->lock);
+	if (hrtimer_active(&p->dl.inactive_timer)) {
+		raw_spin_lock_irq(&task_rq(p)->lock);
+		sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
+		raw_spin_unlock_irq(&task_rq(p)->lock);
+	}
 }
 
 static void set_curr_task_dl(struct rq *rq)
@@ -1720,15 +1820,22 @@ void __init init_sched_dl_class(void)
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * Start the deadline timer; if we switch back to dl before this we'll
-	 * continue consuming our current CBS slice. If we stay outside of
-	 * SCHED_DEADLINE until the deadline passes, the timer will reset the
-	 * task.
+	 * task_go_inactive() can start the "inactive timer" (if the 0-lag
+	 * time is in the future). If the task switches back to dl before
+	 * the "inactive timer" fires, it can continue to consume its current
+	 * runtime using its current deadline. If it stays outside of
+	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
+	 * will reset the task parameters.
 	 */
-	if (!start_dl_timer(p))
-		__dl_clear_params(p);
+	if (task_on_rq_queued(p) && p->dl.dl_runtime)
+		task_go_inactive(p);
 
-	if (task_on_rq_queued(p))
+	/*
+	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
+	 * at the 0-lag time, because the task could have been migrated
+	 * while SCHED_OTHER in the meanwhile.
+	 */
+	if (hrtimer_is_queued(&p->dl.inactive_timer))
 		sub_running_bw(&p->dl, &rq->dl);
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3a36c74..e82c419 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1358,6 +1358,7 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
 extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_inactive_task_timer(struct sched_dl_entity *dl_se);
 
 unsigned long to_ratio(u64 period, u64 runtime);
 
-- 
2.7.4

  parent reply	other threads:[~2016-10-24 14:07 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-24 14:06 [RFC v3 0/6] CPU reclaiming for SCHED_DEADLINE Luca Abeni
2016-10-24 14:06 ` [RFC v3 1/6] Track the active utilisation Luca Abeni
2016-10-25  9:09   ` Daniel Bristot de Oliveira
2016-10-25  9:29     ` luca abeni
2016-10-25 13:58       ` Steven Rostedt
2016-10-25 18:04         ` Luca Abeni
2016-11-18 14:23         ` Peter Zijlstra
2016-11-18 15:10           ` luca abeni
2016-11-18 15:28             ` Peter Zijlstra
2016-11-18 16:42           ` Steven Rostedt
2016-12-05 22:30           ` luca abeni
2016-12-06  8:35             ` Peter Zijlstra
2016-12-06  8:57               ` luca abeni
2016-12-06 13:47               ` luca abeni
2016-11-01 16:45   ` Juri Lelli
2016-11-01 21:10     ` luca abeni
2016-11-08 17:56       ` Juri Lelli
2016-11-08 18:17         ` Luca Abeni
2016-11-08 18:53           ` Juri Lelli
2016-11-08 19:09             ` Luca Abeni
2016-11-08 20:02               ` Juri Lelli
2016-11-09 15:25                 ` luca abeni
2016-11-09 16:29         ` luca abeni
2016-11-18 14:55         ` Peter Zijlstra
2016-11-18 13:55   ` Peter Zijlstra
2016-11-18 15:06     ` luca abeni
2016-10-24 14:06 ` Luca Abeni [this message]
2016-11-01 16:46   ` [RFC v3 2/6] Improve the tracking of " Juri Lelli
2016-11-01 21:46     ` luca abeni
2016-11-02  2:35       ` luca abeni
2016-11-10 10:04         ` Juri Lelli
2016-11-10 11:56           ` Juri Lelli
2016-11-10 12:15             ` luca abeni
2016-11-10 12:34               ` Juri Lelli
2016-11-10 12:45                 ` luca abeni
2016-11-02  2:41   ` luca abeni
2016-11-18 15:36   ` Peter Zijlstra
2016-11-18 15:56     ` luca abeni
2016-11-18 15:47   ` Peter Zijlstra
2016-11-18 16:06     ` luca abeni
2016-11-18 18:49       ` Peter Zijlstra
2016-10-24 14:06 ` [RFC v3 3/6] Fix the update of the total -deadline utilization Luca Abeni
2016-10-24 14:06 ` [RFC v3 4/6] GRUB accounting Luca Abeni
2016-10-24 14:06 ` [RFC v3 5/6] Do not reclaim the whole CPU bandwidth Luca Abeni
2016-10-24 14:06 ` [RFC v3 6/6] Make GRUB a task's flag Luca Abeni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1477317998-7487-3-git-send-email-luca.abeni@unitn.it \
    --to=luca.abeni@unitn.it \
    --cc=claudio@evidence.eu.com \
    --cc=juri.lelli@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.