From: Luca Abeni <luca.abeni@unitn.it>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Juri Lelli <juri.lelli@arm.com>,
	Luca Abeni <luca.abeni@unitn.it>
Subject: [RFC 4/8] Improve the tracking of active utilisation
Date: Thu, 14 Jan 2016 16:24:49 +0100
Message-ID: <1452785094-3086-5-git-send-email-luca.abeni@unitn.it>
In-Reply-To: <1452785094-3086-1-git-send-email-luca.abeni@unitn.it>

This patch implements a more theoretically sound algorithm for
tracking the active utilisation: instead of decreasing it when a
task blocks, use a timer (the "inactive timer", named after the
"Inactive" task state of the GRUB algorithm) to decrease the
active utilisation at the so-called "0-lag time".
---
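A note on the "0-lag time" mentioned above: for a task with remaining
runtime q, absolute deadline d and reserved bandwidth U = dl_runtime /
dl_period, the 0-lag time is t0 = d - q / U; from t0 on, the task's
bandwidth can be reclaimed without endangering the guarantees of the
other deadline tasks. The following stand-alone userspace sketch of the
arithmetic is for illustration only (the function name and the main()
driver are made up; the kernel code below does the same computation
with div64_long() on u64/s64 values):

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * 0-lag time: t0 = deadline - remaining_runtime / U, with
	 * U = dl_runtime / dl_period, i.e.
	 * t0 = deadline - remaining_runtime * dl_period / dl_runtime.
	 * All times are in nanoseconds, as in the kernel.
	 */
	static uint64_t zerolag_time(uint64_t deadline, uint64_t runtime,
				     uint64_t dl_runtime, uint64_t dl_period)
	{
		return deadline - (runtime * dl_period) / dl_runtime;
	}

	int main(void)
	{
		/*
		 * Reservation: 10ms every 100ms (U = 0.1); 4ms of runtime
		 * left, absolute deadline at t = 200ms.  The 0-lag time is
		 * 200ms - 4ms / 0.1 = 160ms, so this prints 160000000.
		 */
		printf("%llu\n", (unsigned long long)
		       zerolag_time(200000000ULL, 4000000ULL,
				    10000000ULL, 100000000ULL));
		return 0;
	}
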
 include/linux/sched.h   |   1 +
 kernel/sched/core.c     |   1 +
 kernel/sched/deadline.c | 152 ++++++++++++++++++++++++++++++++++++++++++------
 kernel/sched/sched.h    |   1 +
 4 files changed, 137 insertions(+), 18 deletions(-)

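Another illustration-only note (not part of the patch): the bookkeeping
protocol implemented below can be summarised as follows. When a task
blocks, task_go_inactive() arms the inactive timer at the 0-lag time
instead of removing the task's bandwidth; if the task wakes up before
the timer fires, the timer is cancelled and the bandwidth is left
untouched; otherwise the timer handler removes the bandwidth and a
later wakeup adds it back. A toy, single-CPU model of this state
machine (all names are made up):

	#include <stdio.h>
	#include <stdbool.h>

	struct toy_rq { unsigned long running_bw; };
	struct toy_task {
		unsigned long bw;
		bool timer_armed;	/* stands in for inactive_timer */
	};

	/* Task blocks: keep its bandwidth, arm the "inactive timer". */
	static void go_inactive(struct toy_task *t)
	{
		t->timer_armed = true;
	}

	/* The timer fires at the 0-lag time: drop the bandwidth. */
	static void timer_fired(struct toy_rq *rq, struct toy_task *t)
	{
		t->timer_armed = false;
		rq->running_bw -= t->bw;
	}

	/* Task wakes up. */
	static void wake_up_task(struct toy_rq *rq, struct toy_task *t)
	{
		if (t->timer_armed)
			t->timer_armed = false;	/* cancel; bw still counted */
		else
			rq->running_bw += t->bw;	/* timer already fired */
	}

	int main(void)
	{
		struct toy_rq rq = { .running_bw = 10 };
		struct toy_task t = { .bw = 10, .timer_armed = false };

		go_inactive(&t);		/* task blocks */
		wake_up_task(&rq, &t);		/* wakes before 0-lag */
		printf("%lu\n", rq.running_bw);	/* still 10 */

		go_inactive(&t);		/* blocks again */
		timer_fired(&rq, &t);		/* sleeps past 0-lag */
		printf("%lu\n", rq.running_bw);	/* now 0 */

		wake_up_task(&rq, &t);		/* bandwidth added back */
		printf("%lu\n", rq.running_bw);	/* 10 again */
		return 0;
	}
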
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61aa9bb..50f212f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1346,6 +1346,7 @@ struct sched_dl_entity {
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer dl_timer;
+	struct hrtimer inactive_timer;
 };
 
 union rcu_special {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44253ad..7ca17e4c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2215,6 +2215,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
+	init_inactive_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d8e9962..0efa596 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -49,6 +49,7 @@ static void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	u64 se_bw = dl_se->dl_bw;
 
+	lockdep_assert_held(&rq_of_dl_rq(dl_rq)->lock);
 	dl_rq->running_bw += se_bw;
 	trace_sched_stat_running_bw_add(dl_task_of(dl_se), se_bw, dl_rq->running_bw);
 }
@@ -57,6 +58,7 @@ static void clear_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	u64 se_bw = dl_se->dl_bw;
 
+	lockdep_assert_held(&rq_of_dl_rq(dl_rq)->lock);
 	dl_rq->running_bw -= se_bw;
 	trace_sched_stat_running_bw_clear(dl_task_of(dl_se), se_bw, dl_rq->running_bw);
 	if (dl_rq->running_bw < 0) {
@@ -65,6 +67,62 @@ static void clear_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 	}
 }
 
+static void task_go_inactive(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+	struct hrtimer *timer = &dl_se->inactive_timer;
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq);
+	ktime_t now, act;
+	s64 delta;
+	u64 zerolag_time;
+
+	WARN_ON(dl_se->dl_runtime == 0);
+
+	/* If the inactive timer is already armed, return immediately */
+	if (hrtimer_active(&dl_se->inactive_timer))
+		return;
+
+	/*
+	 * We want the timer to fire at the "0-lag time"; however, the
+	 * value we compute is relative to rq->clock, while hrtimers use
+	 * their own time base, so convert between the two below.
+	 */
+	zerolag_time = dl_se->deadline -
+		div64_long(dl_se->runtime * dl_se->dl_period, dl_se->dl_runtime);
+
+	act = ns_to_ktime(zerolag_time);
+	now = hrtimer_cb_get_time(timer);
+	delta = ktime_to_ns(now) - rq_clock(rq);
+	act = ktime_add_ns(act, delta);
+
+	/*
+	 * If the "0-lag time" already passed, decrease the active
+	 * utilisation now, instead of starting a timer
+	 */
+	if (ktime_us_delta(act, now) < 0) {
+		clear_running_bw(dl_se, dl_rq);
+		if (!dl_task(p)) {
+			__dl_clear_params(p);
+		}
+		return;
+	}
+
+	if (!hrtimer_is_queued(timer)) {
+		hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+	}
+
+	if (!hrtimer_active(timer)) {
+		printk(KERN_WARNING "Problem activating the inactive timer!\n");
+		clear_running_bw(dl_se, dl_rq);
+		if (!dl_task(p)) {
+			__dl_clear_params(p);
+		}
+	} else {
+		get_task_struct(p);
+	}
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -522,7 +580,6 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	add_running_bw(dl_se, dl_rq);
 
 	/*
 	 * The arrival of a new instance needs special treatment, i.e.,
@@ -530,9 +587,20 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 	 */
 	if (dl_se->dl_new) {
 		setup_new_dl_entity(dl_se, pi_se);
+		add_running_bw(dl_se, dl_rq);
 		return;
 	}
 
+	/* If the "inactive timer" is still armed, cancel it and leave
+	 * the active utilisation unchanged; otherwise, the utilisation
+	 * has already been decreased, so increase it again.
+	 */
+	if (hrtimer_active(&dl_se->inactive_timer)) {
+		hrtimer_try_to_cancel(&dl_se->inactive_timer);
+	} else {
+		add_running_bw(dl_se, dl_rq);
+	}
+
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
@@ -619,12 +687,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 
 	rq = task_rq_lock(p, &flags);
 
-	/*
-	 * The task might have changed its scheduling policy to something
-	 * different than SCHED_DEADLINE (through switched_fromd_dl()).
-	 */
 	if (!dl_task(p)) {
-		__dl_clear_params(p);
 		goto unlock;
 	}
 
@@ -811,6 +874,49 @@ static void update_curr_dl(struct rq *rq)
 	}
 }
 
+static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+{
+	struct sched_dl_entity *dl_se = container_of(timer,
+						     struct sched_dl_entity,
+						     inactive_timer);
+	struct task_struct *p = dl_task_of(dl_se);
+	unsigned long flags;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &flags);
+
+	if (dl_se->dl_new) {
+		printk(KERN_WARNING "Problem! New task was inactive?\n");
+		goto unlock;
+	}
+	if (!dl_task(p)) {
+		__dl_clear_params(p);
+
+		goto unlock;
+	}
+	if (p->state == TASK_RUNNING) {
+		goto unlock;
+	}
+
+	sched_clock_tick();
+	update_rq_clock(rq);
+
+	clear_running_bw(dl_se, &rq->dl);
+unlock:
+	task_rq_unlock(rq, p, &flags);
+	put_task_struct(p);
+
+	return HRTIMER_NORESTART;
+}
+
+void init_inactive_task_timer(struct sched_dl_entity *dl_se)
+{
+	struct hrtimer *timer = &dl_se->inactive_timer;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = inactive_task_timer;
+}
+
 #ifdef CONFIG_SMP
 
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -987,7 +1093,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * run yet) will take care of this.
 	 */
 	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
-		add_running_bw(&p->dl, &rq->dl);
+		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) < 0) {
+			printk(KERN_WARNING "Waking up a depleted task, but cannot cancel the inactive timer!\n");
+			add_running_bw(&p->dl, &rq->dl);
+		}
 		return;
 	}
 
@@ -1009,7 +1118,7 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	update_curr_dl(rq);
 	__dequeue_task_dl(rq, p, flags);
 	if (flags & DEQUEUE_SLEEP)
-		clear_running_bw(&p->dl, &rq->dl);
+		task_go_inactive(p);
 }
 
 /*
@@ -1087,6 +1196,19 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	}
 	rcu_read_unlock();
 
+	/* While the inactive timer is armed, a migrating task must move
+	 * its active utilisation from the old runqueue to the new one.
+	 */
+	if (rq != cpu_rq(cpu) && hrtimer_active(&p->dl.inactive_timer)) {
+		raw_spin_lock(&rq->lock);
+		clear_running_bw(&p->dl, &rq->dl);
+		raw_spin_unlock(&rq->lock);
+		rq = cpu_rq(cpu);
+		raw_spin_lock(&rq->lock);
+		add_running_bw(&p->dl, &rq->dl);
+		raw_spin_unlock(&rq->lock);
+	}
+
 out:
 	return cpu;
 }
@@ -1248,8 +1370,6 @@ static void task_fork_dl(struct task_struct *p)
 static void task_dead_dl(struct task_struct *p)
 {
 	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-	struct dl_rq *dl_rq = dl_rq_of_se(&p->dl);
-	struct rq *rq = rq_of_dl_rq(dl_rq);
 
 	/*
 	 * Since we are TASK_DEAD we won't slip out of the domain!
@@ -1258,10 +1378,6 @@ static void task_dead_dl(struct task_struct *p)
 	/* XXX we should retain the bw until 0-lag */
 	dl_b->total_bw -= p->dl.dl_bw;
 	raw_spin_unlock_irq(&dl_b->lock);
-
-	if (task_on_rq_queued(p)) {
-		clear_running_bw(&p->dl, &rq->dl);
-	}
 }
 
 static void set_curr_task_dl(struct rq *rq)
@@ -1742,12 +1858,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
 	 * SCHED_DEADLINE until the deadline passes, the timer will reset the
 	 * task.
 	 */
-	if (!start_dl_timer(p))
+	if (task_on_rq_queued(p))
+		task_go_inactive(p);
+	if (!hrtimer_active(&p->dl.inactive_timer))
 		__dl_clear_params(p);
-
-	if (task_on_rq_queued(p)) {
+	else if (!hrtimer_callback_running(&p->dl.inactive_timer))
 		clear_running_bw(&p->dl, &rq->dl);
-	}
 
 	/*
 	 * Since this might be the only -deadline task on the rq,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 826ca6a..9d0fdb1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1278,6 +1278,7 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
 extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_inactive_task_timer(struct sched_dl_entity *dl_se);
 
 unsigned long to_ratio(u64 period, u64 runtime);
 
-- 
1.9.1
