All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fabio Checconi <fchecconi@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	Paul Turner <pjt@google.com>,
	Dario Faggioli <faggioli@gandalf.sssup.it>,
	Michael Trimarchi <michael@evidence.eu.com>,
	Dhaval Giani <dhaval@retis.sssup.it>,
	Tommaso Cucinotta <t.cucinotta@sssup.it>,
	linux-kernel@vger.kernel.org,
	Fabio Checconi <fchecconi@gmail.com>,
	Fabio Checconi <fabio@gandalf.sssup.it>
Subject: [PATCH 3/3] sched: make runtime balancing code more EDF-friendly
Date: Tue, 23 Feb 2010 19:56:35 +0100	[thread overview]
Message-ID: <61a4579f9c15818ee9c878d21b9db90480f16261.1266931410.git.fabio@helm.retis> (raw)
In-Reply-To: <cover.1266931410.git.fabio@helm.retis>
In-Reply-To: <cover.1266931410.git.fabio@helm.retis>

The old runtime balancing code logic does not work too well when using
EDF to schedule runqueues.  One of the problems is that the old throttling
code had only one timer to handle budget replenishments; EDF needs per-cpu
timers, not in sync among themselves.

This patch changes balance_runtime() to work in push or pull mode: 1) when
an rt_rq needs extra runtime, it tries to borrow it from its siblings, and
2) when an rt_rq has some borrowed runtime, it tries to distribute it
among the rt_rq's needing it.  This change improves things a little bit,
making it easier to redistribute bandwidth when the load on the various
rt_rq's changes; anyway this is far from being an optimal solution.
I think we need (independently from this patchset) to make the balancing
logic cooperate with (or at least aware of) task push/pulls and define
more formally the objectives of the runtime migration policy.

Signed-off-by: Fabio Checconi <fabio@gandalf.sssup.it>
---
 kernel/sched_rt.c |  152 ++++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 116 insertions(+), 36 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 27768bd..fa0ad58 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -498,6 +498,20 @@ static u64 from_ratio(unsigned long ratio, u64 period)
 	return (ratio * period) >> 20;
 }
 
+static inline bool runtime_push_needed(struct rt_rq *src, struct rt_rq *dst)
+{
+	if (!rt_rq_throttled(dst))
+		return false;
+
+	if (!dst->rt_nr_running)
+		return false;
+
+	if (src->rt_runtime <= dst->rt_runtime)
+		return false;
+
+	return true;
+}
+
 /*
  * Try to move *diff units of runtime from src to dst, checking
  * that the utilization does not exceed the global limits on the
@@ -505,14 +519,14 @@ static u64 from_ratio(unsigned long ratio, u64 period)
  * in *diff the actual amount of runtime moved, false on failure, which
  * means that no more bandwidth can be migrated to rt_rq.
  */
-static int rt_move_bw(struct rt_rq *src, struct rt_rq *dst,
+static bool rt_move_bw(struct rt_rq *src, struct rt_rq *dst,
 		       s64 *diff, u64 rt_period)
 {
 	struct rq *rq = rq_of_rt_rq(dst), *src_rq = rq_of_rt_rq(src);
 	struct rt_edf_tree *dtree = &rq->rt.rt_edf_tree;
 	struct rt_edf_tree *stree = &src_rq->rt.rt_edf_tree;
 	unsigned long bw_to_move;
-	int ret = 0;
+	bool ret = false;
 
 	double_spin_lock(&dtree->rt_bw_lock, &stree->rt_bw_lock);
 
@@ -525,7 +539,7 @@ static int rt_move_bw(struct rt_rq *src, struct rt_rq *dst,
 
 		stree->rt_free_bw -= bw_to_move;
 		dtree->rt_free_bw += bw_to_move;
-		ret = 1;
+		ret = true;
 	}
 
 	double_spin_unlock(&dtree->rt_bw_lock, &stree->rt_bw_lock);
@@ -533,18 +547,85 @@ static int rt_move_bw(struct rt_rq *src, struct rt_rq *dst,
 	return ret;
 }
 
+static inline bool stop_runtime_balancing(bool pull, bool moved, s64 diff)
+{
+	if (pull && !moved) {
+		/* No more available bw on our cpu while pulling. */
+		return true;
+	}
+
+	if (!pull && diff < 0) {
+		/* No more bandwidth to give back while pushing. */
+		return true;
+	}
+
+	return false;
+}
+
 /*
- * Handle runtime rebalancing: try to push our bandwidth to
- * runqueues that need it.
+ * Try to move runtime between rt_rq and iter, in the direction specified
+ * by pull.  Return true if balancing should stop.
  */
-static void do_balance_runtime(struct rt_rq *rt_rq)
+static inline bool move_runtime(struct rt_rq *rt_rq, struct rt_rq *iter,
+			       int weight, u64 rt_period, bool pull)
+{
+	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+	struct rt_rq *src, *dst;
+	u64 leave;
+	s64 diff = 0;
+	bool moved = true;
+
+	if (pull) {
+		/*
+		 * From runqueues with spare time, take 1/n part of their
+		 * spare time, but no more than our period.  In case we are
+		 * stealing from an active rt_rq, make sure we steal only
+		 * from the runtime it borrowed, to avoid instability.
+		 */
+		if (iter->rt_nr_running)
+			leave = max(rt_b->rt_runtime, iter->rt_time);
+		else
+			leave = iter->rt_time;
+		diff = iter->rt_runtime - leave;
+		src = iter;
+		dst = rt_rq;
+	} else if (runtime_push_needed(rt_rq, iter)) {
+		/*
+		 * Try to give 1/n part of our borrowed runtime to the
+		 * runqueues needing it.
+		 */
+		diff = rt_rq->rt_runtime - rt_rq->rt_time - rt_b->rt_runtime;
+		src = rt_rq;
+		dst = iter;
+	}
+
+	if (diff > 0) {
+		diff = div_u64((u64)diff, weight);
+		if (dst->rt_runtime + diff > rt_period)
+			diff = rt_period - dst->rt_runtime;
+
+		moved = rt_move_bw(src, dst, &diff, rt_period);
+		if (moved) {
+			src->rt_runtime -= diff;
+			dst->rt_runtime += diff;
+		}
+	}
+
+	return stop_runtime_balancing(pull, moved, diff);
+}
+
+/*
+ * Handle runtime rebalancing: either try to push our bandwidth to
+ * runqueues that need it, or pull from those that can lend some.
+ */
+static void do_balance_runtime(struct rt_rq *rt_rq, bool pull)
 {
 	struct rq *rq = cpu_rq(smp_processor_id());
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 	struct root_domain *rd = rq->rd;
-	int i, weight, ret;
+	int i, weight;
 	u64 rt_period, prev_runtime;
-	s64 diff;
+	bool stop;
 
 	weight = cpumask_weight(rd->span);
 
@@ -581,26 +662,10 @@ static void do_balance_runtime(struct rt_rq *rt_rq)
 		if (iter->rt_runtime == RUNTIME_INF)
 			goto next;
 
-		/*
-		 * From runqueues with spare time, take 1/n part of their
-		 * spare time, but no more than our period.
-		 */
-		diff = iter->rt_runtime - iter->rt_time;
-		if (diff > 0) {
-			diff = div_u64((u64)diff, weight);
-			if (rt_rq->rt_runtime + diff > rt_period)
-				diff = rt_period - rt_rq->rt_runtime;
-
-			ret = rt_move_bw(iter, rt_rq, &diff, rt_period);
-			if (ret) {
-				iter->rt_runtime -= diff;
-				rt_rq->rt_runtime += diff;
-			}
-
-			if (!ret || rt_rq->rt_runtime == rt_period) {
-				raw_spin_unlock(&iter->rt_runtime_lock);
-				break;
-			}
+		stop = move_runtime(rt_rq, iter, weight, rt_period, pull);
+		if (stop) {
+			raw_spin_unlock(&iter->rt_runtime_lock);
+			break;
 		}
 next:
 		raw_spin_unlock(&iter->rt_runtime_lock);
@@ -756,16 +821,27 @@ static void enable_runtime(struct rq *rq)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-static inline void balance_runtime(struct rt_rq *rt_rq)
+static inline void balance_runtime(struct rt_rq *rt_rq, int pull)
 {
-	if (rt_rq->rt_time > rt_rq->rt_runtime) {
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-		do_balance_runtime(rt_rq);
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-	}
+	raw_spin_unlock(&rt_rq->rt_runtime_lock);
+	do_balance_runtime(rt_rq, pull);
+	raw_spin_lock(&rt_rq->rt_runtime_lock);
 }
 
+static void pull_runtime(struct rt_rq *rt_rq)
+{
+	if (rt_rq->rt_time > rt_rq->rt_runtime)
+		balance_runtime(rt_rq, true);
+}
+
+static void push_runtime(struct rt_rq *rt_rq)
+{
+	if (rt_rq->rt_time < rt_rq->rt_runtime)
+		balance_runtime(rt_rq, false);
+}
 #else /* !CONFIG_SMP */
+static inline void pull_runtime(struct rt_rq *rt_rq) {}
+static inline void push_runtime(struct rt_rq *rt_rq) {}
 
 static void rt_reset_runtime(void)
 {
@@ -828,6 +904,9 @@ static int __do_sched_rt_period_timer(struct rt_rq *rt_rq, int overrun)
 	} else if (rt_rq->rt_nr_running)
 		idle = 0;
 
+	/* Try to give back what we borrowed from rt_rq's in need. */
+	push_runtime(rt_rq);
+
 	return idle && !rt_rq_needs_recharge(rt_rq);
 }
 
@@ -843,7 +922,7 @@ static int do_sched_rt_period_timer(struct rt_rq *rt_rq, int overrun)
 	raw_spin_lock(&rt_rq->rt_runtime_lock);
 
 	if (rt_rq->rt_throttled)
-		balance_runtime(rt_rq);
+		pull_runtime(rt_rq);
 
 	idle = __do_sched_rt_period_timer(rt_rq, overrun);
 
@@ -919,7 +998,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
 		return 0;
 
-	balance_runtime(rt_rq);
+	pull_runtime(rt_rq);
 	runtime = sched_rt_runtime(rt_rq);
 	if (runtime == RUNTIME_INF)
 		return 0;
@@ -1261,6 +1340,7 @@ update:
 			rt_rq->rt_deadline += period;
 			rt_rq->rt_time -= runtime;
 		}
+		push_runtime(rt_rq);
 	}
 	raw_spin_unlock(&rt_rq->rt_runtime_lock);
 }
-- 
1.6.5.7


  parent reply	other threads:[~2010-02-23 18:48 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-23 18:56 [PATCH 0/3] sched: use EDF to throttle RT task groups v2 Fabio Checconi
2010-02-23 18:56 ` [PATCH 1/3] sched: use EDF to schedule groups Fabio Checconi
2010-02-25 20:28   ` Peter Zijlstra
2010-03-03 16:59     ` Fabio Checconi
2010-02-23 18:56 ` [PATCH 2/3] sched: enforce per-cpu utilization limits on runtime balancing Fabio Checconi
2010-02-25 20:28   ` Peter Zijlstra
2010-03-03 16:59     ` Fabio Checconi
2010-02-25 20:28   ` Peter Zijlstra
2010-03-03 17:00     ` Fabio Checconi
2010-03-23 20:32       ` Peter Zijlstra
2010-02-25 20:28   ` Peter Zijlstra
2010-03-03 16:59     ` Fabio Checconi
2010-02-25 20:28   ` Peter Zijlstra
2010-03-03 17:00     ` Fabio Checconi
2010-03-23 20:33       ` Peter Zijlstra
2010-02-25 20:28   ` Peter Zijlstra
2010-03-03 17:00     ` Fabio Checconi
2010-02-23 18:56 ` Fabio Checconi [this message]
2010-02-25 20:28   ` [PATCH 3/3] sched: make runtime balancing code more EDF-friendly Peter Zijlstra
2010-03-03 17:01     ` Fabio Checconi
2010-02-25 20:28 ` [PATCH 0/3] sched: use EDF to throttle RT task groups v2 Peter Zijlstra
2010-02-27 12:33 ` Peter Zijlstra
2010-03-03 17:01   ` Fabio Checconi
2010-03-23 20:30     ` Peter Zijlstra
2010-03-23 20:56       ` Dhaval Giani
2010-03-23 21:51         ` Tommaso Cucinotta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=61a4579f9c15818ee9c878d21b9db90480f16261.1266931410.git.fabio@helm.retis \
    --to=fchecconi@gmail.com \
    --cc=dhaval@retis.sssup.it \
    --cc=fabio@gandalf.sssup.it \
    --cc=faggioli@gandalf.sssup.it \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael@evidence.eu.com \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=t.cucinotta@sssup.it \
    --cc=tglx@linutronix.de \
    --subject='Re: [PATCH 3/3] sched: make runtime balancing code more EDF-friendly' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.