linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel@vger.kernel.org
Cc: mingo@elte.hu, efault@gmx.de, vatsa@in.ibm.com,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 6/8] sched: avg_vruntime
Date: Fri, 24 Oct 2008 11:06:17 +0200	[thread overview]
Message-ID: <20081024091109.832347739@chello.nl> (raw)
In-Reply-To: 20081024090611.936552213@chello.nl

[-- Attachment #1: sched-avg-vruntime.patch --]
[-- Type: text/plain, Size: 4344 bytes --]

Renicing requires scaling the lag. Therefore we need a way to compute it.
Lag is defined as the difference between the service time received from the
ideal model and the actual scheduler.

The defining property of a fair scheduler is that the sum of all lags is zero;
this can be seen to be trivially true for the ideal case, as all lags are zero.

Therefore, the average of all virtual runtimes will be the point of zero lag.

We cannot prove fairness for CFS due to sleeper fairness (without it we can).
However since we can observe it does converge to fairness in stable operation,
we can say the zero lag point converges to the average.

We can't just take the average of vruntime - as it will use the full range
of its u64 and will wrap around. Instead we'll use the average of
(vruntime - min_vruntime)

\Sum_{i}^{n} 1/n (v_{i} - v) = 1/n ( (\Sum_{i}^{n} v_{i}) - n v )

By factoring out the 1/n (never storing that) we avoid rounding, which
would bring an accumulating error.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched.c       |    3 ++
 kernel/sched_debug.c |    3 ++
 kernel/sched_fair.c  |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 61 insertions(+), 1 deletion(-)

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -383,6 +383,9 @@ struct cfs_rq {
 	struct load_weight load;
 	unsigned long nr_running;
 
+	long nr_queued;
+	s64 avg_vruntime;
+
 	u64 exec_clock;
 	u64 min_vruntime;
 
Index: linux-2.6/kernel/sched_debug.c
===================================================================
--- linux-2.6.orig/kernel/sched_debug.c
+++ linux-2.6/kernel/sched_debug.c
@@ -161,6 +161,9 @@ void print_cfs_rq(struct seq_file *m, in
 			SPLIT_NS(spread0));
 	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
 	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+			SPLIT_NS(avg_vruntime(cfs_rq)));
+
 #ifdef CONFIG_SCHEDSTATS
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
 
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -271,6 +271,56 @@ static inline s64 entity_key(struct cfs_
 	return se->vruntime - cfs_rq->min_vruntime;
 }
 
+static void
+avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	s64 key = entity_key(cfs_rq, se);
+	cfs_rq->avg_vruntime += key;
+	cfs_rq->nr_queued++;
+}
+
+static void
+avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	s64 key = entity_key(cfs_rq, se);
+	cfs_rq->avg_vruntime -= key;
+	cfs_rq->nr_queued--;
+}
+
+static inline
+void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+{
+	cfs_rq->avg_vruntime -= cfs_rq->nr_queued * delta;
+}
+
+static u64 avg_vruntime(struct cfs_rq *cfs_rq)
+{
+	s64 avg = cfs_rq->avg_vruntime;
+	long nr_queued = cfs_rq->nr_queued;
+
+	if (cfs_rq->curr) {
+		nr_queued++;
+		avg += entity_key(cfs_rq, cfs_rq->curr);
+	}
+
+	if (nr_queued)
+		avg = div_s64(avg, nr_queued);
+
+	return cfs_rq->min_vruntime + avg;
+}
+
+static void __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+{
+	/*
+	 * open coded max_vruntime() to allow updating avg_vruntime
+	 */
+	s64 delta = (s64)(vruntime - cfs_rq->min_vruntime);
+	if (delta > 0) {
+		avg_vruntime_update(cfs_rq, delta);
+		cfs_rq->min_vruntime = vruntime;
+	}
+}
+
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
 	u64 vruntime = cfs_rq->min_vruntime;
@@ -289,7 +339,7 @@ static void update_min_vruntime(struct c
 			vruntime = min_vruntime(vruntime, se->vruntime);
 	}
 
-	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+	__update_min_vruntime(cfs_rq, vruntime);
 }
 
 /*
@@ -303,6 +353,8 @@ static void __enqueue_entity(struct cfs_
 	s64 key = entity_key(cfs_rq, se);
 	int leftmost = 1;
 
+	avg_vruntime_add(cfs_rq, se);
+
 	/*
 	 * Find the right place in the rbtree:
 	 */
@@ -345,6 +397,8 @@ static void __dequeue_entity(struct cfs_
 		cfs_rq->next = NULL;
 
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
+
+	avg_vruntime_sub(cfs_rq, se);
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)

-- 


  parent reply	other threads:[~2008-10-24  9:59 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-24  9:06 [PATCH 0/8] scheduler patches Peter Zijlstra
2008-10-24  9:06 ` [PATCH 1/8] sched: fix a find_busiest_group buglet Peter Zijlstra
2008-10-24  9:06 ` [PATCH 2/8] sched: more accurate min_vruntime accounting Peter Zijlstra
2008-10-24  9:06 ` [PATCH 3/8] sched: weaken sync hint Peter Zijlstra
2008-10-24  9:06 ` [PATCH 4/8] sched: re-instate vruntime based wakeup preemption Peter Zijlstra
2008-10-24  9:06 ` [PATCH 5/8] sched: virtual time buddy preemption Peter Zijlstra
2008-10-24  9:06 ` Peter Zijlstra [this message]
2008-10-29 15:48   ` [PATCH 6/8] sched: avg_vruntime Peter Zijlstra
2008-11-01 18:13     ` Fabio Checconi
2008-10-24  9:06 ` [PATCH 7/8] sched: non-zero lag renice Peter Zijlstra
2008-10-24 17:47   ` Chris Friesen
2008-10-24 20:28     ` Peter Zijlstra
2008-10-24 21:13       ` Chris Friesen
2008-10-24  9:06 ` [PATCH 8/8] use avg_vruntime for task placement Peter Zijlstra
2008-10-24 10:26 ` [PATCH 0/8] scheduler patches Ingo Molnar
2008-10-24 10:29   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081024091109.832347739@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).