linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] cpu scheduler: unsquish dynamic priorities
@ 2005-12-20  4:53 Peter Williams
  2005-12-21  5:21 ` Peter Williams
  2005-12-21  9:36 ` Ingo Molnar
  0 siblings, 2 replies; 5+ messages in thread
From: Peter Williams @ 2005-12-20  4:53 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 1454 bytes --]

The problem:

The current scheduler implementation maps 40 nice values and 10 bonus 
values into only 40 priority slots on the run queues.  This results in 
the dynamic priorities of tasks at either end of the nice scale being 
squished up.  E.g. all tasks with nice in the range -20 to -16 and the 
maximum of 10 bonus points will end up with a dynamic priority of 
MAX_RT_PRIO and all tasks with nice in the range 15 to 19 and no bonus 
points will end up with a dynamic priority of MAX_PRIO - 1.

Although the fact that niceness is primarily implemented by time slice 
size means that this will have little or no adverse effect on the long 
term allocation of CPU resources due to niceness, it could adversely 
affect latency as it will interfere with preemption.

The solution:

Increase the number of priority slots in the run queues to allow a 
linear mapping and eliminate the squish.

The implementation:

As the only place MAX_PRIO is used outside of sched.c is to initialize 
the init task in init_task.h, it is possible to implement the solution 
entirely within sched.c by defining a new macro IDLE_PRIO (which is 
equal to the sum of MAX_PRIO and MAX_BONUS) and then replacing MAX_PRIO 
by IDLE_PRIO where appropriate.

Signed-off-by: Peter Williams <pwil3058@bigpond.com.au>

-- 
Peter Williams                                   pwil3058@bigpond.net.au

"Learning, n. The kind of ignorance distinguishing the studious."
  -- Ambrose Bierce

[-- Attachment #2: unsquish-cpu-scheduler-dynamic-priorities --]
[-- Type: text/plain, Size: 3854 bytes --]

Index: GIT-warnings/kernel/sched.c
===================================================================
--- GIT-warnings.orig/kernel/sched.c	2005-12-20 13:02:45.000000000 +1100
+++ GIT-warnings/kernel/sched.c	2005-12-20 13:04:02.000000000 +1100
@@ -180,14 +180,15 @@ static unsigned int task_timeslice(task_
  * These are the runqueue data structures:
  */
 
-#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
+#define IDLE_PRIO (MAX_PRIO + MAX_BONUS)
+#define BITMAP_SIZE ((((IDLE_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
 
 typedef struct runqueue runqueue_t;
 
 struct prio_array {
 	unsigned int nr_active;
 	unsigned long bitmap[BITMAP_SIZE];
-	struct list_head queue[MAX_PRIO];
+	struct list_head queue[IDLE_PRIO];
 };
 
 /*
@@ -645,18 +646,15 @@ static inline void enqueue_task_head(str
  */
 static int effective_prio(task_t *p)
 {
-	int bonus, prio;
+	int prio;
 
 	if (rt_task(p))
 		return p->prio;
 
-	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
+	prio = p->static_prio + MAX_BONUS - CURRENT_BONUS(p);
+	BUG_ON(prio < MAX_RT_PRIO);
+	BUG_ON(prio > IDLE_PRIO - 1);
 
-	prio = p->static_prio - bonus;
-	if (prio < MAX_RT_PRIO)
-		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
-		prio = MAX_PRIO-1;
 	return prio;
 }
 
@@ -1861,7 +1859,7 @@ void pull_task(runqueue_t *src_rq, prio_
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
 	/*
-	 * Note that idle threads have a prio of MAX_PRIO, for this test
+	 * Note that idle threads have a prio of IDLE_PRIO, for this test
 	 * to be always true for them.
 	 */
 	if (TASK_PREEMPTS_CURR(p, this_rq))
@@ -1945,8 +1943,8 @@ skip_bitmap:
 	if (!idx)
 		idx = sched_find_first_bit(array->bitmap);
 	else
-		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
-	if (idx >= MAX_PRIO) {
+		idx = find_next_bit(array->bitmap, IDLE_PRIO, idx);
+	if (idx >= IDLE_PRIO) {
 		if (array == busiest->expired && busiest->active->nr_active) {
 			array = busiest->active;
 			dst_array = this_rq->active;
@@ -3056,7 +3054,7 @@ go_idle:
 		rq->expired = array;
 		array = rq->active;
 		rq->expired_timestamp = 0;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 	}
 
 	idx = sched_find_first_bit(array->bitmap);
@@ -4396,7 +4394,7 @@ void __devinit init_idle(task_t *idle, i
 
 	idle->sleep_avg = 0;
 	idle->array = NULL;
-	idle->prio = MAX_PRIO;
+	idle->prio = IDLE_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
 	set_task_cpu(idle, cpu);
@@ -4737,7 +4735,7 @@ static void migrate_dead_tasks(unsigned 
 	struct runqueue *rq = cpu_rq(dead_cpu);
 
 	for (arr = 0; arr < 2; arr++) {
-		for (i = 0; i < MAX_PRIO; i++) {
+		for (i = 0; i < IDLE_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
 			while (!list_empty(list))
 				migrate_dead(dead_cpu,
@@ -4793,7 +4791,7 @@ static int migration_call(struct notifie
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
-		rq->idle->static_prio = MAX_PRIO;
+		rq->idle->static_prio = IDLE_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
 		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
@@ -5610,7 +5608,7 @@ void __init sched_init(void)
 		rq->nr_running = 0;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
@@ -5625,12 +5623,12 @@ void __init sched_init(void)
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
+			for (k = 0; k < IDLE_PRIO; k++) {
 				INIT_LIST_HEAD(array->queue + k);
 				__clear_bit(k, array->bitmap);
 			}
 			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+			__set_bit(IDLE_PRIO, array->bitmap);
 		}
 	}
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpu scheduler: unsquish dynamic priorities
  2005-12-20  4:53 [PATCH] cpu scheduler: unsquish dynamic priorities Peter Williams
@ 2005-12-21  5:21 ` Peter Williams
  2005-12-21  9:36 ` Ingo Molnar
  1 sibling, 0 replies; 5+ messages in thread
From: Peter Williams @ 2005-12-21  5:21 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 1904 bytes --]

Peter Williams wrote:
> The problem:
> 
> The current scheduler implementation maps 40 nice values and 10 bonus 
> values into only 40 priority slots on the run queues.  This results in 
> the dynamic priorities of tasks at either end of the nice scale being 
> squished up.  E.g. all tasks with nice in the range -20 to -16 and the 
> maximum of 10 bonus points will end up with a dynamic priority of 
> MAX_RT_PRIO and all tasks with nice in the range 15 to 19 and no bonus 
> points will end up with a dynamic priority of MAX_PRIO - 1.
> 
> Although the fact that niceness is primarily implemented by time slice 
> size means that this will have little or no adverse effect on the long 
> term allocation of CPU resources due to niceness, it could adversely 
> effect latency as it will interfere with preemption.
> 
> The solution:
> 
> Increase the number of priority slots in the run queues to allow a 
> linear mapping and eliminate the squish.
> 
> The implementation:
> 
> As the only place MAX_PRIO is used outside of sched.c is to initialize 
> the init task in init_task.h, it is possible to implement the solution 
> entirely within sched.c by defining a new macro IDLE_PRIO (which is 
> equal to the sum of MAX_PRIO and MAX_BONUS) and then replacing MAX_PRIO 
> by IDLE_PRIO where appropriate.
> 
> Signed-off-by: Peter Williams <pwil3058@bigpond.com.au>

I overlooked the effect of this patch on the TASK_INTERACTIVE() macro 
function and this has an adverse effect on interactive responsiveness. 
A modified patch that corrects that oversight is attached.

Signed-off-by: Peter Williams <pwil3058@bigpond.com.au>

  sched.c |   38 ++++++++++++++++++--------------------
  1 files changed, 18 insertions(+), 20 deletions(-)

-- 
Peter Williams                                   pwil3058@bigpond.net.au

"Learning, n. The kind of ignorance distinguishing the studious."
  -- Ambrose Bierce

[-- Attachment #2: unsquish-cpu-scheduler-dynamic-priorities-v2 --]
[-- Type: text/plain, Size: 4141 bytes --]

Index: GIT-warnings/kernel/sched.c
===================================================================
--- GIT-warnings.orig/kernel/sched.c	2005-12-21 15:11:09.000000000 +1100
+++ GIT-warnings/kernel/sched.c	2005-12-21 15:54:49.000000000 +1100
@@ -145,7 +145,7 @@
 	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
-	((p)->prio <= (p)->static_prio - DELTA(p))
+	((p)->prio <= (p)->static_prio - DELTA(p) + MAX_BONUS / 2)
 
 #define INTERACTIVE_SLEEP(p) \
 	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
@@ -180,14 +180,15 @@ static unsigned int task_timeslice(task_
  * These are the runqueue data structures:
  */
 
-#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
+#define IDLE_PRIO (MAX_PRIO + MAX_BONUS)
+#define BITMAP_SIZE ((((IDLE_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
 
 typedef struct runqueue runqueue_t;
 
 struct prio_array {
 	unsigned int nr_active;
 	unsigned long bitmap[BITMAP_SIZE];
-	struct list_head queue[MAX_PRIO];
+	struct list_head queue[IDLE_PRIO];
 };
 
 /*
@@ -645,18 +646,15 @@ static inline void enqueue_task_head(str
  */
 static int effective_prio(task_t *p)
 {
-	int bonus, prio;
+	int prio;
 
 	if (rt_task(p))
 		return p->prio;
 
-	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
+	prio = p->static_prio + MAX_BONUS - CURRENT_BONUS(p);
+	BUG_ON(prio < MAX_RT_PRIO);
+	BUG_ON(prio > IDLE_PRIO - 1);
 
-	prio = p->static_prio - bonus;
-	if (prio < MAX_RT_PRIO)
-		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
-		prio = MAX_PRIO-1;
 	return prio;
 }
 
@@ -1861,7 +1859,7 @@ void pull_task(runqueue_t *src_rq, prio_
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
 	/*
-	 * Note that idle threads have a prio of MAX_PRIO, for this test
+	 * Note that idle threads have a prio of IDLE_PRIO, for this test
 	 * to be always true for them.
 	 */
 	if (TASK_PREEMPTS_CURR(p, this_rq))
@@ -1945,8 +1943,8 @@ skip_bitmap:
 	if (!idx)
 		idx = sched_find_first_bit(array->bitmap);
 	else
-		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
-	if (idx >= MAX_PRIO) {
+		idx = find_next_bit(array->bitmap, IDLE_PRIO, idx);
+	if (idx >= IDLE_PRIO) {
 		if (array == busiest->expired && busiest->active->nr_active) {
 			array = busiest->active;
 			dst_array = this_rq->active;
@@ -3056,7 +3054,7 @@ go_idle:
 		rq->expired = array;
 		array = rq->active;
 		rq->expired_timestamp = 0;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 	}
 
 	idx = sched_find_first_bit(array->bitmap);
@@ -4396,7 +4394,7 @@ void __devinit init_idle(task_t *idle, i
 
 	idle->sleep_avg = 0;
 	idle->array = NULL;
-	idle->prio = MAX_PRIO;
+	idle->prio = IDLE_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
 	set_task_cpu(idle, cpu);
@@ -4737,7 +4735,7 @@ static void migrate_dead_tasks(unsigned 
 	struct runqueue *rq = cpu_rq(dead_cpu);
 
 	for (arr = 0; arr < 2; arr++) {
-		for (i = 0; i < MAX_PRIO; i++) {
+		for (i = 0; i < IDLE_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
 			while (!list_empty(list))
 				migrate_dead(dead_cpu,
@@ -4793,7 +4791,7 @@ static int migration_call(struct notifie
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
-		rq->idle->static_prio = MAX_PRIO;
+		rq->idle->static_prio = IDLE_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
 		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
@@ -5610,7 +5608,7 @@ void __init sched_init(void)
 		rq->nr_running = 0;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
@@ -5625,12 +5623,12 @@ void __init sched_init(void)
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
+			for (k = 0; k < IDLE_PRIO; k++) {
 				INIT_LIST_HEAD(array->queue + k);
 				__clear_bit(k, array->bitmap);
 			}
 			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+			__set_bit(IDLE_PRIO, array->bitmap);
 		}
 	}
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpu scheduler: unsquish dynamic priorities
  2005-12-20  4:53 [PATCH] cpu scheduler: unsquish dynamic priorities Peter Williams
  2005-12-21  5:21 ` Peter Williams
@ 2005-12-21  9:36 ` Ingo Molnar
  2005-12-22  0:41   ` Peter Williams
  1 sibling, 1 reply; 5+ messages in thread
From: Ingo Molnar @ 2005-12-21  9:36 UTC (permalink / raw)
  To: Peter Williams; +Cc: Linux Kernel Mailing List, Andrew Morton


* Peter Williams <pwil3058@bigpond.net.au> wrote:

> The problem:
> 
> The current scheduler implementation maps 40 nice values and 10 bonus 
> values into only 40 priority slots on the run queues.  This results in 
> the dynamic priorities of tasks at either end of the nice scale being 
> squished up.  E.g. all tasks with nice in the range -20 to -16 and the 
> maximum of 10 bonus points will end up with a dynamic priority of 
> MAX_RT_PRIO and all tasks with nice in the range 15 to 19 and no bonus 
> points will end up with a dynamic priority of MAX_PRIO - 1.
> 
> Although the fact that niceness is primarily implemented by time slice 
> size means that this will have little or no adverse effect on the long 
> term allocation of CPU resources due to niceness, it could adversely 
> effect latency as it will interfere with preemption.

this property of the priority distribution was intentional from me, i 
wanted to have an easy way to test 'no priority boosting downwards' 
(nice +19) and 'no priority boosting upwards' (nice -20) conditions. But 
i like your patch, because it simplifies effective_prio() a bit, and 
with SCHED_BATCH we'll have the 'no boosting' property anyway. Could you 
redo the patch against the current scheduler queue in -mm, so that we 
can try it out in -mm?

Btw., another user-visible effect is that task_prio() will return the 
new range, which will be visible in e.g. 'top'. I dont think it will be 
confusing though.

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpu scheduler: unsquish dynamic priorities
  2005-12-21  9:36 ` Ingo Molnar
@ 2005-12-22  0:41   ` Peter Williams
  2006-01-06  5:22     ` Peter Williams
  0 siblings, 1 reply; 5+ messages in thread
From: Peter Williams @ 2005-12-22  0:41 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linux Kernel Mailing List, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 2559 bytes --]

Ingo Molnar wrote:
> * Peter Williams <pwil3058@bigpond.net.au> wrote:
> 
> 
>>The problem:
>>
>>The current scheduler implementation maps 40 nice values and 10 bonus 
>>values into only 40 priority slots on the run queues.  This results in 
>>the dynamic priorities of tasks at either end of the nice scale being 
>>squished up.  E.g. all tasks with nice in the range -20 to -16 and the 
>>maximum of 10 bonus points will end up with a dynamic priority of 
>>MAX_RT_PRIO and all tasks with nice in the range 15 to 19 and no bonus 
>>points will end up with a dynamic priority of MAX_PRIO - 1.
>>
>>Although the fact that niceness is primarily implemented by time slice 
>>size means that this will have little or no adverse effect on the long 
>>term allocation of CPU resources due to niceness, it could adversely 
>>effect latency as it will interfere with preemption.
> 
> 
> this property of the priority distribution was intentional from me, i 
> wanted to have an easy way to test 'no priority boosting downwards' 
> (nice +19) and 'no priority boosting upwards' (nice -20) conditions.  But
> i like your patch, because it simplifies effective_prio() a bit,

Yes and after some testing we should be able to drop the two BUG_ON() 
statements and simplify it even further.  I only put them in because the 
original code meant that the implicit assertion:

0 <= CURRENT_BONUS(p) <= MAX_BONUS

hasn't really been tested.  As a result of code review, I'm pretty sure 
that it holds but it doesn't hurt to be careful.

> and 
> with SCHED_BATCH we'll have the 'no boosting' property anyway. Could you 
> redo the patch against the current scheduler queue in -mm, so that we 
> can try it out in -mm?

Attached is a patch against 2.6.15-rc5-mm3.

> 
> Btw., another user-visible effect is that task_prio() will return the 
> new range, which will be visible in e.g. 'top'. I dont think it will be 
> confusing though.

No, people will get used to it.  Interactive tasks (with nice 0) now 
tend to get a dynamic priority of 20 instead of 15 which looks more 
natural (to me).  But I guess it makes the interactive adjustment look 
more like a penalty imposed on non interactive tasks rather than a bonus 
given to interactive tasks.  But this is simpler than the original 
version where it looks like a combination of a bonus to interactive 
tasks and a penalty for non interactive tasks.

Peter
-- 
Peter Williams                                   pwil3058@bigpond.net.au

"Learning, n. The kind of ignorance distinguishing the studious."
  -- Ambrose Bierce

[-- Attachment #2: unsquish-cpu-scheduler-dynamic-priorities-2.6.15-rc5-mm3 --]
[-- Type: text/plain, Size: 4129 bytes --]

Index: MM-2.6.X/kernel/sched.c
===================================================================
--- MM-2.6.X.orig/kernel/sched.c	2005-12-22 10:12:13.000000000 +1100
+++ MM-2.6.X/kernel/sched.c	2005-12-22 10:13:07.000000000 +1100
@@ -157,7 +157,7 @@
 	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
-	((p)->prio <= (p)->static_prio - DELTA(p))
+	((p)->prio <= (p)->static_prio - DELTA(p) + MAX_BONUS / 2)
 
 #define INTERACTIVE_SLEEP(p) \
 	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
@@ -199,14 +199,15 @@ EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  * These are the runqueue data structures:
  */
 
-#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
+#define IDLE_PRIO (MAX_PRIO + MAX_BONUS)
+#define BITMAP_SIZE ((((IDLE_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
 
 typedef struct runqueue runqueue_t;
 
 struct prio_array {
 	unsigned int nr_active;
 	unsigned long bitmap[BITMAP_SIZE];
-	struct list_head queue[MAX_PRIO];
+	struct list_head queue[IDLE_PRIO];
 };
 
 /*
@@ -664,18 +665,15 @@ static inline void enqueue_task_head(str
  */
 static int effective_prio(task_t *p)
 {
-	int bonus, prio;
+	int prio;
 
 	if (rt_task(p))
 		return p->prio;
 
-	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
+	prio = p->static_prio + MAX_BONUS - CURRENT_BONUS(p);
+	BUG_ON(prio < MAX_RT_PRIO);
+	BUG_ON(prio > IDLE_PRIO - 1);
 
-	prio = p->static_prio - bonus;
-	if (prio < MAX_RT_PRIO)
-		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
-		prio = MAX_PRIO-1;
 	return prio;
 }
 
@@ -1867,7 +1865,7 @@ void pull_task(runqueue_t *src_rq, prio_
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
 	/*
-	 * Note that idle threads have a prio of MAX_PRIO, for this test
+	 * Note that idle threads have a prio of IDLE_PRIO, for this test
 	 * to be always true for them.
 	 */
 	if (TASK_PREEMPTS_CURR(p, this_rq))
@@ -1952,8 +1950,8 @@ skip_bitmap:
 	if (!idx)
 		idx = sched_find_first_bit(array->bitmap);
 	else
-		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
-	if (idx >= MAX_PRIO) {
+		idx = find_next_bit(array->bitmap, IDLE_PRIO, idx);
+	if (idx >= IDLE_PRIO) {
 		if (array == busiest->expired && busiest->active->nr_active) {
 			array = busiest->active;
 			dst_array = this_rq->active;
@@ -3071,7 +3069,7 @@ go_idle:
 		rq->expired = array;
 		array = rq->active;
 		rq->expired_timestamp = 0;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 	}
 
 	idx = sched_find_first_bit(array->bitmap);
@@ -4434,7 +4432,7 @@ void __devinit init_idle(task_t *idle, i
 
 	idle->sleep_avg = 0;
 	idle->array = NULL;
-	idle->prio = MAX_PRIO;
+	idle->prio = IDLE_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
 	set_task_cpu(idle, cpu);
@@ -4775,7 +4773,7 @@ static void migrate_dead_tasks(unsigned 
 	struct runqueue *rq = cpu_rq(dead_cpu);
 
 	for (arr = 0; arr < 2; arr++) {
-		for (i = 0; i < MAX_PRIO; i++) {
+		for (i = 0; i < IDLE_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
 			while (!list_empty(list))
 				migrate_dead(dead_cpu,
@@ -4954,7 +4952,7 @@ static int migration_call(struct notifie
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
-		rq->idle->static_prio = MAX_PRIO;
+		rq->idle->static_prio = IDLE_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
 		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
@@ -6239,7 +6237,7 @@ void __init sched_init(void)
 		rq->nr_running = 0;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
@@ -6254,12 +6252,12 @@ void __init sched_init(void)
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
+			for (k = 0; k < IDLE_PRIO; k++) {
 				INIT_LIST_HEAD(array->queue + k);
 				__clear_bit(k, array->bitmap);
 			}
 			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+			__set_bit(IDLE_PRIO, array->bitmap);
 		}
 	}
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpu scheduler: unsquish dynamic priorities
  2005-12-22  0:41   ` Peter Williams
@ 2006-01-06  5:22     ` Peter Williams
  0 siblings, 0 replies; 5+ messages in thread
From: Peter Williams @ 2006-01-06  5:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linux Kernel Mailing List, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 2847 bytes --]

Peter Williams wrote:
> Ingo Molnar wrote:
> 
>> * Peter Williams <pwil3058@bigpond.net.au> wrote:
>>
>>
>>> The problem:
>>>
>>> The current scheduler implementation maps 40 nice values and 10 bonus 
>>> values into only 40 priority slots on the run queues.  This results 
>>> in the dynamic priorities of tasks at either end of the nice scale 
>>> being squished up.  E.g. all tasks with nice in the range -20 to -16 
>>> and the maximum of 10 bonus points will end up with a dynamic 
>>> priority of MAX_RT_PRIO and all tasks with nice in the range 15 to 19 
>>> and no bonus points will end up with a dynamic priority of MAX_PRIO - 1.
>>>
>>> Although the fact that niceness is primarily implemented by time 
>>> slice size means that this will have little or no adverse effect on 
>>> the long term allocation of CPU resources due to niceness, it could 
>>> adversely effect latency as it will interfere with preemption.
>>
>>
>>
>> this property of the priority distribution was intentional from me, i 
>> wanted to have an easy way to test 'no priority boosting downwards' 
>> (nice +19) and 'no priority boosting upwards' (nice -20) conditions.  But
>> i like your patch, because it simplifies effective_prio() a bit,
> 
> 
> Yes and after some testing we should be able to drop the two BUG_ON() 
> statements and simplify it even further.  I only put them in because the 
> original code meant that the implicit assertion:
> 
> 0 <= CURRENT_BONUS(p) <= MAX_BONUS
> 
> hasn't really been tested.  As a result of code review, I'm pretty sure 
> that it holds but it doesn't hurt to be careful.
> 
>> and with SCHED_BATCH we'll have the 'no boosting' property anyway. 
>> Could you redo the patch against the current scheduler queue in -mm, 
>> so that we can try it out in -mm?
> 
> 
> Attached is a patch against 2.6.15-rc5-mm3.

On the assumption that you're still interested in trying this patch out 
I've attached a version against 2.6.15-mm1.

> 
>>
>> Btw., another user-visible effect is that task_prio() will return the 
>> new range, which will be visible in e.g. 'top'. I dont think it will 
>> be confusing though.
> 
> 
> No, people will get used to it.  Interactive tasks (with nice 0) now 
> tend to get a dynamic priority of 20 instead of 15 which looks more 
> natural (to me).  But I guess it makes the interactive adjustment look 
> more like a penalty imposed on non interactive tasks rather than a bonus 
> given to interactive tasks.  But this is simpler than the original 
> version where it looks like a combination of a bonus to interactive 
> tasks and a penalty for non interactive tasks.
> 

Signed-off-by: Peter Williams <pwil3058@bigpond.com.au>

-- 
Peter Williams                                   pwil3058@bigpond.net.au

"Learning, n. The kind of ignorance distinguishing the studious."
  -- Ambrose Bierce

[-- Attachment #2: unsquish-cpu-scheduler-dynamic-priorities --]
[-- Type: text/plain, Size: 5394 bytes --]

The problem:

The current scheduler implementation maps 40 nice values and 10 bonus
values into only 40 priority slots on the run queues.  This results in
the dynamic priorities of tasks at either end of the nice scale being
squished up.  E.g. all tasks with nice in the range -20 to -16 and the
maximum of 10 bonus points will end up with a dynamic priority of
MAX_RT_PRIO and all tasks with nice in the range 15 to 19 and no
bonus points will end up with a dynamic priority of MAX_PRIO - 1.

Although the fact that niceness is primarily implemented by time
slice size means that this will have no adverse effect on the long
term allocation of CPU resources due to niceness, it could adversely
affect latency as it will interfere with preemption.

The solution:

Increase the number of priority slots in the run queues to allow a
linear mapping and eliminate the squish.

The implementation:

As the only place MAX_PRIO is used outside of sched.c is to
initialize the init task in init_task.h, it is possible to implement the
solution entirely within sched.c by defining a new macro IDLE_PRIO
(which is equal to the sum of MAX_PRIO and MAX_BONUS) and then
replacing MAX_PRIO by IDLE_PRIO where appropriate.

Signed-off-by: Peter Williams <pwil3058@bigpond.com.au>
Index: MM-2.6.X/kernel/sched.c
===================================================================
--- MM-2.6.X.orig/kernel/sched.c	2006-01-06 14:39:28.000000000 +1100
+++ MM-2.6.X/kernel/sched.c	2006-01-06 16:16:04.000000000 +1100
@@ -157,7 +157,7 @@
 	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
-	((p)->prio <= (p)->static_prio - DELTA(p))
+	((p)->prio <= (p)->static_prio - DELTA(p) + MAX_BONUS / 2)
 
 #define INTERACTIVE_SLEEP(p) \
 	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
@@ -199,14 +199,15 @@ EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  * These are the runqueue data structures:
  */
 
-#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
+#define IDLE_PRIO (MAX_PRIO + MAX_BONUS)
+#define BITMAP_SIZE ((((IDLE_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
 
 typedef struct runqueue runqueue_t;
 
 struct prio_array {
 	unsigned int nr_active;
 	unsigned long bitmap[BITMAP_SIZE];
-	struct list_head queue[MAX_PRIO];
+	struct list_head queue[IDLE_PRIO];
 };
 
 /*
@@ -664,18 +665,15 @@ static inline void enqueue_task_head(str
  */
 static int effective_prio(task_t *p)
 {
-	int bonus, prio;
+	int prio;
 
 	if (rt_task(p))
 		return p->prio;
 
-	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
+	prio = p->static_prio + MAX_BONUS - CURRENT_BONUS(p);
+	BUG_ON(prio < MAX_RT_PRIO);
+	BUG_ON(prio > IDLE_PRIO - 1);
 
-	prio = p->static_prio - bonus;
-	if (prio < MAX_RT_PRIO)
-		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
-		prio = MAX_PRIO-1;
 	return prio;
 }
 
@@ -1867,7 +1865,7 @@ void pull_task(runqueue_t *src_rq, prio_
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
 	/*
-	 * Note that idle threads have a prio of MAX_PRIO, for this test
+	 * Note that idle threads have a prio of IDLE_PRIO, for this test
 	 * to be always true for them.
 	 */
 	if (TASK_PREEMPTS_CURR(p, this_rq))
@@ -1952,8 +1950,8 @@ skip_bitmap:
 	if (!idx)
 		idx = sched_find_first_bit(array->bitmap);
 	else
-		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
-	if (idx >= MAX_PRIO) {
+		idx = find_next_bit(array->bitmap, IDLE_PRIO, idx);
+	if (idx >= IDLE_PRIO) {
 		if (array == busiest->expired && busiest->active->nr_active) {
 			array = busiest->active;
 			dst_array = this_rq->active;
@@ -3071,7 +3069,7 @@ go_idle:
 		rq->expired = array;
 		array = rq->active;
 		rq->expired_timestamp = 0;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 	}
 
 	idx = sched_find_first_bit(array->bitmap);
@@ -4434,7 +4432,7 @@ void __devinit init_idle(task_t *idle, i
 
 	idle->sleep_avg = 0;
 	idle->array = NULL;
-	idle->prio = MAX_PRIO;
+	idle->prio = IDLE_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
 	set_task_cpu(idle, cpu);
@@ -4775,7 +4773,7 @@ static void migrate_dead_tasks(unsigned 
 	struct runqueue *rq = cpu_rq(dead_cpu);
 
 	for (arr = 0; arr < 2; arr++) {
-		for (i = 0; i < MAX_PRIO; i++) {
+		for (i = 0; i < IDLE_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
 			while (!list_empty(list))
 				migrate_dead(dead_cpu,
@@ -4954,7 +4952,7 @@ static int migration_call(struct notifie
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
-		rq->idle->static_prio = MAX_PRIO;
+		rq->idle->static_prio = IDLE_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
 		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
@@ -6239,7 +6237,7 @@ void __init sched_init(void)
 		rq->nr_running = 0;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+		rq->best_expired_prio = IDLE_PRIO;
 
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
@@ -6254,12 +6252,12 @@ void __init sched_init(void)
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
+			for (k = 0; k < IDLE_PRIO; k++) {
 				INIT_LIST_HEAD(array->queue + k);
 				__clear_bit(k, array->bitmap);
 			}
 			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+			__set_bit(IDLE_PRIO, array->bitmap);
 		}
 	}
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-01-06  5:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-12-20  4:53 [PATCH] cpu scheduler: unsquish dynamic priorities Peter Williams
2005-12-21  5:21 ` Peter Williams
2005-12-21  9:36 ` Ingo Molnar
2005-12-22  0:41   ` Peter Williams
2006-01-06  5:22     ` Peter Williams

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).