linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Interactivity bits
@ 2003-07-08 20:12 Guillaume Chazarain
  2003-07-08 21:13 ` Davide Libenzi
  2003-07-10  7:14 ` Guillaume Chazarain
  0 siblings, 2 replies; 7+ messages in thread
From: Guillaume Chazarain @ 2003-07-08 20:12 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 3638 bytes --]

Hello,

Currently the interactive points a process can have are in a [-5, 5] range,
that is, 25% of the [0, 39] range. Two reasons are mentioned:

1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
2) nice -20 CPU hogs do not get preempted by nice 0 tasks.

But, using 50% of the range instead of 25%, the interactivity points are better
spread and both rules are still respected.  With a larger range for
interactivity points, it's easier to choose between two interactive tasks.

So, why not change PRIO_BONUS_RATIO to 50 instead of 25?
Actually it should be in the [45, 49] range to maximize the bonus points
range and satisfy both rules due to integer arithmetic.

Something like that:

--- linux-2.5.74-mm2-O3/kernel/sched.c  2003-07-07 18:46:29.000000000 +0200
+++ linux-2.5.74-mm2-O3/kernel/sched.c-bonus    2003-07-08 15:27:12.000000000 +0200
@@ -71,7 +71,7 @@
 #define CHILD_PENALTY		80
 #define PARENT_PENALTY		100
 #define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
+#define PRIO_BONUS_RATIO	45
 #define INTERACTIVE_DELTA	2
 #define MIN_SLEEP_AVG		(HZ)
 #define MAX_SLEEP_AVG		(10*HZ)
@@ -90,13 +90,13 @@
  * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
  * Here are a few examples of different nice levels:
  *
- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0]
+ *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(  0): [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 10): [1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
  *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
+ * (the X axis represents the possible -9 ... 0 ... +9 dynamic
  *  priority range a task can explore, a value of '1' means the
  *  task is rated interactive.)
  *
@@ -325,9 +325,9 @@
  * priority but is modified by bonuses/penalties.
  *
  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
- * into the -5 ... 0 ... +5 bonus/penalty range.
+ * into the -9 ... 0 ... +9 bonus/penalty range.
  *
- * We use 25% of the full 0...39 priority range so that:
+ * We use 50% of the full 0...39 priority range so that:
  *
  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.



And if you want to try other values for PRIO_BONUS_RATIO, I attached a simple
hack to generate the information in the above comment.



Another thing that I was wondering is: should every absence from the runqueue
count towards the interactive bonus?  For example, TASK_UNINTERRUPTIBLE tasks
receive a bonus when they wake up.  This implies that when a CPU hog becomes a
memory hog and starts swapping, it is considered interactive.  OTOH when a task
is swapping I would like it to consume its data as early as possible, to avoid
losing the swapping benefit.
So I'd like to know if the patch below is a good or bad thing.

--- linux-2.5.74-mm2-O3/kernel/sched.c  2003-07-07 18:46:29.000000000 +0200
+++ linux-2.5.74-mm2-O3/kernel/sched.c-INTERR   2003-07-08 17:43:59.000000000 +0200
@@ -388,7 +388,7 @@ static inline void activate_task(task_t 
 {
	long sleep_time = jiffies - p->last_run - 1;
 
-	if (sleep_time > 0) {
+	if (sleep_time > 0 && p->state == TASK_INTERRUPTIBLE) {
		unsigned long runtime = jiffies - p->avg_start;
 
		/*




Thanks for your wisdom.
Guillaume


[-- Attachment #2: testbonus.c --]
[-- Type: application/octet-stream, Size: 1982 bytes --]

#include <stdio.h>

/*
 * Priority layout, copied from the kernel's sched.h: priorities below
 * MAX_RT_PRIO are real-time, the 40 above it map to nice -20 ... +19.
 */
#define MAX_USER_RT_PRIO        100
#define MAX_RT_PRIO             MAX_USER_RT_PRIO
#define MAX_PRIO                (MAX_RT_PRIO + 40)

/*
 * Helpers copied from the kernel's sched.c: conversions between nice
 * values and static priorities.  TASK_NICE() expects a pointer to
 * something with a 'static_prio' member.
 */
#define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)
#define TASK_NICE(p)            PRIO_TO_NICE((p)->static_prio)

#define USER_PRIO(p)            ((p)-MAX_RT_PRIO)
#define MAX_USER_PRIO           (USER_PRIO(MAX_PRIO))

/* The two tunables this tool is meant to experiment with. */
#define PRIO_BONUS_RATIO        45      /* Between 45 and 49 */
#define INTERACTIVE_DELTA       2

/*
 * Linearly rescale v1 from [0, v1_max] to [0, v2_max] using integer
 * arithmetic.  The whole expansion is parenthesized so the macro keeps
 * its meaning when used next to *, / or unary operators.
 */
#define SCALE(v1,v1_max,v2_max) \
        ((v1) * (v2_max) / (v1_max))

/*
 * Number of priority levels a task must gain over its static priority
 * to be rated interactive: the nice value scaled into the bonus range,
 * offset by INTERACTIVE_DELTA.
 */
#define DELTA(p) \
        (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
                INTERACTIVE_DELTA)

/* Non-zero when p->prio is at least DELTA(p) levels better than p->static_prio. */
#define TASK_INTERACTIVE(p) \
        ((p)->prio <= (p)->static_prio - DELTA(p))

/*****************/

/*
 * Extremes of the offset applied to static_prio by write_values(): half of
 * the PRIO_BONUS_RATIO percent slice of the user priority range, either way.
 */
#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100 / 2)
#define MIN_BONUS (-MAX_BONUS)

/*
 * Minimal stand-in for a task: just the two fields that the TASK_NICE()
 * and TASK_INTERACTIVE() macros dereference.
 */
typedef struct {
    int static_prio;    /* priority derived from the nice value alone */
    int prio;           /* priority being tested: static_prio plus an offset */
} mini_task_t;

/*
 * Print one "TASK_INTERACTIVE(nice): [..]" row: for the given nice level,
 * the value of TASK_INTERACTIVE() at every priority offset from MIN_BONUS
 * to MAX_BONUS, as a comma-separated list of 0/1 flags.
 */
static void write_values(int nice)
{
    mini_task_t task;
    int offset = MIN_BONUS;

    task.static_prio = NICE_TO_PRIO(nice);

    /* The first column opens the bracket and carries no leading comma. */
    task.prio = task.static_prio + offset;
    printf("TASK_INTERACTIVE(%3d): [%d", nice, TASK_INTERACTIVE(&task));

    /* Remaining columns: MIN_BONUS + 1 up to and including MAX_BONUS. */
    while (++offset <= MAX_BONUS) {
        task.prio = task.static_prio + offset;
        printf(",%d", TASK_INTERACTIVE(&task));
    }

    puts("]");
}

/*
 * Print the bonus range, the TASK_INTERACTIVE() table for a handful of
 * nice levels, and the extreme priorities needed to check the two
 * "nice" rules by eye.
 */
int main(void)
{
    static const int nice_levels[] = { -20, -10, 0, 10, 19 };
    const int count = (int)(sizeof nice_levels / sizeof nice_levels[0]);
    int idx;

    /* Priorities compared for each rule; printed further down. */
    const int nice19_interactive = NICE_TO_PRIO(19) - MAX_BONUS;
    const int nice0_hog = NICE_TO_PRIO(0) - MIN_BONUS;
    const int nice_minus20_hog = NICE_TO_PRIO(-20) - MIN_BONUS;
    const int nice0_interactive = NICE_TO_PRIO(0) - MAX_BONUS;

    printf("Interactivity bonus between %d and %d\n\n", MIN_BONUS, MAX_BONUS);

    for (idx = 0; idx < count; idx++) {
        write_values(nice_levels[idx]);
    }

    puts("");
    printf("nice +19 interactive tasks : %d\n", nice19_interactive);
    printf("nice 0 CPU hogs : %d\n", nice0_hog);

    puts("");
    printf("nice -20 CPU hogs : %d\n", nice_minus20_hog);
    printf("nice 0 interactive tasks : %d\n", nice0_interactive);

    return 0;
}

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Interactivity bits
  2003-07-08 20:12 [PATCH] Interactivity bits Guillaume Chazarain
@ 2003-07-08 21:13 ` Davide Libenzi
  2003-07-10  7:14 ` Guillaume Chazarain
  1 sibling, 0 replies; 7+ messages in thread
From: Davide Libenzi @ 2003-07-08 21:13 UTC (permalink / raw)
  To: Guillaume Chazarain; +Cc: Linux Kernel Mailing List

On Tue, 8 Jul 2003, Guillaume Chazarain wrote:

> Hello,
>
> Currently the interactive points a process can have are in a [-5, 5] range,
> that is, 25% of the [0, 39] range. Two reasons are mentionned:
>
> 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
> 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
>
> But, using 50% of the range, instead of 25% the interactivity points are better
> spread and both rules are still respected.  Having a larger range for
> interactivity points it's easier to choose between two interactive tasks.
>
> So, why not changing PRIO_BONUS_RATIO to 50 instead of 25?
> Actually it should be in the [45, 49] range to maximize the bonus points
> range and satisfy both rules due to integer arithmetic.

I believe these are the bits that broke the scheduler, which was working
fine during the very first shots in 2.5. IIRC Ingo was hit by people's
complaints about those 'nice' rules and he had to fix it. It'd be
interesting to bring back a more generous interactive bonus and see how the
scheduler behaves.



- Davide


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Interactivity bits
  2003-07-08 20:12 [PATCH] Interactivity bits Guillaume Chazarain
  2003-07-08 21:13 ` Davide Libenzi
@ 2003-07-10  7:14 ` Guillaume Chazarain
  1 sibling, 0 replies; 7+ messages in thread
From: Guillaume Chazarain @ 2003-07-10  7:14 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 3185 bytes --]

Hello,

here is the latest version of my simple patch.
I increased INTERACTIVE_DELTA to 4 as Mike Galbraith
told me to; he also explained to me why
the p->state == TASK_INTERRUPTIBLE test was a bad thing.

The previous patch against -mm3 was an accident, it
messed my tweaks with Con's work and it was... cough
not tested...

I'd appreciate any feedback.
Thanks.


Guillaume



--- linux-2.5.74-bk7/kernel/sched.c.old	2003-07-09 10:08:01.000000000 +0200
+++ linux-2.5.74-bk7/kernel/sched.c	2003-07-10 00:35:59.000000000 +0200
@@ -68,11 +68,11 @@
  */
 #define MIN_TIMESLICE		( 10 * HZ / 1000)
 #define MAX_TIMESLICE		(200 * HZ / 1000)
-#define CHILD_PENALTY		50
-#define PARENT_PENALTY		100
+#define CHILD_PENALTY		80
+#define PARENT_PENALTY		90
 #define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
-#define INTERACTIVE_DELTA	2
+#define PRIO_BONUS_RATIO	45
+#define INTERACTIVE_DELTA	4
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
 #define NODE_THRESHOLD		125
@@ -88,13 +88,13 @@
  * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
  * Here are a few examples of different nice levels:
  *
- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
+ *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(  0): [1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
  *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
+ * (the X axis represents the possible -9 ... 0 ... +9 dynamic
  *  priority range a task can explore, a value of '1' means the
  *  task is rated interactive.)
  *
@@ -303,9 +303,9 @@
  * priority but is modified by bonuses/penalties.
  *
  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
- * into the -5 ... 0 ... +5 bonus/penalty range.
+ * into the -9 ... 0 ... +9 bonus/penalty range.
  *
- * We use 25% of the full 0...39 priority range so that:
+ * We use 50% of the full 0...39 priority range so that:
  *
  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
@@ -347,9 +347,9 @@
  */
 static inline void activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	long sleep_time = jiffies - p->last_run;
 
-	if (sleep_time > 0) {
+	if (sleep_time) {
 		int sleep_avg;
 
 		/*
@@ -361,15 +361,9 @@
 		 * higher the priority boost gets as well.
 		 */
 		sleep_avg = p->sleep_avg + sleep_time;
-
-		/*
-		 * 'Overflow' bonus ticks go to the waker as well, so the
-		 * ticks are not lost. This has the effect of further
-		 * boosting tasks that are related to maximum-interactive
-		 * tasks.
-		 */
 		if (sleep_avg > MAX_SLEEP_AVG)
 			sleep_avg = MAX_SLEEP_AVG;
+
 		if (p->sleep_avg != sleep_avg) {
 			p->sleep_avg = sleep_avg;
 			p->prio = effective_prio(p);


[-- Attachment #2: patch.-mm3 --]
[-- Type: application/octet-stream, Size: 5807 bytes --]

--- linux-2.5.74-mm3/kernel/sched.c.old	2003-07-09 16:44:31.000000000 +0200
+++ linux-2.5.74-mm3/kernel/sched.c	2003-07-10 00:34:27.000000000 +0200
@@ -69,15 +69,13 @@
 #define MIN_TIMESLICE		( 10 * HZ / 1000)
 #define MAX_TIMESLICE		(200 * HZ / 1000)
 #define CHILD_PENALTY		80
-#define PARENT_PENALTY		100
+#define PARENT_PENALTY		90
 #define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
-#define INTERACTIVE_DELTA	2
-#define MIN_SLEEP_AVG		(HZ)
+#define PRIO_BONUS_RATIO	45
+#define INTERACTIVE_DELTA	4
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
 #define NODE_THRESHOLD		125
-#define MAX_BONUS		((MAX_USER_PRIO - MAX_RT_PRIO) * PRIO_BONUS_RATIO / 100)
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
@@ -90,13 +88,13 @@
  * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
  * Here are a few examples of different nice levels:
  *
- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
+ *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(  0): [1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
  *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
+ * (the X axis represents the possible -9 ... 0 ... +9 dynamic
  *  priority range a task can explore, a value of '1' means the
  *  task is rated interactive.)
  *
@@ -299,35 +297,15 @@
 	array->nr_active++;
 	p->array = array;
 }
-/*
- * normalise_sleep converts a task's sleep_avg to
- * an appropriate proportion of MIN_SLEEP_AVG.
- */
-static inline void normalise_sleep(task_t *p)
-{
-	unsigned long old_avg_time = jiffies - p->avg_start;
-
-	if (unlikely(old_avg_time < MIN_SLEEP_AVG))
-		return;
-
-	if (p->sleep_avg > MAX_SLEEP_AVG)
-		p->sleep_avg = MAX_SLEEP_AVG;
-
-	if (old_avg_time > MAX_SLEEP_AVG)
-		old_avg_time = MAX_SLEEP_AVG;
-
-	p->sleep_avg = p->sleep_avg * MIN_SLEEP_AVG / old_avg_time;
-	p->avg_start = jiffies - MIN_SLEEP_AVG;
-}
 
 /*
  * effective_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
  *
  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
- * into the -5 ... 0 ... +5 bonus/penalty range.
+ * into the -9 ... 0 ... +9 bonus/penalty range.
  *
- * We use 25% of the full 0...39 priority range so that:
+ * We use 50% of the full 0...39 priority range so that:
  *
  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
@@ -337,28 +315,11 @@
 static int effective_prio(task_t *p)
 {
 	int bonus, prio;
-	unsigned long sleep_period;
 
 	if (rt_task(p))
 		return p->prio;
 
-	sleep_period = jiffies - p->avg_start;
-
-	if (unlikely(!sleep_period))
-		return p->static_prio;
-
-	if (sleep_period > MAX_SLEEP_AVG)
-		sleep_period = MAX_SLEEP_AVG;
-
-	if (p->sleep_avg > sleep_period)
-		sleep_period = p->sleep_avg;
-
-	/*
-	 * The bonus is determined according to the accumulated
-	 * sleep avg over the duration the task has been running
-	 * until it reaches MAX_SLEEP_AVG. -ck
-	 */
-	bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/sleep_period/100 -
+	bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
 			MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
 
 	prio = p->static_prio - bonus;
@@ -386,10 +347,10 @@
  */
 static inline void activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	long sleep_time = jiffies - p->last_run;
 
-	if (sleep_time > 0) {
-		unsigned long runtime = jiffies - p->avg_start;
+	if (sleep_time) {
+		int sleep_avg;
 
 		/*
 		 * This code gives a bonus to interactive tasks.
@@ -399,34 +360,15 @@
 		 * spends sleeping, the higher the average gets - and the
 		 * higher the priority boost gets as well.
 		 */
-		p->sleep_avg += sleep_time;
-		/*
-		 * Give a bonus to tasks that wake early on to prevent
-		 * the problem of the denominator in the bonus equation
-		 * from continually getting larger.
-		 */
-		if (runtime < MAX_SLEEP_AVG)
-			p->sleep_avg += (runtime - p->sleep_avg) * (MAX_SLEEP_AVG - runtime) *
-				(MAX_BONUS - INTERACTIVE_DELTA) / MAX_BONUS / MAX_SLEEP_AVG;
-
-		if (p->sleep_avg > MAX_SLEEP_AVG)
-			p->sleep_avg = MAX_SLEEP_AVG;
-
-		/*
-		 * Tasks that sleep a long time are categorised as idle and
-		 * get their static priority only
-		 */
-		if (sleep_time > MIN_SLEEP_AVG){
-			p->avg_start = jiffies - MIN_SLEEP_AVG;
-			p->sleep_avg = MIN_SLEEP_AVG / 2;
-		}
-
-		if (unlikely(p->avg_start > jiffies)){
-			p->avg_start = jiffies;
-			p->sleep_avg = 0;
+		sleep_avg = p->sleep_avg + sleep_time;
+		if (sleep_avg > MAX_SLEEP_AVG)
+			sleep_avg = MAX_SLEEP_AVG;
+
+		if (p->sleep_avg != sleep_avg) {
+			p->sleep_avg = sleep_avg;
+			p->prio = effective_prio(p);
 		}
 	}
-	p->prio = effective_prio(p);
 	__activate_task(p, rq);
 }
 
@@ -603,7 +545,6 @@
 	 * from forking tasks that are max-interactive.
 	 */
 	current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
-	normalise_sleep(p);
 	p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
 	p->prio = effective_prio(p);
 	set_task_cpu(p, smp_processor_id());
@@ -644,8 +585,6 @@
 	 * If the child was a (relative-) CPU hog then decrease
 	 * the sleep_avg of the parent as well.
 	 */
-	normalise_sleep(p);
-	normalise_sleep(p->parent);
 	if (p->sleep_avg < p->parent->sleep_avg)
 		p->parent->sleep_avg = (p->parent->sleep_avg * EXIT_WEIGHT +
 			p->sleep_avg) / (EXIT_WEIGHT + 1);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Interactivity bits
       [not found] <WQ98NJGC3OMJH0887GC84IHIE856FA.3f0c5488@monpc>
@ 2003-07-09 18:44 ` Roberto Orenstein
  0 siblings, 0 replies; 7+ messages in thread
From: Roberto Orenstein @ 2003-07-09 18:44 UTC (permalink / raw)
  To: Guillaume Chazarain; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 573 bytes --]

On Wed, 2003-07-09 at 14:44, Guillaume Chazarain wrote:
> 09/07/03 17:59:10, Roberto Orenstein <rstein@brturbo.com> wrote:
> 
> >Just a suggestion, why instead of changing the code you don't try the
> >attached patch? At least you don't have to recompile just to change a
> >few define's. Against 2.5.73, but applies in 2.5.74. Just the "long
> >sleep_time = jiffies - p->last_run;" isn't there.
> 
> Hi again,
> 
> your patch oops on boot, here is a patch to correct that.
> 
> Guillaume
> 

Oops, thanks. Corrected patch below.
Anyway, I hope it helps.

regards,
Roberto

[-- Attachment #2: patch-sched_tuning --]
[-- Type: text/x-patch, Size: 6202 bytes --]

diff -X dontdiff -Nru linux-2.5.74/include/linux/sysctl.h linux-2.5.74-test/include/linux/sysctl.h
--- linux-2.5.74/include/linux/sysctl.h	2003-06-27 19:35:11.000000000 -0300
+++ linux-2.5.74-test/include/linux/sysctl.h	2003-07-09 15:11:10.000000000 -0300
@@ -130,6 +130,7 @@
 	KERN_PIDMAX=55,		/* int: PID # limit */
   	KERN_CORE_PATTERN=56,	/* string: pattern for core-file names */
 	KERN_PANIC_ON_OOPS=57,  /* int: whether we will panic on an oops */
+	KERN_SCHED_TUNING=58	/* dir: scheduler tuning */
 };
 
 
@@ -193,6 +194,21 @@
 	RANDOM_UUID=6
 };
 
+/* /proc/sys/kernel/sched_tuning */
+enum
+{
+	SCHED_TUNING_MIN_TIMESLICE=1,
+	SCHED_TUNING_MAX_TIMESLICE=2,
+	SCHED_TUNING_BONUS_RATIO=3,
+	SCHED_TUNING_MAX_SLEEP_AVG=4,
+	SCHED_TUNING_STARVATION_LIMIT=5,
+	SCHED_TUNING_CHILD_PENALTY=6,
+	SCHED_TUNING_PARENT_PENALTY=7,
+	SCHED_TUNING_EXIT_WEIGHT=8,
+	SCHED_TUNING_INTERACTIVE_DELTA=9,
+	SCHED_TUNING_NODE_THRESHOLD=10
+};
+
 /* /proc/sys/bus/isa */
 enum
 {
diff -X dontdiff -Nru linux-2.5.74/kernel/sched.c linux-2.5.74-test/kernel/sched.c
--- linux-2.5.74/kernel/sched.c	2003-07-02 23:25:40.000000000 -0300
+++ linux-2.5.74-test/kernel/sched.c	2003-07-09 15:11:10.000000000 -0300
@@ -66,16 +66,28 @@
  * maximum timeslice is 200 msecs. Timeslices get refilled after
  * they expire.
  */
-#define MIN_TIMESLICE		( 10 * HZ / 1000)
-#define MAX_TIMESLICE		(200 * HZ / 1000)
-#define CHILD_PENALTY		50
-#define PARENT_PENALTY		100
-#define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
-#define INTERACTIVE_DELTA	2
-#define MAX_SLEEP_AVG		(10*HZ)
-#define STARVATION_LIMIT	(10*HZ)
-#define NODE_THRESHOLD		125
+int sched_min_timeslice 	= ( 10 * HZ / 1000);
+int sched_max_timeslice 	= (200 * HZ / 1000);
+int sched_prio_bonus_ratio	= 25;
+int sched_max_sleep_avg		= (10*HZ);
+int sched_starvation_limit	= (10*HZ);
+int sched_child_penalty		= 50;
+int sched_parent_penalty	= 100;
+int sched_exit_weight		= 3;
+int sched_interactive_delta 	= 2;
+int sched_node_threshold	= 125;
+
+
+#define MIN_TIMESLICE		sched_min_timeslice
+#define MAX_TIMESLICE		sched_max_timeslice
+#define CHILD_PENALTY		sched_child_penalty
+#define PARENT_PENALTY		sched_parent_penalty
+#define EXIT_WEIGHT		sched_exit_weight
+#define PRIO_BONUS_RATIO	sched_prio_bonus_ratio
+#define INTERACTIVE_DELTA	sched_interactive_delta
+#define MAX_SLEEP_AVG		sched_max_sleep_avg
+#define STARVATION_LIMIT	sched_starvation_limit
+#define NODE_THRESHOLD		sched_node_threshold
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
diff -X dontdiff -Nru linux-2.5.74/kernel/sysctl.c linux-2.5.74-test/kernel/sysctl.c
--- linux-2.5.74/kernel/sysctl.c	2003-06-27 19:38:13.000000000 -0300
+++ linux-2.5.74-test/kernel/sysctl.c	2003-07-09 15:26:22.000000000 -0300
@@ -58,6 +58,17 @@
 extern int pid_max;
 extern int sysctl_lower_zone_protection;
 extern int min_free_kbytes;
+/* sched.c */
+extern int sched_min_timeslice;
+extern int sched_max_timeslice;
+extern int sched_prio_bonus_ratio;
+extern int sched_max_sleep_avg;
+extern int sched_starvation_limit;
+extern int sched_child_penalty;
+extern int sched_parent_penalty;
+extern int sched_exit_weight;
+extern int sched_interactive_delta;
+extern int sched_node_threshold;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
@@ -123,6 +134,7 @@
 static ctl_table debug_table[];
 static ctl_table dev_table[];
 extern ctl_table random_table[];
+static ctl_table sched_tuning_table[];
 
 /* /proc declarations: */
 
@@ -551,6 +563,12 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= KERN_SCHED_TUNING,
+		.procname	= "sched_tuning",
+		.mode		= 0555,
+		.child		= sched_tuning_table	
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -775,6 +793,101 @@
 
 static ctl_table dev_table[] = {
 	{ .ctl_name = 0 }
+};
+
+/* sched tuning */
+static ctl_table sched_tuning_table[] = {
+	/* min_timeslice */
+	{
+		.ctl_name 	= SCHED_TUNING_MIN_TIMESLICE,
+		.procname	= "min_timeslice",
+		.mode		= 0644,
+		.data		= &sched_min_timeslice,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/*max_timeslice */
+	{
+		.ctl_name	= SCHED_TUNING_MAX_TIMESLICE,
+		.procname	= "max_timeslice",
+		.mode		= 0644,
+		.data		= &sched_max_timeslice,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* prio_bonus_ratio */
+	{
+		.ctl_name 	= SCHED_TUNING_BONUS_RATIO,
+		.procname	= "prio_bonus_ratio",
+		.mode		= 0644,
+		.data		= &sched_prio_bonus_ratio,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* max_sleep_avg */
+	{
+		.ctl_name	= SCHED_TUNING_MAX_SLEEP_AVG,
+		.procname	= "max_sleep_avg",
+		.mode		= 0644,
+		.data		= &sched_max_sleep_avg,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* starvation_limit */
+	{
+		.ctl_name 	= SCHED_TUNING_STARVATION_LIMIT,
+		.procname	= "starvation_limit",
+		.mode		= 0644,
+		.data		= &sched_starvation_limit,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* child_penalty */
+	{
+		.ctl_name 	= SCHED_TUNING_CHILD_PENALTY,
+		.procname	= "child_penalty",
+		.mode		= 0644,
+		.data		= &sched_child_penalty,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* parent_penalty */
+	{
+		.ctl_name	= SCHED_TUNING_PARENT_PENALTY,
+		.procname	= "parent_penalty",
+		.mode		= 0644,
+		.data		= &sched_parent_penalty,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* exit_weight */
+	{
+		.ctl_name 	= SCHED_TUNING_EXIT_WEIGHT,
+		.procname	= "exit_weight",
+		.mode		= 0644,
+		.data		= &sched_exit_weight,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* interactive_delta */
+	{
+		.ctl_name	= SCHED_TUNING_INTERACTIVE_DELTA,
+		.procname	= "interactive_delta",
+		.mode		= 0644,
+		.data		= &sched_interactive_delta,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* node_threshold */
+	{
+		.ctl_name	= SCHED_TUNING_NODE_THRESHOLD,
+		.procname	= "node_threshold",
+		.mode		= 0644,
+		.data		= &sched_node_threshold,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
 };  
 
 extern void init_irq_proc (void);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Interactivity bits
  2003-07-09  9:49 Guillaume Chazarain
  2003-07-09 10:59 ` Marc-Christian Petersen
@ 2003-07-09 15:59 ` Roberto Orenstein
  1 sibling, 0 replies; 7+ messages in thread
From: Roberto Orenstein @ 2003-07-09 15:59 UTC (permalink / raw)
  To: Guillaume Chazarain; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1807 bytes --]

On Wed, 2003-07-09 at 06:49, Guillaume Chazarain wrote:
> 08/07/03 23:13:22, Davide Libenzi <davidel@xmailserver.org> wrote:
> 
> >On Tue, 8 Jul 2003, Guillaume Chazarain wrote:
> >
> >> Hello,
> >>
> >> Currently the interactive points a process can have are in a [-5, 5] range,
> >> that is, 25% of the [0, 39] range. Two reasons are mentionned:
> >>
> >> 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
> >> 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
> >>
> >> But, using 50% of the range, instead of 25% the interactivity points are better
> >> spread and both rules are still respected.  Having a larger range for
> >> interactivity points it's easier to choose between two interactive tasks.
> >>
> >> So, why not changing PRIO_BONUS_RATIO to 50 instead of 25?
> >> Actually it should be in the [45, 49] range to maximize the bonus points
> >> range and satisfy both rules due to integer arithmetic.
> >
> >I believe these are the bits that broke the scheduler, that was working
> >fine during the very first shots in 2.5. IIRC Ingo was hit by ppl
> >complains about those 'nice' rules and he had to fix it. It'd be
> >interesting bring back a more generous interactive bonus and see how the
> >scheduler behave.
> 
> Thanks for the info.
> Before being 25% the interactivity range was 70%, thus breaking the rules. So
> I am now more convinced that a 50% range could be a good thing.
> 

Just a suggestion: instead of changing the code, why don't you try the
attached patch? At least you don't have to recompile just to change a
few defines. It is against 2.5.73, but applies to 2.5.74. Just the "long
sleep_time = jiffies - p->last_run;" isn't there.

I remember seeing someone's patch nearly identical to this (I think
it was Robert Love's) but I don't remember the URL.

[-- Attachment #2: patch-sched_tuning --]
[-- Type: text/x-patch, Size: 6217 bytes --]

diff -Nur -X dontdiff linux-2.5.73-O1int/include/linux/sysctl.h linux-2.5.73-test/include/linux/sysctl.h
--- linux-2.5.73-O1int/include/linux/sysctl.h	2003-06-27 19:35:11.000000000 -0300
+++ linux-2.5.73-test/include/linux/sysctl.h	2003-06-27 20:45:59.000000000 -0300
@@ -130,6 +130,7 @@
 	KERN_PIDMAX=55,		/* int: PID # limit */
   	KERN_CORE_PATTERN=56,	/* string: pattern for core-file names */
 	KERN_PANIC_ON_OOPS=57,  /* int: whether we will panic on an oops */
+	KERN_SCHED_TUNING=58	/* dir: scheduler tuning */
 };
 
 
@@ -193,6 +194,21 @@
 	RANDOM_UUID=6
 };
 
+/* /proc/sys/kernel/sched_tuning */
+enum
+{
+	SCHED_TUNING_MIN_TIMESLICE=1,
+	SCHED_TUNING_MAX_TIMESLICE=2,
+	SCHED_TUNING_BONUS_RATIO=3,
+	SCHED_TUNING_MAX_SLEEP_AVG=4,
+	SCHED_TUNING_STARVATION_LIMIT=5,
+	SCHED_TUNING_CHILD_PENALTY=6,
+	SCHED_TUNING_PARENT_PENALTY=7,
+	SCHED_TUNING_EXIT_WEIGHT=8,
+	SCHED_TUNING_INTERACTIVE_DELTA=9,
+	SCHED_TUNING_NODE_THRESHOLD=10
+};
+
 /* /proc/sys/bus/isa */
 enum
 {
diff -Nur -X dontdiff linux-2.5.73-O1int/kernel/sched.c linux-2.5.73-test/kernel/sched.c
--- linux-2.5.73-O1int/kernel/sched.c	2003-06-27 19:57:56.000000000 -0300
+++ linux-2.5.73-test/kernel/sched.c	2003-06-27 20:45:59.000000000 -0300
@@ -65,16 +65,28 @@
  * maximum timeslice is 200 msecs. Timeslices get refilled after
  * they expire.
  */
-#define MIN_TIMESLICE		( 10 * HZ / 1000)
-#define MAX_TIMESLICE		(200 * HZ / 1000)
-#define CHILD_PENALTY		50
-#define PARENT_PENALTY		100
-#define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
-#define INTERACTIVE_DELTA	2
-#define MAX_SLEEP_AVG		(10*HZ)
-#define STARVATION_LIMIT	(10*HZ)
-#define NODE_THRESHOLD		125
+int sched_min_timeslice 	= ( 10 * HZ / 1000);
+int sched_max_timeslice 	= (200 * HZ / 1000);
+int sched_prio_bonus_ratio	= 25;
+int sched_max_sleep_avg		= (10*HZ);
+int sched_starvation_limit	= (10*HZ);
+int sched_child_penalty		= 50;
+int sched_parent_penalty	= 100;
+int sched_exit_weight		= 3;
+int sched_interactive_delta 	= 2;
+int sched_node_threshold	= 125;
+
+
+#define MIN_TIMESLICE		sched_min_timeslice
+#define MAX_TIMESLICE		sched_max_timeslice
+#define CHILD_PENALTY		sched_child_penalty
+#define PARENT_PENALTY		sched_parent_penalty
+#define EXIT_WEIGHT		sched_exit_weight
+#define PRIO_BONUS_RATIO	sched_prio_bonus_ratio
+#define INTERACTIVE_DELTA	sched_interactive_delta
+#define MAX_SLEEP_AVG		sched_max_sleep_avg
+#define STARVATION_LIMIT	sched_starvation_limit
+#define NODE_THRESHOLD		sched_node_threshold
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
diff -Nur -X dontdiff linux-2.5.73-O1int/kernel/sysctl.c linux-2.5.73-test/kernel/sysctl.c
--- linux-2.5.73-O1int/kernel/sysctl.c	2003-06-27 19:38:13.000000000 -0300
+++ linux-2.5.73-test/kernel/sysctl.c	2003-06-27 20:45:59.000000000 -0300
@@ -58,6 +58,17 @@
 extern int pid_max;
 extern int sysctl_lower_zone_protection;
 extern int min_free_kbytes;
+/* sched.c */
+extern int sched_min_timeslice;
+extern int sched_max_timeslice;
+extern int sched_prio_bonus_ratio;
+extern int sched_max_sleep_avg;
+extern int sched_starvation_limit;
+extern int sched_child_penalty;
+extern int sched_parent_penalty;
+extern int sched_exit_weight;
+extern int sched_interactive_delta;
+extern int sched_node_threshold;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
@@ -123,6 +134,7 @@
 static ctl_table debug_table[];
 static ctl_table dev_table[];
 extern ctl_table random_table[];
+static ctl_table sched_tuning_table[];
 
 /* /proc declarations: */
 
@@ -551,6 +563,12 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= KERN_SCHED_TUNING,
+		.procname	= "sched_tuning",
+		.mode		= 0555,
+		.child		= sched_tuning_table	
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -775,6 +793,100 @@
 
 static ctl_table dev_table[] = {
 	{ .ctl_name = 0 }
+};
+
+/* sched tuning */
+static ctl_table sched_tuning_table[] = {
+	/* min_timeslice */
+	{
+		.ctl_name 	= SCHED_TUNING_MIN_TIMESLICE,
+		.procname	= "min_timeslice",
+		.mode		= 0644,
+		.data		= &sched_min_timeslice,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/*max_timeslice */
+	{
+		.ctl_name	= SCHED_TUNING_MAX_TIMESLICE,
+		.procname	= "max_timeslice",
+		.mode		= 0644,
+		.data		= &sched_max_timeslice,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* prio_bonus_ratio */
+	{
+		.ctl_name 	= SCHED_TUNING_BONUS_RATIO,
+		.procname	= "prio_bonus_ratio",
+		.mode		= 0644,
+		.data		= &sched_prio_bonus_ratio,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* max_sleep_avg */
+	{
+		.ctl_name	= SCHED_TUNING_MAX_SLEEP_AVG,
+		.procname	= "max_sleep_avg",
+		.mode		= 0644,
+		.data		= &sched_max_sleep_avg,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* starvation_limit */
+	{
+		.ctl_name 	= SCHED_TUNING_STARVATION_LIMIT,
+		.procname	= "starvation_limit",
+		.mode		= 0644,
+		.data		= &sched_starvation_limit,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* child_penalty */
+	{
+		.ctl_name 	= SCHED_TUNING_CHILD_PENALTY,
+		.procname	= "child_penalty",
+		.mode		= 0644,
+		.data		= &sched_child_penalty,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* parent_penalty */
+	{
+		.ctl_name	= SCHED_TUNING_PARENT_PENALTY,
+		.procname	= "parent_penalty",
+		.mode		= 0644,
+		.data		= &sched_parent_penalty,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* exit_weight */
+	{
+		.ctl_name 	= SCHED_TUNING_EXIT_WEIGHT,
+		.procname	= "exit_weight",
+		.mode		= 0644,
+		.data		= &sched_exit_weight,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,		
+	},
+	/* interactive_delta */
+	{
+		.ctl_name	= SCHED_TUNING_INTERACTIVE_DELTA,
+		.procname	= "interactive_delta",
+		.mode		= 0644,
+		.data		= &sched_interactive_delta,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	},
+	/* node_threshold */
+	{
+		.ctl_name	= SCHED_TUNING_NODE_THRESHOLD,
+		.procname	= "node_threshold",
+		.mode		= 0644,
+		.data		= &sched_node_threshold,
+		.maxlen		= sizeof(int),
+		.proc_handler	= &proc_dointvec,
+	}
 };  
 
 extern void init_irq_proc (void);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Interactivity bits
  2003-07-09  9:49 Guillaume Chazarain
@ 2003-07-09 10:59 ` Marc-Christian Petersen
  2003-07-09 15:59 ` Roberto Orenstein
  1 sibling, 0 replies; 7+ messages in thread
From: Marc-Christian Petersen @ 2003-07-09 10:59 UTC (permalink / raw)
  To: Guillaume Chazarain, Linux Kernel Mailing List

On Wednesday 09 July 2003 11:49, Guillaume Chazarain wrote:

Hi Guillaume,

> --- linux-2.5.74-bk6/kernel/sched.c.old	2003-07-09 10:08:01.000000000 +0200
> +++ linux-2.5.74-bk6/kernel/sched.c	2003-07-09 11:27:23.000000000 +0200
> @@ -68,10 +68,10 @@
>   */
>  #define MIN_TIMESLICE		( 10 * HZ / 1000)
>  #define MAX_TIMESLICE		(200 * HZ / 1000)
> -#define CHILD_PENALTY		50
> -#define PARENT_PENALTY		100
> +#define CHILD_PENALTY		80
> +#define PARENT_PENALTY		90
>  #define EXIT_WEIGHT		3
> -#define PRIO_BONUS_RATIO	25
> +#define PRIO_BONUS_RATIO	45
>  #define INTERACTIVE_DELTA	2
>  #define MAX_SLEEP_AVG		(10*HZ)
>  #define STARVATION_LIMIT	(10*HZ)
> @@ -88,13 +88,13 @@
>   * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
>   * Here are a few examples of different nice levels:
>   *
> - *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
> - *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
> - *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
> - *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
> - *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
> + *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0]
> + *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0]
> + *  TASK_INTERACTIVE(  0): [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0]
> + *  TASK_INTERACTIVE( 10): [1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
> + *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
>   *
> - * (the X axis represents the possible -5 ... 0 ... +5 dynamic
> + * (the X axis represents the possible -9 ... 0 ... +9 dynamic
>   *  priority range a task can explore, a value of '1' means the
>   *  task is rated interactive.)
>   *
> @@ -303,9 +303,9 @@
>   * priority but is modified by bonuses/penalties.
>   *
>   * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
> - * into the -5 ... 0 ... +5 bonus/penalty range.
> + * into the -9 ... 0 ... +9 bonus/penalty range.
>   *
> - * We use 25% of the full 0...39 priority range so that:
> + * We use 50% of the full 0...39 priority range so that:
>   *
>   * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
>   * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
> @@ -347,9 +347,9 @@
>   */
>  static inline void activate_task(task_t *p, runqueue_t *rq)
>  {
> -	long sleep_time = jiffies - p->last_run - 1;
> +	long sleep_time = jiffies - p->last_run;
>
> -	if (sleep_time > 0) {
> +	if (p->state == TASK_INTERRUPTIBLE && sleep_time) {
>  		int sleep_avg;
>
>  		/*
>

I can't see or feel any improvement with this applied on top of .74-mm3
(slightly modified to apply).


> --- linux-2.5.74-mm3/kernel/sched.c.old	2003-07-09 09:14:50.000000000 +0200
> +++ linux-2.5.74-mm3/kernel/sched.c	2003-07-09 11:39:56.000000000 +0200
> @@ -71,7 +71,7 @@
>  #define CHILD_PENALTY		80
>  #define PARENT_PENALTY		100
>  #define EXIT_WEIGHT		3
> -#define PRIO_BONUS_RATIO	25
> +#define PRIO_BONUS_RATIO	45
>  #define INTERACTIVE_DELTA	2
>  #define MIN_SLEEP_AVG		(HZ)
>  #define MAX_SLEEP_AVG		(10*HZ)
> @@ -386,9 +386,9 @@
>   */
>  static inline void activate_task(task_t *p, runqueue_t *rq)
>  {
> -	long sleep_time = jiffies - p->last_run - 1;
> +	long sleep_time = jiffies - p->last_run;
>
> -	if (sleep_time > 0) {
> +	if (p->state == TASK_INTERRUPTIBLE && sleep_time) {
>  		unsigned long runtime = jiffies - p->avg_start;
>
>  		/*
>

And I cannot see or feel any improvement with that one on top of
.74-mm3 either.

It's actually worse than without the patch:

Before the patch: mplayer was able to play a movie in fullscreen w/o any 
framedrops while "make -j2 bzImage modules"

With the patch: mplayer stops for ~0.5 seconds every 5 seconds while "make -j2 
bzImage modules"

You said the "long sleep_time = jiffies - p->last_run;" change helps mplayer? 
Not in my case ;)

- Celeron 1,3GHz
- 512MB RAM
- 1GB SWAP (2 IDE disks, ~512MB on each disk)
- ATA100 Maxtor 60GB HDD
- ATA100 Fujitsu 40GB

ciao, Marc


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Interactivity bits
@ 2003-07-09  9:49 Guillaume Chazarain
  2003-07-09 10:59 ` Marc-Christian Petersen
  2003-07-09 15:59 ` Roberto Orenstein
  0 siblings, 2 replies; 7+ messages in thread
From: Guillaume Chazarain @ 2003-07-09  9:49 UTC (permalink / raw)
  To: Linux Kernel Mailing List

08/07/03 23:13:22, Davide Libenzi <davidel@xmailserver.org> wrote:

>On Tue, 8 Jul 2003, Guillaume Chazarain wrote:
>
>> Hello,
>>
>> Currently the interactive points a process can have are in a [-5, 5] range,
>> that is, 25% of the [0, 39] range. Two reasons are mentioned:
>>
>> 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
>> 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
>>
>> But, using 50% of the range instead of 25%, the interactivity points are better
>> spread and both rules are still respected.  With a larger range for
>> interactivity points, it's easier to choose between two interactive tasks.
>>
>> So, why not change PRIO_BONUS_RATIO to 50 instead of 25?
>> Actually it should be in the [45, 49] range to maximize the bonus points
>> range and satisfy both rules due to integer arithmetic.
>
>I believe these are the bits that broke the scheduler, which was working
>fine during the very first shots in 2.5. IIRC Ingo was hit by people's
>complaints about those 'nice' rules and he had to fix it. It'd be
>interesting to bring back a more generous interactive bonus and see how the
>scheduler behaves.

Thanks for the info.
Before it was set to 25%, the interactivity range was 70%, thus breaking the rules. So
I am now more convinced that a 50% range could be a good thing.

Here is the patch I currently use; I am very happy with it.

--- linux-2.5.74-bk6/kernel/sched.c.old	2003-07-09 10:08:01.000000000 +0200
+++ linux-2.5.74-bk6/kernel/sched.c	2003-07-09 11:27:23.000000000 +0200
@@ -68,10 +68,10 @@
  */
 #define MIN_TIMESLICE		( 10 * HZ / 1000)
 #define MAX_TIMESLICE		(200 * HZ / 1000)
-#define CHILD_PENALTY		50
-#define PARENT_PENALTY		100
+#define CHILD_PENALTY		80
+#define PARENT_PENALTY		90
 #define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
+#define PRIO_BONUS_RATIO	45
 #define INTERACTIVE_DELTA	2
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
@@ -88,13 +88,13 @@
  * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
  * Here are a few examples of different nice levels:
  *
- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0]
+ *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE(  0): [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 10): [1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
  *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
+ * (the X axis represents the possible -9 ... 0 ... +9 dynamic
  *  priority range a task can explore, a value of '1' means the
  *  task is rated interactive.)
  *
@@ -303,9 +303,9 @@
  * priority but is modified by bonuses/penalties.
  *
  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
- * into the -5 ... 0 ... +5 bonus/penalty range.
+ * into the -9 ... 0 ... +9 bonus/penalty range.
  *
- * We use 25% of the full 0...39 priority range so that:
+ * We use 50% of the full 0...39 priority range so that:
  *
  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
@@ -347,9 +347,9 @@
  */
 static inline void activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	long sleep_time = jiffies - p->last_run;
 
-	if (sleep_time > 0) {
+	if (p->state == TASK_INTERRUPTIBLE && sleep_time) {
 		int sleep_avg;
 
 		/*


The following change:
-	long sleep_time = jiffies - p->last_run - 1;
+	long sleep_time = jiffies - p->last_run;
helps mplayer be rated interactive. Otherwise, even though it uses only a small fraction of the CPU,
it is considered a CPU hog.


My workload may not be representative of ordinary desktop use, so I'd like to have some
feedback on these simple changes.  Of course these changes can be put on top of Con's work
in -mm3 with this patch.

--- linux-2.5.74-mm3/kernel/sched.c.old	2003-07-09 09:14:50.000000000 +0200
+++ linux-2.5.74-mm3/kernel/sched.c	2003-07-09 11:39:56.000000000 +0200
@@ -71,7 +71,7 @@
 #define CHILD_PENALTY		80
 #define PARENT_PENALTY		100
 #define EXIT_WEIGHT		3
-#define PRIO_BONUS_RATIO	25
+#define PRIO_BONUS_RATIO	45
 #define INTERACTIVE_DELTA	2
 #define MIN_SLEEP_AVG		(HZ)
 #define MAX_SLEEP_AVG		(10*HZ)
@@ -386,9 +386,9 @@
  */
 static inline void activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	long sleep_time = jiffies - p->last_run;
 
-	if (sleep_time > 0) {
+	if (p->state == TASK_INTERRUPTIBLE && sleep_time) {
 		unsigned long runtime = jiffies - p->avg_start;
 
 		/*




Thanks.
Guillaume






^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2003-07-10  6:57 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-07-08 20:12 [PATCH] Interactivity bits Guillaume Chazarain
2003-07-08 21:13 ` Davide Libenzi
2003-07-10  7:14 ` Guillaume Chazarain
2003-07-09  9:49 Guillaume Chazarain
2003-07-09 10:59 ` Marc-Christian Petersen
2003-07-09 15:59 ` Roberto Orenstein
     [not found] <WQ98NJGC3OMJH0887GC84IHIE856FA.3f0c5488@monpc>
2003-07-09 18:44 ` Roberto Orenstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).