* [PATCH] N1int for interactivity
@ 2003-07-15  3:55 Con Kolivas
  2003-07-15  3:59 ` Andrew Morton
  2003-07-15 15:23 ` Felipe Alfaro Solana
From: Con Kolivas @ 2003-07-15  3:55 UTC (permalink / raw)
  To: linux kernel mailing list
  Cc: Mike Galbraith, Felipe Alfaro Solana, Andrew Morton

I've modified Mike Galbraith's nanosleep work for greater resolution to help 
the interactivity estimator work I've done in the O*int patches. This patch 
applies to any kernel patched up to the latest patch-O5int-0307150857 which 
applies on top of 2.5.75-mm1.

Please test and comment, and advise what further changes you think are 
appropriate or necessary, including other archs. I've preserved Mike's code
unchanged wherever possible. It works well for me, but n=1 does not
a good sample population make.
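
In outline: p->last_run becomes a nanosecond timestamp taken from
monotonic_clock(), and sleep/run time is accumulated in nanoseconds and
drained into whole scheduler ticks for the estimator, with the sub-tick
remainder carried forward. A minimal user-space rendering of that accounting
(illustrative only; HZ and the struct are stand-ins, not the kernel code in
the diff below):

#define HZ		1000			/* example value */
#define SCHED_TICK	(1000000000ULL / HZ)	/* nanoseconds per tick */

struct task {
	unsigned long long last_run;	/* ns timestamp at last switch */
	unsigned long long sleep_nsecs;	/* sub-tick remainder carried over */
};

static unsigned long sleep_ticks(struct task *p, unsigned long long now)
{
	unsigned long long sleep = now - p->last_run + p->sleep_nsecs;
	unsigned long ticks = sleep / SCHED_TICK;

	p->sleep_nsecs = sleep % SCHED_TICK;	/* keep the remainder */
	return ticks;
}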

The patch-N1int-0307151249 is available here:
http://kernel.kolivas.org/2.5

and here:

diff -Naurp linux-2.5.75-mm1/include/linux/sched.h linux-2.5.75-test/include/linux/sched.h
--- linux-2.5.75-mm1/include/linux/sched.h	2003-07-13 00:21:30.000000000 +1000
+++ linux-2.5.75-test/include/linux/sched.h	2003-07-15 12:48:05.000000000 +1000
@@ -341,7 +341,9 @@ struct task_struct {
 
 	unsigned long sleep_avg;
 	unsigned long avg_start;
-	unsigned long last_run;
+	unsigned long long last_run;
+	unsigned int run_nsecs;
+	unsigned int sleep_nsecs;
 
 	unsigned long policy;
 	cpumask_t cpus_allowed;
diff -Naurp linux-2.5.75-mm1/kernel/sched.c linux-2.5.75-test/kernel/sched.c
--- linux-2.5.75-mm1/kernel/sched.c	2003-07-15 12:47:41.000000000 +1000
+++ linux-2.5.75-test/kernel/sched.c	2003-07-15 12:47:58.000000000 +1000
@@ -78,8 +78,14 @@
 #define STARVATION_LIMIT	(10*HZ)
 #define SLEEP_BUFFER		(HZ/20)
 #define NODE_THRESHOLD		125
+#define SCHED_NANOSECOND	1
+#define SCHED_SECOND		(1000000000 * SCHED_NANOSECOND)
+#define SCHED_TICK		(SCHED_SECOND / HZ)
+#define TICKS_PER_SECOND	(SCHED_SECOND / SCHED_TICK)
 #define MAX_BONUS		((MAX_USER_PRIO - MAX_RT_PRIO) * PRIO_BONUS_RATIO / 100)
 
+extern unsigned long long monotonic_clock(void);
+
 /*
  * If a task is 'interactive' then we reinsert it in the active
  * array after it has expired its current timeslice. (it will not
@@ -387,9 +393,23 @@ static inline void __activate_task(task_
  */
 static inline void activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	unsigned long long now = monotonic_clock();
+	long long sleep =  now - p->last_run + p->sleep_nsecs;
+	int ticks = 0;
+
+	if (sleep >= SCHED_TICK) {
+		while (sleep >= SCHED_SECOND) {
+			sleep -= SCHED_SECOND;
+			ticks += TICKS_PER_SECOND;
+		}
+		while (sleep >= SCHED_TICK) {
+			sleep -= SCHED_TICK;
+			ticks++;
+		}
+		p->sleep_nsecs = sleep;
+	} else p->sleep_nsecs += sleep;
 
-	if (sleep_time > 0) {
+	if (ticks > 0) {
 		unsigned long runtime = jiffies - p->avg_start;
 
 		/*
@@ -397,7 +417,7 @@ static inline void activate_task(task_t 
 		 * will get just under interactive status with a small runtime
 		 * to allow them to become interactive or non-interactive rapidly
 		 */
-		if (sleep_time > MIN_SLEEP_AVG){
+		if (ticks > MIN_SLEEP_AVG){
 			p->avg_start = jiffies - MIN_SLEEP_AVG;
 			p->sleep_avg = MIN_SLEEP_AVG * (MAX_BONUS - INTERACTIVE_DELTA - 1) /
 				MAX_BONUS;
@@ -410,7 +430,7 @@ static inline void activate_task(task_t 
 			 * spends sleeping, the higher the average gets - and the
 			 * higher the priority boost gets as well.
 			 */
-			p->sleep_avg += sleep_time;
+			p->sleep_avg += ticks;
 
 			/*
 			 * Give a bonus to tasks that wake early on to prevent
@@ -427,8 +447,10 @@ static inline void activate_task(task_t 
 			 * prevent fully interactive tasks from becoming
 			 * lower priority with small bursts of cpu usage.
 			 */
-			if (p->sleep_avg > (MAX_SLEEP_AVG + SLEEP_BUFFER))
+			if (p->sleep_avg > (MAX_SLEEP_AVG + SLEEP_BUFFER)){
 				p->sleep_avg = MAX_SLEEP_AVG + SLEEP_BUFFER;
+				p->sleep_nsecs = 0;
+			}
 		}
 
 		if (unlikely(p->avg_start > jiffies)){
@@ -616,6 +638,8 @@ void wake_up_forked_process(task_t * p)
 	normalise_sleep(p);
 	p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
 	p->prio = effective_prio(p);
+	p->run_nsecs = 0;
+	p->sleep_nsecs = 0;
 	set_task_cpu(p, smp_processor_id());
 
 	if (unlikely(!current->array))
@@ -1236,6 +1260,57 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
 		(jiffies - (rq)->expired_timestamp >= \
 			STARVATION_LIMIT * ((rq)->nr_running) + 1)))
 
+inline void __scheduler_tick(runqueue_t *rq, task_t *p)
+{
+	unsigned long long now = monotonic_clock();
+	prio_array_t *array = rq->active;
+	int ticks;
+
+	p->run_nsecs += now - p->last_run;
+	/* Task might have expired already, but not scheduled off yet */
+	if (p->array != array) {
+		set_tsk_need_resched(p);
+		goto abort;
+	}
+	if (p->run_nsecs < SCHED_TICK || p->policy == SCHED_FIFO )
+		goto abort;
+
+	for (ticks = 0; p->run_nsecs >= SCHED_TICK; ticks++)
+		p->run_nsecs -= SCHED_TICK;
+	if (p->sleep_avg > ticks)
+		p->sleep_avg -= ticks;
+	else
+		p->sleep_avg = 0;
+	p->time_slice -= ticks;
+
+	if (p->time_slice <= 0) {
+		dequeue_task(p, p->array);
+		p->prio = effective_prio(p);
+		p->time_slice = task_timeslice(p);
+		p->first_time_slice = 0;
+		set_tsk_need_resched(p);
+		if ((EXPIRED_STARVING(rq) && !rt_task(p)) ||
+				!TASK_INTERACTIVE(p)) {
+			array = rq->expired;
+			if (!rq->expired_timestamp)
+				rq->expired_timestamp = jiffies;
+		}
+		enqueue_task(p, array);
+	} else if (unlikely(p->prio < effective_prio(p))){
+		/*
+		 * Tasks that have lowered their priority are put to the end
+		 * of the active array with their remaining timeslice
+		 */
+		dequeue_task(p, rq->active);
+		set_tsk_need_resched(p);
+		p->prio = effective_prio(p);
+		enqueue_task(p, rq->active);
+	}
+
+abort:
+	p->last_run = monotonic_clock();
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -1249,11 +1324,12 @@ void scheduler_tick(int user_ticks, int 
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
+	int idle = p == rq->idle;
 
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
-	if (p == rq->idle) {
+	if (idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
 			cpustat->system += sys_ticks;
@@ -1261,8 +1337,7 @@ void scheduler_tick(int user_ticks, int 
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
-		rebalance_tick(rq, 1);
-		return;
+		goto out;
 	}
 	if (TASK_NICE(p) > 0)
 		cpustat->nice += user_ticks;
@@ -1270,61 +1345,15 @@ void scheduler_tick(int user_ticks, int 
 		cpustat->user += user_ticks;
 	cpustat->system += sys_ticks;
 
-	/* Task might have expired already, but not scheduled off yet */
-	if (p->array != rq->active) {
-		set_tsk_need_resched(p);
-		goto out;
-	}
 	spin_lock(&rq->lock);
 	/*
 	 * The task was running during this tick - update the
-	 * time slice counter and the sleep average.
+	 * time slice counter and the sleep average. 
 	 */
-	if (p->sleep_avg)
-		p->sleep_avg--;
-	if (unlikely(rt_task(p))) {
-		/*
-		 * RR tasks need a special form of timeslice management.
-		 * FIFO tasks have no timeslices.
-		 */
-		if ((p->policy == SCHED_RR) && !--p->time_slice) {
-			p->time_slice = task_timeslice(p);
-			p->first_time_slice = 0;
-			set_tsk_need_resched(p);
-
-			/* put it at the end of the queue: */
-			dequeue_task(p, rq->active);
-			enqueue_task(p, rq->active);
-		}
-		goto out_unlock;
-	}
-	if (!--p->time_slice) {
-		dequeue_task(p, rq->active);
-		set_tsk_need_resched(p);
-		p->prio = effective_prio(p);
-		p->time_slice = task_timeslice(p);
-		p->first_time_slice = 0;
-
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
-			if (!rq->expired_timestamp)
-				rq->expired_timestamp = jiffies;
-			enqueue_task(p, rq->expired);
-		} else
-			enqueue_task(p, rq->active);
-	} else if (unlikely(p->prio < effective_prio(p))){
-		/*
-		 * Tasks that have lowered their priority are put to the end
-		 * of the active array with their remaining timeslice
-		 */
-		dequeue_task(p, rq->active);
-		set_tsk_need_resched(p);
-		p->prio = effective_prio(p);
-		enqueue_task(p, rq->active);
-	}
-out_unlock:
+	 __scheduler_tick(rq, p);
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(rq, 0);
+	rebalance_tick(rq, idle);
 }
 
 void scheduling_functions_start_here(void) { }
@@ -1358,8 +1387,8 @@ need_resched:
 	rq = this_rq();
 
 	release_kernel_lock(prev);
-	prev->last_run = jiffies;
 	spin_lock_irq(&rq->lock);
+	__scheduler_tick(rq, prev);
 
 	/*
 	 * if entering off of a kernel preemption go straight
@@ -1414,6 +1443,7 @@ switch_tasks:
 	if (likely(prev != next)) {
 		rq->nr_switches++;
 		rq->curr = next;
+		next->last_run = prev->last_run;
 
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
diff -Naurp linux-2.5.75-mm1/arch/i386/kernel/timers/timer_tsc.c linux-2.5.75-test/arch/i386/kernel/timers/timer_tsc.c
--- linux-2.5.75-mm1/arch/i386/kernel/timers/timer_tsc.c	2003-07-12 00:01:27.000000000 +1000
+++ linux-2.5.75-test/arch/i386/kernel/timers/timer_tsc.c	2003-07-15 12:48:14.000000000 +1000
@@ -102,12 +102,13 @@ static unsigned long get_offset_tsc(void
 static unsigned long long monotonic_clock_tsc(void)
 {
 	unsigned long long last_offset, this_offset, base;
-	
+	unsigned long flags;
+
 	/* atomically read monotonic base & last_offset */
-	read_lock_irq(&monotonic_lock);
+	read_lock_irqsave(&monotonic_lock, flags);
 	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
 	base = monotonic_base;
-	read_unlock_irq(&monotonic_lock);
+	read_unlock_irqrestore(&monotonic_lock, flags);
 
 	/* Read the Time Stamp Counter */
 	rdtscll(this_offset);



* Re: [PATCH] N1int for interactivity
  2003-07-15  3:55 [PATCH] N1int for interactivity Con Kolivas
@ 2003-07-15  3:59 ` Andrew Morton
  2003-07-15  4:03   ` Con Kolivas
  2003-07-15  7:00   ` Zwane Mwaikambo
  2003-07-15 15:23 ` Felipe Alfaro Solana
From: Andrew Morton @ 2003-07-15  3:59 UTC (permalink / raw)
  To: Con Kolivas; +Cc: linux-kernel, efault, felipe_alfaro

Con Kolivas <kernel@kolivas.org> wrote:
>
> I've modified Mike Galbraith's nanosleep work for greater resolution to help 
> the interactivity estimator work I've done in the O*int patches.

> +inline void __scheduler_tick(runqueue_t *rq, task_t *p)

Two callsites, this guy shouldn't be inlined.

Should it have static scope?  The code as-is generates a third copy...
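
(For illustration, a compilable sketch of the point under the gnu89 inline
rules gcc uses here; try: gcc -std=gnu89 -O2 -c demo.c && nm demo.o. The
function names are made up:

inline int twice(int x) { return x + x; }		/* nm shows: T twice */
static inline int thrice(int x) { return 3 * x; }	/* no symbol emitted */

int use(int v)
{
	return twice(v) + thrice(v);	/* both inlined at -O2 */
}

Plain `inline' keeps external linkage, so an out-of-line definition is
emitted in addition to the bodies inlined at each call site; `static inline'
avoids the extra copy.)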

>  static unsigned long long monotonic_clock_tsc(void)
>  {
>  	unsigned long long last_offset, this_offset, base;
> -	
> +	unsigned long flags;
> +
>  	/* atomically read monotonic base & last_offset */
> -	read_lock_irq(&monotonic_lock);
> +	read_lock_irqsave(&monotonic_lock, flags);
>  	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
>  	base = monotonic_base;
> -	read_unlock_irq(&monotonic_lock);
> +	read_unlock_irqrestore(&monotonic_lock, flags);
>  
>  	/* Read the Time Stamp Counter */

Why do we need to take a global lock here?  Can't we use
get_cycles() or something?


Have all the other architectures been reviewed to see if they need this
change?



* Re: [PATCH] N1int for interactivity
  2003-07-15  3:59 ` Andrew Morton
@ 2003-07-15  4:03   ` Con Kolivas
  2003-07-15  7:02     ` Mike Galbraith
  2003-07-15  7:00   ` Zwane Mwaikambo
From: Con Kolivas @ 2003-07-15  4:03 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, efault, felipe_alfaro

On Tue, 15 Jul 2003 13:59, Andrew Morton wrote:
> Con Kolivas <kernel@kolivas.org> wrote:
> > I've modified Mike Galbraith's nanosleep work for greater resolution to
> > help the interactivity estimator work I've done in the O*int patches.
> >
> > +inline void __scheduler_tick(runqueue_t *rq, task_t *p)
>
> Two callsites, this guy shouldn't be inlined.
>
> Should it have static scope?  The code as-is generates a third copy...
>
> >  static unsigned long long monotonic_clock_tsc(void)
> >  {
> >  	unsigned long long last_offset, this_offset, base;
> > -
> > +	unsigned long flags;
> > +
> >  	/* atomically read monotonic base & last_offset */
> > -	read_lock_irq(&monotonic_lock);
> > +	read_lock_irqsave(&monotonic_lock, flags);
> >  	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> >  	base = monotonic_base;
> > -	read_unlock_irq(&monotonic_lock);
> > +	read_unlock_irqrestore(&monotonic_lock, flags);
> >
> >  	/* Read the Time Stamp Counter */
>
> Why do we need to take a global lock here?  Can't we use
> get_cycles() or something?
>
>
> Have all the other architectures been reviewed to see if they need this
> change?

I'm calling for help here. This is getting way out of my depth; I've simply 
applied Mike's patch.

Con



* Re: [PATCH] N1int for interactivity
  2003-07-15  3:59 ` Andrew Morton
  2003-07-15  4:03   ` Con Kolivas
@ 2003-07-15  7:00   ` Zwane Mwaikambo
  2003-07-15 10:23     ` Con Kolivas
From: Zwane Mwaikambo @ 2003-07-15  7:00 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Con Kolivas, linux-kernel, efault, felipe_alfaro

On Mon, 14 Jul 2003, Andrew Morton wrote:

> >  	base = monotonic_base;
> > -	read_unlock_irq(&monotonic_lock);
> > +	read_unlock_irqrestore(&monotonic_lock, flags);
> >  
> >  	/* Read the Time Stamp Counter */
> 
> Why do we need to take a global lock here?  Can't we use
> get_cycles() or something?

I think that would break even on some x86 boxes if we used get_cycles. I do 
wonder, however, why we need that lock; I see x86/64 uses a seqlock at least, 
although I can't vouch for whether that would have an adverse effect here. 
I presume Stultz would know.
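
A rough sketch of that lockless read side, assuming the read_seqbegin/
read_seqretry primitives from <linux/seqlock.h>; the variable and helper
names are illustrative, not the actual x86/64 code:

static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;

static unsigned long long monotonic_clock_tsc(void)
{
	unsigned long long base, last_offset, this_offset;
	unsigned long seq;

	do {
		seq = read_seqbegin(&monotonic_lock);
		base = monotonic_base;
		last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
	} while (read_seqretry(&monotonic_lock, seq));

	/* read the TSC outside the retry loop */
	rdtscll(this_offset);
	return base + cycles_2_ns(this_offset - last_offset);
}

The writer side (write_seqlock/write_sequnlock) replaces the rwlock, and
readers never block it or each other.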

> Have all the other architectures been reviewed to see if they need this
> change?

No one else appears to have monotonic_clock, so this would break every other 
arch out there.

-- 
function.linuxpower.ca


* Re: [PATCH] N1int for interactivity
  2003-07-15  4:03   ` Con Kolivas
@ 2003-07-15  7:02     ` Mike Galbraith
From: Mike Galbraith @ 2003-07-15  7:02 UTC (permalink / raw)
  To: Con Kolivas; +Cc: Andrew Morton, linux-kernel, felipe_alfaro

At 02:03 PM 7/15/2003 +1000, Con Kolivas wrote:
>On Tue, 15 Jul 2003 13:59, Andrew Morton wrote:
> > Con Kolivas <kernel@kolivas.org> wrote:
> > > I've modified Mike Galbraith's nanosleep work for greater resolution to
> > > help the interactivity estimator work I've done in the O*int patches.

(I think a repeat of an earlier public warning wrt this diff may be in 
order: run irman/contest before you proceed... here there be giant 
economy-sized dragons)

> > >
> > > +inline void __scheduler_tick(runqueue_t *rq, task_t *p)
> >
> > Two callsites, this guy shouldn't be inlined.

(I just wild-guessed that it'd be a tiny bit faster while I was coding... 
_the_ fast-path and all)

> >
> > Should it have static scope?  The code as-is generates a third copy...
> >
> > >  static unsigned long long monotonic_clock_tsc(void)
> > >  {
> > >     unsigned long long last_offset, this_offset, base;
> > > -
> > > +   unsigned long flags;
> > > +
> > >     /* atomically read monotonic base & last_offset */
> > > -   read_lock_irq(&monotonic_lock);
> > > +   read_lock_irqsave(&monotonic_lock, flags);
> > >     last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> > >     base = monotonic_base;
> > > -   read_unlock_irq(&monotonic_lock);
> > > +   read_unlock_irqrestore(&monotonic_lock, flags);
> > >
> > >     /* Read the Time Stamp Counter */
> >
> > Why do we need to take a global lock here?  Can't we use
> > get_cycles() or something?

The scalability issue is gone, I'm told.

> > Have all the other architectures been reviewed to see if they need this
> > change?
>
>I'm calling for help here. This is getting way out of my depth; I've simply
>applied Mike's patch.

Everyone will likely need that, or there will be major explosions.  You'll 
also have to deal with the no-tsc case... either return jiffies * 1000000 
or make it a config option.

(I'd go with a config option if I were you... dragons aren't all that likely 
to torch a desktop load, but if you do this globally, I'm quite certain 
that something is going to get char-broiled;)
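
A minimal sketch of the jiffies fallback, assuming HZ=1000 (hence the
jiffies * 1000000 figure above); illustrative only, not tested code:

unsigned long long monotonic_clock(void)
{
	/* no usable TSC: approximate with jiffies scaled to nanoseconds */
	return (unsigned long long)jiffies * (1000000000ULL / HZ);
}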

         -Mike 



* Re: [PATCH] N1int for interactivity
  2003-07-15  7:00   ` Zwane Mwaikambo
@ 2003-07-15 10:23     ` Con Kolivas
From: Con Kolivas @ 2003-07-15 10:23 UTC (permalink / raw)
  To: Zwane Mwaikambo; +Cc: linux-kernel, efault, felipe_alfaro

On Tue, 15 Jul 2003 17:00, Zwane Mwaikambo wrote:
> On Mon, 14 Jul 2003, Andrew Morton wrote:
> > >  	base = monotonic_base;
> > > -	read_unlock_irq(&monotonic_lock);
> > > +	read_unlock_irqrestore(&monotonic_lock, flags);
> > >
> > >  	/* Read the Time Stamp Counter */
> >
> > Why do we need to take a global lock here?  Can't we use
> > get_cycles() or something?
>
> I think that'll break even on some x86 boxes if we used get_cycles. I do
> wonder however why we need that lock, i see x86/64 uses seqlock at least.
> Although i can't vouch for whether that would have an adverse affect here.
> I presume Stultz would know.
>
> > Have all the other architectures been reviewed to see if they need this
> > change?
>
> No one else appears to have monotonic_clock, this would break every other
> arch out there.

OK, what this patch is for is to see whether we really need to go down this 
path. It may be untidy and have dragons, but it can be cleaned up and made to 
work with other archs; first I need some feedback from testers to see if it 
adds _significant_ benefit that warrants its addition to the direction I'm 
already taking. Unfortunately, all the incremental patches I've done seem 
to have scared people off :(

Here's a slightly tidier version (also at kernel.kolivas.org/2.5) which
applies on top of 2.5.75-mm1 patched with O5int.

akpm removed from cc list as it's not interesting for -mm atm

Con

diff -Naurp linux-2.5.75-mm1/arch/i386/kernel/timers/timer_tsc.c linux-2.5.75-test/arch/i386/kernel/timers/timer_tsc.c
--- linux-2.5.75-mm1/arch/i386/kernel/timers/timer_tsc.c	2003-07-12 00:01:27.000000000 +1000
+++ linux-2.5.75-test/arch/i386/kernel/timers/timer_tsc.c	2003-07-15 12:48:14.000000000 +1000
@@ -102,12 +102,13 @@ static unsigned long get_offset_tsc(void
 static unsigned long long monotonic_clock_tsc(void)
 {
 	unsigned long long last_offset, this_offset, base;
-	
+	unsigned long flags;
+
 	/* atomically read monotonic base & last_offset */
-	read_lock_irq(&monotonic_lock);
+	read_lock_irqsave(&monotonic_lock, flags);
 	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
 	base = monotonic_base;
-	read_unlock_irq(&monotonic_lock);
+	read_unlock_irqrestore(&monotonic_lock, flags);
 
 	/* Read the Time Stamp Counter */
 	rdtscll(this_offset);
diff -Naurp linux-2.5.75-mm1/include/linux/sched.h linux-2.5.75-test/include/linux/sched.h
--- linux-2.5.75-mm1/include/linux/sched.h	2003-07-13 00:21:30.000000000 +1000
+++ linux-2.5.75-test/include/linux/sched.h	2003-07-15 20:11:53.000000000 +1000
@@ -341,7 +341,9 @@ struct task_struct {
 
 	unsigned long sleep_avg;
 	unsigned long avg_start;
-	unsigned long last_run;
+	unsigned long long last_run;
+	unsigned long run_nsecs;
+	unsigned long sleep_nsecs;
 
 	unsigned long policy;
 	cpumask_t cpus_allowed;
diff -Naurp linux-2.5.75-mm1/kernel/sched.c linux-2.5.75-test/kernel/sched.c
--- linux-2.5.75-mm1/kernel/sched.c	2003-07-15 12:47:41.000000000 +1000
+++ linux-2.5.75-test/kernel/sched.c	2003-07-15 20:07:26.000000000 +1000
@@ -78,8 +78,14 @@
 #define STARVATION_LIMIT	(10*HZ)
 #define SLEEP_BUFFER		(HZ/20)
 #define NODE_THRESHOLD		125
+#define SCHED_NANOSECOND	1
+#define SCHED_SECOND		(1000000000 * SCHED_NANOSECOND)
+#define SCHED_TICK		(SCHED_SECOND / HZ)
+#define TICKS_PER_SECOND	(SCHED_SECOND / SCHED_TICK)
 #define MAX_BONUS		((MAX_USER_PRIO - MAX_RT_PRIO) * PRIO_BONUS_RATIO / 100)
 
+extern unsigned long long monotonic_clock(void);
+
 /*
  * If a task is 'interactive' then we reinsert it in the active
  * array after it has expired its current timeslice. (it will not
@@ -387,9 +393,19 @@ static inline void __activate_task(task_
  */
 static inline void activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
+	unsigned long long now = monotonic_clock();
+	unsigned long sleep =  (unsigned long)(now - p->last_run + p->sleep_nsecs);
+	unsigned long ticks = 0;
+
+	if (sleep >= SCHED_TICK) {
+		ticks += TICKS_PER_SECOND * (sleep / SCHED_SECOND);
+		sleep %= SCHED_SECOND;
+		ticks += sleep / SCHED_TICK;
+		sleep %= SCHED_TICK;
+		p->sleep_nsecs = sleep;
+	} else p->sleep_nsecs += sleep;
 
-	if (sleep_time > 0) {
+	if (ticks > 0) {
 		unsigned long runtime = jiffies - p->avg_start;
 
 		/*
@@ -397,7 +413,7 @@ static inline void activate_task(task_t 
 		 * will get just under interactive status with a small runtime
 		 * to allow them to become interactive or non-interactive rapidly
 		 */
-		if (sleep_time > MIN_SLEEP_AVG){
+		if (ticks > MIN_SLEEP_AVG){
 			p->avg_start = jiffies - MIN_SLEEP_AVG;
 			p->sleep_avg = MIN_SLEEP_AVG * (MAX_BONUS - INTERACTIVE_DELTA - 1) /
 				MAX_BONUS;
@@ -410,7 +426,7 @@ static inline void activate_task(task_t 
 			 * spends sleeping, the higher the average gets - and the
 			 * higher the priority boost gets as well.
 			 */
-			p->sleep_avg += sleep_time;
+			p->sleep_avg += ticks;
 
 			/*
 			 * Give a bonus to tasks that wake early on to prevent
@@ -427,8 +443,10 @@ static inline void activate_task(task_t 
 			 * prevent fully interactive tasks from becoming
 			 * lower priority with small bursts of cpu usage.
 			 */
-			if (p->sleep_avg > (MAX_SLEEP_AVG + SLEEP_BUFFER))
+			if (p->sleep_avg > (MAX_SLEEP_AVG + SLEEP_BUFFER)){
 				p->sleep_avg = MAX_SLEEP_AVG + SLEEP_BUFFER;
+				p->sleep_nsecs = 0;
+			}
 		}
 
 		if (unlikely(p->avg_start > jiffies)){
@@ -616,6 +634,8 @@ void wake_up_forked_process(task_t * p)
 	normalise_sleep(p);
 	p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
 	p->prio = effective_prio(p);
+	p->run_nsecs = 0;
+	p->sleep_nsecs = 0;
 	set_task_cpu(p, smp_processor_id());
 
 	if (unlikely(!current->array))
@@ -1236,6 +1256,57 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
 		(jiffies - (rq)->expired_timestamp >= \
 			STARVATION_LIMIT * ((rq)->nr_running) + 1)))
 
+static void __scheduler_tick(runqueue_t *rq, task_t *p)
+{
+	unsigned long long now = monotonic_clock();
+	prio_array_t *array = rq->active;
+	unsigned long ticks;
+
+	p->run_nsecs += now - p->last_run;
+	/* Task might have expired already, but not scheduled off yet */
+	if (p->array != array) {
+		set_tsk_need_resched(p);
+		goto abort;
+	}
+	if (p->run_nsecs < SCHED_TICK || p->policy == SCHED_FIFO )
+		goto abort;
+
+	for (ticks = 0; p->run_nsecs >= SCHED_TICK; ticks++)
+		p->run_nsecs -= SCHED_TICK;
+	if (p->sleep_avg > ticks)
+		p->sleep_avg -= ticks;
+	else
+		p->sleep_avg = 0;
+	p->time_slice -= ticks;
+
+	if (p->time_slice <= 0) {
+		dequeue_task(p, p->array);
+		p->prio = effective_prio(p);
+		p->time_slice = task_timeslice(p);
+		p->first_time_slice = 0;
+		set_tsk_need_resched(p);
+		if ((EXPIRED_STARVING(rq) && !rt_task(p)) ||
+				!TASK_INTERACTIVE(p)) {
+			array = rq->expired;
+			if (!rq->expired_timestamp)
+				rq->expired_timestamp = jiffies;
+		}
+		enqueue_task(p, array);
+	} else if (unlikely(p->prio < effective_prio(p))){
+		/*
+		 * Tasks that have lowered their priority are put to the end
+		 * of the active array with their remaining timeslice
+		 */
+		dequeue_task(p, rq->active);
+		set_tsk_need_resched(p);
+		p->prio = effective_prio(p);
+		enqueue_task(p, rq->active);
+	}
+
+abort:
+	p->last_run = monotonic_clock();
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -1249,11 +1320,12 @@ void scheduler_tick(int user_ticks, int 
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
+	int idle = p == rq->idle;
 
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
-	if (p == rq->idle) {
+	if (idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
 			cpustat->system += sys_ticks;
@@ -1261,8 +1333,7 @@ void scheduler_tick(int user_ticks, int 
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
-		rebalance_tick(rq, 1);
-		return;
+		goto out;
 	}
 	if (TASK_NICE(p) > 0)
 		cpustat->nice += user_ticks;
@@ -1270,61 +1341,15 @@ void scheduler_tick(int user_ticks, int 
 		cpustat->user += user_ticks;
 	cpustat->system += sys_ticks;
 
-	/* Task might have expired already, but not scheduled off yet */
-	if (p->array != rq->active) {
-		set_tsk_need_resched(p);
-		goto out;
-	}
 	spin_lock(&rq->lock);
 	/*
 	 * The task was running during this tick - update the
-	 * time slice counter and the sleep average.
+	 * time slice counter and the sleep average. 
 	 */
-	if (p->sleep_avg)
-		p->sleep_avg--;
-	if (unlikely(rt_task(p))) {
-		/*
-		 * RR tasks need a special form of timeslice management.
-		 * FIFO tasks have no timeslices.
-		 */
-		if ((p->policy == SCHED_RR) && !--p->time_slice) {
-			p->time_slice = task_timeslice(p);
-			p->first_time_slice = 0;
-			set_tsk_need_resched(p);
-
-			/* put it at the end of the queue: */
-			dequeue_task(p, rq->active);
-			enqueue_task(p, rq->active);
-		}
-		goto out_unlock;
-	}
-	if (!--p->time_slice) {
-		dequeue_task(p, rq->active);
-		set_tsk_need_resched(p);
-		p->prio = effective_prio(p);
-		p->time_slice = task_timeslice(p);
-		p->first_time_slice = 0;
-
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
-			if (!rq->expired_timestamp)
-				rq->expired_timestamp = jiffies;
-			enqueue_task(p, rq->expired);
-		} else
-			enqueue_task(p, rq->active);
-	} else if (unlikely(p->prio < effective_prio(p))){
-		/*
-		 * Tasks that have lowered their priority are put to the end
-		 * of the active array with their remaining timeslice
-		 */
-		dequeue_task(p, rq->active);
-		set_tsk_need_resched(p);
-		p->prio = effective_prio(p);
-		enqueue_task(p, rq->active);
-	}
-out_unlock:
+	 __scheduler_tick(rq, p);
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(rq, 0);
+	rebalance_tick(rq, idle);
 }
 
 void scheduling_functions_start_here(void) { }
@@ -1358,8 +1383,8 @@ need_resched:
 	rq = this_rq();
 
 	release_kernel_lock(prev);
-	prev->last_run = jiffies;
 	spin_lock_irq(&rq->lock);
+	__scheduler_tick(rq, prev);
 
 	/*
 	 * if entering off of a kernel preemption go straight
@@ -1414,6 +1439,7 @@ switch_tasks:
 	if (likely(prev != next)) {
 		rq->nr_switches++;
 		rq->curr = next;
+		next->last_run = prev->last_run;
 
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
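
For the curious, the div/mod form in activate_task() above is arithmetically
identical to the subtraction loops in the first posting; a quick user-space
check (the HZ value is just an example):

#include <assert.h>
#include <stdio.h>

#define HZ			1000ULL
#define SCHED_SECOND		1000000000ULL
#define SCHED_TICK		(SCHED_SECOND / HZ)
#define TICKS_PER_SECOND	(SCHED_SECOND / SCHED_TICK)

int main(void)
{
	unsigned long long sleep;

	for (sleep = 0; sleep < 3 * SCHED_SECOND; sleep += 333333) {
		unsigned long long s1 = sleep, t1 = 0, s2 = sleep, t2;

		/* first posting: subtraction loops */
		while (s1 >= SCHED_SECOND) {
			s1 -= SCHED_SECOND;
			t1 += TICKS_PER_SECOND;
		}
		while (s1 >= SCHED_TICK) {
			s1 -= SCHED_TICK;
			t1++;
		}

		/* this posting: division and modulus */
		t2 = TICKS_PER_SECOND * (s2 / SCHED_SECOND);
		s2 %= SCHED_SECOND;
		t2 += s2 / SCHED_TICK;
		s2 %= SCHED_TICK;

		assert(t1 == t2 && s1 == s2);
	}
	printf("loop and div/mod forms agree\n");
	return 0;
}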



* Re: [PATCH] N1int for interactivity
  2003-07-15  3:55 [PATCH] N1int for interactivity Con Kolivas
  2003-07-15  3:59 ` Andrew Morton
@ 2003-07-15 15:23 ` Felipe Alfaro Solana
  2003-07-15 23:12   ` Con Kolivas
From: Felipe Alfaro Solana @ 2003-07-15 15:23 UTC (permalink / raw)
  To: Con Kolivas; +Cc: linux kernel mailing list, Mike Galbraith, Andrew Morton

On Tue, 2003-07-15 at 05:55, Con Kolivas wrote:
> I've modified Mike Galbraith's nanosleep work for greater resolution to help 
> the interactivity estimator work I've done in the O*int patches. This patch 
> applies to any kernel patched up to the latest patch-O5int-0307150857 which 
> applies on top of 2.5.75-mm1.
> 
> Please test and comment, and advise what further changes you think are 
> appropriate or necessary, including other archs. I've preserved Mike's code
> unchanged wherever possible. It works well for me, but n=1 does not
> a good sample population make.
> 
> The patch-N1int-0307151249 is available here:
> http://kernel.kolivas.org/2.5

I can still starve XMMS on 2.5.75-mm1 + patch-O5int-0307150857 +
patch-N1int-0307152010:

1. Log on to KDE
2. Launch Konqueror
3. Launch XMMS and make it play
4. Move Konqueror window all over the desktop

Step 4 will make XMMS starve for a few seconds. Also, under heavy load
(while true; do a=2; done), moving the Konqueror window like crazy makes
X go jerky after a few seconds. If I stop moving windows around, X returns
to normal, smooth behavior after a few more seconds.

I can fix the starvation/smoothness by setting:

#define PRIO_BONUS_RATIO        45
#define INTERACTIVE_DELTA       4
#define MAX_SLEEP_AVG           (HZ)
#define STARVATION_LIMIT        (HZ)

For me, 2.6.0-test1 stock scheduler plus above changes makes the most
user-friendly desktop I've ever seen.

Thanks!



* Re: [PATCH] N1int for interactivity
  2003-07-15 15:23 ` Felipe Alfaro Solana
@ 2003-07-15 23:12   ` Con Kolivas
  2003-07-16  7:12     ` Felipe Alfaro Solana
From: Con Kolivas @ 2003-07-15 23:12 UTC (permalink / raw)
  To: Felipe Alfaro Solana
  Cc: linux kernel mailing list, Mike Galbraith, Andrew Morton

On Wed, 16 Jul 2003 01:23, Felipe Alfaro Solana wrote:
> On Tue, 2003-07-15 at 05:55, Con Kolivas wrote:
> > I've modified Mike Galbraith's nanosleep work for greater resolution to
> > help the interactivity estimator work I've done in the O*int patches.
> > This patch applies to any kernel patched up to the latest
> > patch-O5int-0307150857 which applies on top of 2.5.75-mm1.
> >
> > Please test and comment, and advise what further changes you think are
> > appropriate or necessary, including other archs. I've preserved Mike's
> > code unchanged wherever possible. It works well for me, but n=1 does not
> > a good sample population make.
> >
> > The patch-N1int-0307151249 is available here:
> > http://kernel.kolivas.org/2.5
>
> I can still starve XMMS on 2.5.75-mm1 + patch-O5int-0307150857 +
> patch-N1int-0307152010:
>
> 1. Log on to KDE
> 2. Launch Konqueror
> 3. Launch XMMS and make it play
> 4. Move Konqueror window all over the desktop
>
> Step 4 will make XMMS starve for a few seconds. Also, under heavy load
> (while true; do a=2; done), moving the Konqueror window like crazy makes
> X go jerky after a few seconds. If I stop moving windows around, X returns
> to normal, smooth behavior after a few more seconds.

I was more concerned to see whether the N1 patch added anything to the current 
behaviour, but that is abandoned work now, so don't worry. I can address these 
further issues incrementally if you're willing to test them. My test boxes 
have been tamed, but I need, and greatly appreciate, your testing.

> I can fix the starvation/smoothness by setting:
>
> #define PRIO_BONUS_RATIO        45
> #define INTERACTIVE_DELTA       4
> #define MAX_SLEEP_AVG           (HZ)
> #define STARVATION_LIMIT        (HZ)
>
> For me, 2.6.0-test1 stock scheduler plus above changes makes the most
> user-friendly desktop I've ever seen.

With a max sleep avg of just one second you'll never starve X or xmms for 
sustained periods; but under load X will not be smooth, getting jerky even 
with small bursts of heavy X use. That's why Ingo increased the max sleep avg 
in the first place... but that led to other issues...

Con



* Re: [PATCH] N1int for interactivity
  2003-07-15 23:12   ` Con Kolivas
@ 2003-07-16  7:12     ` Felipe Alfaro Solana
From: Felipe Alfaro Solana @ 2003-07-16  7:12 UTC (permalink / raw)
  To: Con Kolivas; +Cc: linux kernel mailing list, Mike Galbraith, Andrew Morton

On Wed, 2003-07-16 at 01:12, Con Kolivas wrote:
> > I can still starve XMMS on 2.5.75-mm1 + patch-O5int-0307150857 +
> > patch-N1int-0307152010:
> >
> > 1. Log on to KDE
> > 2. Launch Konqueror
> > 3. Launch XMMS and make it play
> > 4. Move Konqueror window all over the desktop
> >
> > Step 4 will make XMMS starve for a few seconds. Also, under heavy load
> > (while true; do a=2; done), moving the Konqueror window like crazy makes
> > X go jerky after a few seconds. If I stop moving windows around, X returns
> > to normal, smooth behavior after a few more seconds.
> 
> I was more concerned to see whether the N1 patch added anything to the 
> current behaviour, but that is abandoned work now, so don't worry. I can 
> address these further issues incrementally if you're willing to test them. 
> My test boxes have been tamed, but I need, and greatly appreciate, your 
> testing.

Just throw anything at me and I will test it gladly :-)

> > I can fix the starvation/smoothness by setting:
> >
> > #define PRIO_BONUS_RATIO        45
> > #define INTERACTIVE_DELTA       4
> > #define MAX_SLEEP_AVG           (HZ)
> > #define STARVATION_LIMIT        (HZ)
> >
> > For me, 2.6.0-test1 stock scheduler plus above changes makes the most
> > user-friendly desktop I've ever seen.
> 
> With a max sleep avg of just one second you'll never starve X or xmms for 
> sustained periods; but under load X will not be smooth, getting jerky even 
> with small bursts of heavy X use. That's why Ingo increased the max sleep 
> avg in the first place... but that led to other issues...

Well, under heavy load (while true ...) X still feels very smooth with
2.6.0-test1 and the above changes. In fact, I was inclined to make those
changes as X didn't feel smooth under load on my system without them.


