linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] SMP signal latency fix up.
@ 2003-11-06 22:49 Mark Gross
  2003-11-06 23:20 ` Linus Torvalds
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Mark Gross @ 2003-11-06 22:49 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2001 bytes --]

The attached program should execute the following command line within a fraction of a second.
time ./ping_pong -c 10000

Running on SMP and the 2.6.0-test9 kernel, it takes about 10000 * 1/HZ seconds.  Running this 
command with maxcpus=1 the command finishes in fraction of a second.  Under SMP the 
signal delivery isn't kicking the task if its in the run state on the other CPU.

The following patch has been tested and seems to fix the problem.  
I'm confident about the change to sched.c actualy fixes a cut and paste bug.

The change to signal.c IS needed to fix the problem, but I'm not sure there isn't a better way.

Please have take a look

--mgross

diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
--- linux-2.6.0-test9/kernel/sched.c    2003-10-25 11:44:29.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/sched.c       2003-11-06 13:04:03.628116240 -0800
@@ -626,13 +626,13 @@
                        }
                        success = 1;
                }
-#ifdef CONFIG_SMP
-               else
-                       if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
-                               smp_send_reschedule(task_cpu(p));
-#endif
                p->state = TASK_RUNNING;
        }
+#ifdef CONFIG_SMP
+               else
+               if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
+                       smp_send_reschedule(task_cpu(p));
+#endif
        task_rq_unlock(rq, &flags);

        return success;
diff -urN -X dontdiff linux-2.6.0-test9/kernel/signal.c /opt/linux-2.6.0-test9/kernel/signal.c
--- linux-2.6.0-test9/kernel/signal.c   2003-10-25 11:43:27.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/signal.c      2003-11-06 12:18:22.000000000 -0800
@@ -555,6 +555,9 @@
                wake_up_process_kick(t);
                return;
        }
+       if (t->state == TASK_RUNNING ) 
+               wake_up_process_kick(t);
+       
 }

 /*

[-- Attachment #2: ping_pong.c --]
[-- Type: text/x-c, Size: 3210 bytes --]

#include <argp.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <pthread.h>
#include <signal.h>

int argp_parse_option(int key, char *arg, struct argp_state *argp_state);

struct state {
	pthread_t receive;
	pthread_t send;
	int signal;
	int count;
	int yield;
        volatile int flags;
	volatile int sync_flags;
};

static struct state _state = {
	.signal = SIGUSR1,
	.flags = 0,
	.sync_flags = 0,
	.yield = 0,
};
static const struct argp_option argp_options[] = {
	{"count", 'c', "COUNT", 0, "Number of signals exchanged"},
	{"signal", 'n', "SIGNUM", 0, "The sended signal"},
	{"yield", 'y', NULL, 0, "Make receiver yield while waiting"},
	{0}
};
static const struct argp argp = {
	.options = argp_options,
	.parser = argp_parse_option,
};

int argp_parse_option(int key, char *arg, struct argp_state *argp_state)
{
	struct state *state = argp_state->input;
	switch (key) {		
	case 'n':
		if (sscanf(arg, "%u", &state->signal) != 1) {
			perror("invalid signal argument");
			return -EINVAL;
		}		
		break;
	case 'c':
		if (sscanf(arg, "%i", &state->count) != 1) {
			perror("invalid count argument");
			return -EINVAL;
		}		
		break;
	case 'y':
		state->yield = 1;
		break;
		
	}

	return 0;
}


static void * sender (void *arg)
{
	struct state * state = (struct state *) arg;
	sigset_t action;
	int c = 0, r = 0;
	int sig;
	
	sigemptyset (&action);
	sigaddset (&action, state->signal);
	sigprocmask (SIG_BLOCK, &action, 0);

	/* wait for the receiver to get ready for receiving signals */
	while (state->sync_flags == 0)
		sched_yield();

	while ( c++ < state->count ) {
		/* ping */
		pthread_kill ( state->receive, state->signal);	

		/* pong */
		if ((r = sigwait(&action, &sig)) != 0) {
			perror("sigwait");
			state->flags = 1;
			pthread_exit(&r);
		}
	}

	state->flags = 1;
 	pthread_exit (&r);	
}
   
static void handler(int signum)
{
	struct state * state = &_state;
	pthread_kill(state->send, state->signal);
}
static void * receiver (void *arg)
{
	struct state * state = (struct state *) arg;
	struct sigaction action;
	int r;
	
	action.sa_handler = handler;
	sigemptyset (&action.sa_mask);
	action.sa_flags = 0;	
	if ((r = sigaction (state->signal, &action, 0)) != 0) {
		perror("sigaction");
		pthread_exit(&r);
	}

	/* let the sender thread know we are ready to start receiving */
	state->sync_flags = 1;

	while ( state->flags == 0 ) { 
		if (state->yield)
			sched_yield();
		else
			/*spin*/;
	}

	/* ...and done */
	pthread_exit(&r);	
}	

int main(int argc, char *argv[])
{
	int result = 0;
	void * thread_result;
	struct state *state = &_state;

	if (argp_parse(&argp, argc, argv, ARGP_IN_ORDER, 0, state) != 0)
		exit(-1);

	result = pthread_create (&state->receive, NULL, receiver, state);
	if ( result != 0 ) {
		perror("pthread_create");
		exit(-1);
	}

	result = pthread_create (&state->send, NULL, sender, state);
	if ( result != 0 ) {
		perror("pthread_create");
		exit(-1);
	}

	result = pthread_join ( state->send, &thread_result);	
	if ( result != 0 ) {
		perror("pthread_join");
		exit(-1);
	}

	result = pthread_join ( state->receive, &thread_result);	
	if ( result != 0 ) {
		perror("pthread_join");
		exit(-1);
	}
	
	return 0;
}

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 22:49 [PATCH] SMP signal latency fix up Mark Gross
@ 2003-11-06 23:20 ` Linus Torvalds
  2003-11-07  1:39   ` Mark Gross
  2003-11-07  9:39   ` Ingo Molnar
  2003-11-06 23:26 ` Chris Friesen
  2003-11-07 10:24 ` Ingo Molnar
  2 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2003-11-06 23:20 UTC (permalink / raw)
  To: Mark Gross, Ingo Molnar; +Cc: Kernel Mailing List


On 6 Nov 2003, Mark Gross wrote:
>
> Running on SMP and the 2.6.0-test9 kernel, it takes about 10000 * 1/HZ seconds.  Running this 
> command with maxcpus=1 the command finishes in fraction of a second.  Under SMP the 
> signal delivery isn't kicking the task if its in the run state on the other CPU.

It looks like the "wake_up_process_kick()" interface is just broken. As it 
stands now, it's literally _designed_ to kick the process only when it 
wakes it up, which is silly and wrong. It makes no sense to kick a process 
that we just woke up, because it _will_ react immediately anyway.

We literally want to kick only processes that didn't need waking up, and 
the current interface is totally unsuitable for that.

> The following patch has been tested and seems to fix the problem.  
> I'm confident about the change to sched.c actualy fixes a cut and paste bug.

Naah, it's a thinko, the code shouldn't be like that at all.

There's only one user of the "wake_up_process_kick()" thing, and that one 
user really wants to kick the process totally independently of waking it 
up. Which just implies that we should just have a _regular_ 
"wake_up_process()" there, and a _separate_ "kick_process()" thing.

So I've got a feeling that 
 - we should remove the "kick" argument from "try_to_wake_up()"
 - the signal wakeup case should instead do a _regular_ wakeup.
 - we should kick the process if the wakeup _fails_.

Ie signal wakeup should most likely look something like


	inline void signal_wake_up(struct task_struct *t, int resume)
	{
		int woken;
		unsigned int mask;

		set_tsk_thread_flag(t,TIF_SIGPENDING);
		mask = TASK_INTERRUPTIBLE;
		if (resume)
			mask |= TASK_STOPPED;
		woken = 0;
		if (t->state & mask)
			woken = wake_up_state(p, mask);
		if (!woken)
			kick_process(p);
	}

where the "kick_process()" thing does the "is the task running on some 
other CPU and if so send it a reschedule event to make it react" thing.

Ingo?

		Linus

> diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
> --- linux-2.6.0-test9/kernel/sched.c    2003-10-25 11:44:29.000000000 -0700
> +++ /opt/linux-2.6.0-test9/kernel/sched.c       2003-11-06 13:04:03.628116240 -0800
> @@ -626,13 +626,13 @@
>                         }
>                         success = 1;
>                 }
> -#ifdef CONFIG_SMP
> -               else
> -                       if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> -                               smp_send_reschedule(task_cpu(p));
> -#endif
>                 p->state = TASK_RUNNING;
>         }
> +#ifdef CONFIG_SMP
> +               else
> +               if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> +                       smp_send_reschedule(task_cpu(p));
> +#endif
>         task_rq_unlock(rq, &flags);
> 
>         return success;
> diff -urN -X dontdiff linux-2.6.0-test9/kernel/signal.c /opt/linux-2.6.0-test9/kernel/signal.c
> --- linux-2.6.0-test9/kernel/signal.c   2003-10-25 11:43:27.000000000 -0700
> +++ /opt/linux-2.6.0-test9/kernel/signal.c      2003-11-06 12:18:22.000000000 -0800
> @@ -555,6 +555,9 @@
>                 wake_up_process_kick(t);
>                 return;
>         }
> +       if (t->state == TASK_RUNNING ) 
> +               wake_up_process_kick(t);
> +       
>  }
> 
>  /*
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 22:49 [PATCH] SMP signal latency fix up Mark Gross
  2003-11-06 23:20 ` Linus Torvalds
@ 2003-11-06 23:26 ` Chris Friesen
  2003-11-06 23:35   ` Mark Gross
  2003-11-07 10:24 ` Ingo Molnar
  2 siblings, 1 reply; 18+ messages in thread
From: Chris Friesen @ 2003-11-06 23:26 UTC (permalink / raw)
  To: mgross; +Cc: linux-kernel

Mark Gross wrote:

> diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
> --- linux-2.6.0-test9/kernel/sched.c    2003-10-25 11:44:29.000000000 -0700
> +++ /opt/linux-2.6.0-test9/kernel/sched.c       2003-11-06 13:04:03.628116240 -0800
> @@ -626,13 +626,13 @@
>                         }
>                         success = 1;
>                 }
> -#ifdef CONFIG_SMP
> -               else
> -                       if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> -                               smp_send_reschedule(task_cpu(p));
> -#endif
>                 p->state = TASK_RUNNING;
>         }
> +#ifdef CONFIG_SMP
> +               else
> +               if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> +                       smp_send_reschedule(task_cpu(p));
> +#endif
>         task_rq_unlock(rq, &flags);

Without any comment as to whether or not the patch would work, shouldn't 
the "else" be moved back by a tab?

Chris




-- 
Chris Friesen                    | MailStop: 043/33/F10
Nortel Networks                  | work: (613) 765-0557
3500 Carling Avenue              | fax:  (613) 765-2986
Nepean, ON K2H 8E9 Canada        | email: cfriesen@nortelnetworks.com


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 23:26 ` Chris Friesen
@ 2003-11-06 23:35   ` Mark Gross
  2003-11-07  0:55     ` Nuno Silva
  0 siblings, 1 reply; 18+ messages in thread
From: Mark Gross @ 2003-11-06 23:35 UTC (permalink / raw)
  To: Chris Friesen; +Cc: mgross, linux-kernel

On Thu, 2003-11-06 at 15:26, Chris Friesen wrote:
> Mark Gross wrote:
> 
> > diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
> > --- linux-2.6.0-test9/kernel/sched.c    2003-10-25 11:44:29.000000000 -0700
> > +++ /opt/linux-2.6.0-test9/kernel/sched.c       2003-11-06 13:04:03.628116240 -0800
> > @@ -626,13 +626,13 @@
> >                         }
> >                         success = 1;
> >                 }
> > -#ifdef CONFIG_SMP
> > -               else
> > -                       if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> > -                               smp_send_reschedule(task_cpu(p));
> > -#endif
> >                 p->state = TASK_RUNNING;
> >         }
> > +#ifdef CONFIG_SMP
> > +               else
> > +               if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> > +                       smp_send_reschedule(task_cpu(p));
> > +#endif
> >         task_rq_unlock(rq, &flags);
> 
> Without any comment as to whether or not the patch would work, shouldn't 
> the "else" be moved back by a tab?
> 

You are correct.  
The source file used ot build the patch "looked" correct but had some
spaces where intermixed, that didn't show up until the tabs got expanded
by the cut and paste into the mailer.

I can re-send if you like, but I have a feeling Linux and Ingo are going
to work something slightly different out.  So I'll hold off.

Sorry about that.

--mgross



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 23:35   ` Mark Gross
@ 2003-11-07  0:55     ` Nuno Silva
  0 siblings, 0 replies; 18+ messages in thread
From: Nuno Silva @ 2003-11-07  0:55 UTC (permalink / raw)
  To: linux-kernel



Mark Gross wrote:

[...]

> 
> I can re-send if you like, but I have a feeling Linux and Ingo are going
> to work something slightly different out.  So I'll hold off.
> 

Yeah, that *Linux* Torvalds guy is a dictator! And IngO(1) is no 
different! :-)

Ehehehe couldn't resist...

> Sorry about that.

...I'm sorry too :-)

Regards,
Nuno Silva

> 
> --mgross
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 23:20 ` Linus Torvalds
@ 2003-11-07  1:39   ` Mark Gross
  2003-11-07  1:42     ` Mark Gross
  2003-11-07  9:39   ` Ingo Molnar
  1 sibling, 1 reply; 18+ messages in thread
From: Mark Gross @ 2003-11-07  1:39 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Mark Gross, Ingo Molnar, Kernel Mailing List

On Thu, 2003-11-06 at 15:20, Linus Torvalds wrote:
> On 6 Nov 2003, Mark Gross wrote:
> >
> > Running on SMP and the 2.6.0-test9 kernel, it takes about 10000 * 1/HZ seconds.  Running this 
> > command with maxcpus=1 the command finishes in fraction of a second.  Under SMP the 
> > signal delivery isn't kicking the task if its in the run state on the other CPU.
> 
> It looks like the "wake_up_process_kick()" interface is just broken. As it 
> stands now, it's literally _designed_ to kick the process only when it 
> wakes it up, which is silly and wrong. It makes no sense to kick a process 
> that we just woke up, because it _will_ react immediately anyway.
> 
> We literally want to kick only processes that didn't need waking up, and 
> the current interface is totally unsuitable for that.
> 
> > The following patch has been tested and seems to fix the problem.  
> > I'm confident about the change to sched.c actualy fixes a cut and paste bug.
> 
> Naah, it's a thinko, the code shouldn't be like that at all.
> 
> There's only one user of the "wake_up_process_kick()" thing, and that one 
> user really wants to kick the process totally independently of waking it 
> up. Which just implies that we should just have a _regular_ 
> "wake_up_process()" there, and a _separate_ "kick_process()" thing.
> 
> So I've got a feeling that 
>  - we should remove the "kick" argument from "try_to_wake_up()"
>  - the signal wakeup case should instead do a _regular_ wakeup.
>  - we should kick the process if the wakeup _fails_.
> 
> Ie signal wakeup should most likely look something like
> 
> 
> 	inline void signal_wake_up(struct task_struct *t, int resume)
> 	{
> 		int woken;
> 		unsigned int mask;
> 
> 		set_tsk_thread_flag(t,TIF_SIGPENDING);
> 		mask = TASK_INTERRUPTIBLE;
> 		if (resume)
> 			mask |= TASK_STOPPED;
> 		woken = 0;
> 		if (t->state & mask)
> 			woken = wake_up_state(p, mask);
> 		if (!woken)
> 			kick_process(p);
> 	}
> 
> where the "kick_process()" thing does the "is the task running on some 
> other CPU and if so send it a reschedule event to make it react" thing.
> 
> Ingo?
> 
> 		Linus


Are you thinking about something like this?

It seems to work. I dropped the "task_running" test from
smp_process_kick intentionaly.  As well as the movement of the success
flag.  I hope its not too wrong.

It seems to work on a HT P4 desktop and a dual PIII box.

--mgross

diff -urN -X dontdiff linux-2.6.0-test9/include/linux/sched.h
/opt/linux-2.6.0-test9/include/linux/sched.h
--- linux-2.6.0-test9/include/linux/sched.h	2003-10-25
11:42:56.000000000 -0700
+++ /opt/linux-2.6.0-test9/include/linux/sched.h	2003-11-06
14:44:22.000000000 -0800
@@ -573,7 +573,7 @@
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned
int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
+extern void FASTCALL(smp_process_kick(struct task_struct * tsk));
 extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_exit(task_t * p));
 
diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c
/opt/linux-2.6.0-test9/kernel/sched.c
--- linux-2.6.0-test9/kernel/sched.c	2003-10-25 11:44:29.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/sched.c	2003-11-06 14:58:29.000000000
-0800
@@ -585,7 +585,7 @@
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync, int
kick)
+static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 {
 	unsigned long flags;
 	int success = 0;
@@ -624,13 +624,8 @@
 				if (TASK_PREEMPTS_CURR(p, rq))
 					resched_task(rq->curr);
 			}
-			success = 1;
 		}
-#ifdef CONFIG_SMP
-	       	else
-			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) !=
smp_processor_id()))
-				smp_send_reschedule(task_cpu(p));
-#endif
+		success = 1;
 		p->state = TASK_RUNNING;
 	}
 	task_rq_unlock(rq, &flags);
@@ -640,19 +635,22 @@
 
 int wake_up_process(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE |
TASK_UNINTERRUPTIBLE, 0, 0);
+	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE |
TASK_UNINTERRUPTIBLE, 0);
 }
 
 EXPORT_SYMBOL(wake_up_process);
 
-int wake_up_process_kick(task_t * p)
+void smp_process_kick(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE |
TASK_UNINTERRUPTIBLE, 0, 1);
+#ifdef CONFIG_SMP
+	if (task_cpu(p) != smp_processor_id())
+			smp_send_reschedule(task_cpu(p));
+#endif
 }
 
 int wake_up_state(task_t *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0, 0);
+	return try_to_wake_up(p, state, 0);
 }
 
 /*
@@ -1624,7 +1622,7 @@
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
 {
 	task_t *p = curr->task;
-	return try_to_wake_up(p, mode, sync, 0);
+	return try_to_wake_up(p, mode, sync);
 }
 
 EXPORT_SYMBOL(default_wake_function);
diff -urN -X dontdiff linux-2.6.0-test9/kernel/signal.c
/opt/linux-2.6.0-test9/kernel/signal.c
--- linux-2.6.0-test9/kernel/signal.c	2003-10-25 11:43:27.000000000
-0700
+++ /opt/linux-2.6.0-test9/kernel/signal.c	2003-11-06 15:05:41.945237624
-0800
@@ -538,6 +538,7 @@
 inline void signal_wake_up(struct task_struct *t, int resume)
 {
 	unsigned int mask;
+	int woken;
 
 	set_tsk_thread_flag(t,TIF_SIGPENDING);
 
@@ -551,10 +552,14 @@
 	mask = TASK_INTERRUPTIBLE;
 	if (resume)
 		mask |= TASK_STOPPED;
+	woken = 0;
 	if (t->state & mask) {
-		wake_up_process_kick(t);
-		return;
+		woken = wake_up_process(t);
 	}
+#ifdef CONFIG_SMP
+	if (!woken) 
+		smp_process_kick(t);
+#endif	
 }
 
 /*





^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07  1:39   ` Mark Gross
@ 2003-11-07  1:42     ` Mark Gross
  2003-11-07  9:45       ` Ingo Molnar
  0 siblings, 1 reply; 18+ messages in thread
From: Mark Gross @ 2003-11-07  1:42 UTC (permalink / raw)
  To: mgross; +Cc: Linus Torvalds, Ingo Molnar, Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 5841 bytes --]

On Thu, 2003-11-06 at 17:39, Mark Gross wrote:
> On Thu, 2003-11-06 at 15:20, Linus Torvalds wrote:
> > On 6 Nov 2003, Mark Gross wrote:
> > >
> > > Running on SMP and the 2.6.0-test9 kernel, it takes about 10000 * 1/HZ seconds.  Running this 
> > > command with maxcpus=1 the command finishes in fraction of a second.  Under SMP the 
> > > signal delivery isn't kicking the task if its in the run state on the other CPU.
> > 
> > It looks like the "wake_up_process_kick()" interface is just broken. As it 
> > stands now, it's literally _designed_ to kick the process only when it 
> > wakes it up, which is silly and wrong. It makes no sense to kick a process 
> > that we just woke up, because it _will_ react immediately anyway.
> > 
> > We literally want to kick only processes that didn't need waking up, and 
> > the current interface is totally unsuitable for that.
> > 
> > > The following patch has been tested and seems to fix the problem.  
> > > I'm confident about the change to sched.c actualy fixes a cut and paste bug.
> > 
> > Naah, it's a thinko, the code shouldn't be like that at all.
> > 
> > There's only one user of the "wake_up_process_kick()" thing, and that one 
> > user really wants to kick the process totally independently of waking it 
> > up. Which just implies that we should just have a _regular_ 
> > "wake_up_process()" there, and a _separate_ "kick_process()" thing.
> > 
> > So I've got a feeling that 
> >  - we should remove the "kick" argument from "try_to_wake_up()"
> >  - the signal wakeup case should instead do a _regular_ wakeup.
> >  - we should kick the process if the wakeup _fails_.
> > 
> > Ie signal wakeup should most likely look something like
> > 
> > 
> > 	inline void signal_wake_up(struct task_struct *t, int resume)
> > 	{
> > 		int woken;
> > 		unsigned int mask;
> > 
> > 		set_tsk_thread_flag(t,TIF_SIGPENDING);
> > 		mask = TASK_INTERRUPTIBLE;
> > 		if (resume)
> > 			mask |= TASK_STOPPED;
> > 		woken = 0;
> > 		if (t->state & mask)
> > 			woken = wake_up_state(p, mask);
> > 		if (!woken)
> > 			kick_process(p);
> > 	}
> > 
> > where the "kick_process()" thing does the "is the task running on some 
> > other CPU and if so send it a reschedule event to make it react" thing.
> > 
> > Ingo?
> > 
> > 		Linus
> 
> 
> Are you thinking about something like this?
> 
> It seems to work. I dropped the "task_running" test from
> smp_process_kick intentionaly.  As well as the movement of the success
> flag.  I hope its not too wrong.
> 
> It seems to work on a HT P4 desktop and a dual PIII box.
> 
> --mgross
Evolution messed up my patch.  Retry:

diff -urN -X dontdiff linux-2.6.0-test9/include/linux/sched.h /opt/linux-2.6.0-test9/include/linux/sched.h
--- linux-2.6.0-test9/include/linux/sched.h	2003-10-25 11:42:56.000000000 -0700
+++ /opt/linux-2.6.0-test9/include/linux/sched.h	2003-11-06 14:44:22.000000000 -0800
@@ -573,7 +573,7 @@
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
+extern void FASTCALL(smp_process_kick(struct task_struct * tsk));
 extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_exit(task_t * p));
 
diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
--- linux-2.6.0-test9/kernel/sched.c	2003-10-25 11:44:29.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/sched.c	2003-11-06 14:58:29.000000000 -0800
@@ -585,7 +585,7 @@
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync, int kick)
+static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 {
 	unsigned long flags;
 	int success = 0;
@@ -624,13 +624,8 @@
 				if (TASK_PREEMPTS_CURR(p, rq))
 					resched_task(rq->curr);
 			}
-			success = 1;
 		}
-#ifdef CONFIG_SMP
-	       	else
-			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
-				smp_send_reschedule(task_cpu(p));
-#endif
+		success = 1;
 		p->state = TASK_RUNNING;
 	}
 	task_rq_unlock(rq, &flags);
@@ -640,19 +635,22 @@
 
 int wake_up_process(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
+	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
 }
 
 EXPORT_SYMBOL(wake_up_process);
 
-int wake_up_process_kick(task_t * p)
+void smp_process_kick(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
+#ifdef CONFIG_SMP
+	if (task_cpu(p) != smp_processor_id())
+			smp_send_reschedule(task_cpu(p));
+#endif
 }
 
 int wake_up_state(task_t *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0, 0);
+	return try_to_wake_up(p, state, 0);
 }
 
 /*
@@ -1624,7 +1622,7 @@
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
 {
 	task_t *p = curr->task;
-	return try_to_wake_up(p, mode, sync, 0);
+	return try_to_wake_up(p, mode, sync);
 }
 
 EXPORT_SYMBOL(default_wake_function);
diff -urN -X dontdiff linux-2.6.0-test9/kernel/signal.c /opt/linux-2.6.0-test9/kernel/signal.c
--- linux-2.6.0-test9/kernel/signal.c	2003-10-25 11:43:27.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/signal.c	2003-11-06 15:05:41.945237624 -0800
@@ -538,6 +538,7 @@
 inline void signal_wake_up(struct task_struct *t, int resume)
 {
 	unsigned int mask;
+	int woken;
 
 	set_tsk_thread_flag(t,TIF_SIGPENDING);
 
@@ -551,10 +552,14 @@
 	mask = TASK_INTERRUPTIBLE;
 	if (resume)
 		mask |= TASK_STOPPED;
+	woken = 0;
 	if (t->state & mask) {
-		wake_up_process_kick(t);
-		return;
+		woken = wake_up_process(t);
 	}
+#ifdef CONFIG_SMP
+	if (!woken) 
+		smp_process_kick(t);
+#endif	
 }
 
 /*



[-- Attachment #2: signal_smp_fix.patch --]
[-- Type: text/x-patch, Size: 3225 bytes --]

diff -urN -X dontdiff linux-2.6.0-test9/include/linux/sched.h /opt/linux-2.6.0-test9/include/linux/sched.h
--- linux-2.6.0-test9/include/linux/sched.h	2003-10-25 11:42:56.000000000 -0700
+++ /opt/linux-2.6.0-test9/include/linux/sched.h	2003-11-06 14:44:22.000000000 -0800
@@ -573,7 +573,7 @@
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
+extern void FASTCALL(smp_process_kick(struct task_struct * tsk));
 extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_exit(task_t * p));
 
diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
--- linux-2.6.0-test9/kernel/sched.c	2003-10-25 11:44:29.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/sched.c	2003-11-06 14:58:29.000000000 -0800
@@ -585,7 +585,7 @@
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync, int kick)
+static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 {
 	unsigned long flags;
 	int success = 0;
@@ -624,13 +624,8 @@
 				if (TASK_PREEMPTS_CURR(p, rq))
 					resched_task(rq->curr);
 			}
-			success = 1;
 		}
-#ifdef CONFIG_SMP
-	       	else
-			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
-				smp_send_reschedule(task_cpu(p));
-#endif
+		success = 1;
 		p->state = TASK_RUNNING;
 	}
 	task_rq_unlock(rq, &flags);
@@ -640,19 +635,22 @@
 
 int wake_up_process(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
+	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
 }
 
 EXPORT_SYMBOL(wake_up_process);
 
-int wake_up_process_kick(task_t * p)
+void smp_process_kick(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
+#ifdef CONFIG_SMP
+	if (task_cpu(p) != smp_processor_id())
+			smp_send_reschedule(task_cpu(p));
+#endif
 }
 
 int wake_up_state(task_t *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0, 0);
+	return try_to_wake_up(p, state, 0);
 }
 
 /*
@@ -1624,7 +1622,7 @@
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
 {
 	task_t *p = curr->task;
-	return try_to_wake_up(p, mode, sync, 0);
+	return try_to_wake_up(p, mode, sync);
 }
 
 EXPORT_SYMBOL(default_wake_function);
diff -urN -X dontdiff linux-2.6.0-test9/kernel/signal.c /opt/linux-2.6.0-test9/kernel/signal.c
--- linux-2.6.0-test9/kernel/signal.c	2003-10-25 11:43:27.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/signal.c	2003-11-06 15:05:41.945237624 -0800
@@ -538,6 +538,7 @@
 inline void signal_wake_up(struct task_struct *t, int resume)
 {
 	unsigned int mask;
+	int woken;
 
 	set_tsk_thread_flag(t,TIF_SIGPENDING);
 
@@ -551,10 +552,14 @@
 	mask = TASK_INTERRUPTIBLE;
 	if (resume)
 		mask |= TASK_STOPPED;
+	woken = 0;
 	if (t->state & mask) {
-		wake_up_process_kick(t);
-		return;
+		woken = wake_up_process(t);
 	}
+#ifdef CONFIG_SMP
+	if (!woken) 
+		smp_process_kick(t);
+#endif	
 }
 
 /*

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 23:20 ` Linus Torvalds
  2003-11-07  1:39   ` Mark Gross
@ 2003-11-07  9:39   ` Ingo Molnar
  2003-11-07 15:09     ` Linus Torvalds
  2003-11-07 17:03     ` Mark Gross
  1 sibling, 2 replies; 18+ messages in thread
From: Ingo Molnar @ 2003-11-07  9:39 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Mark Gross, Kernel Mailing List


On Thu, 6 Nov 2003, Linus Torvalds wrote:

> So I've got a feeling that 
>  - we should remove the "kick" argument from "try_to_wake_up()"
>  - the signal wakeup case should instead do a _regular_ wakeup.
>  - we should kick the process if the wakeup _fails_.

agreed - and this essential mechanism was my intention when i added the
kick argument originally. The problem is solved the simplest way via the
patch below - ie. i missed the other !success branch within
try_to_wake_up().

--- linux/kernel/sched.c.orig	
+++ linux/kernel/sched.c	
@@ -626,13 +626,12 @@ repeat_lock_task:
 			}
 			success = 1;
 		}
-#ifdef CONFIG_SMP
-	       	else
-			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
-				smp_send_reschedule(task_cpu(p));
-#endif
 		p->state = TASK_RUNNING;
 	}
+#ifdef CONFIG_SMP
+	if (unlikely(kick) && !success && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
+		smp_send_reschedule(task_cpu(p));
+#endif
 	task_rq_unlock(rq, &flags);
 
 	return success;

(note that this is slightly different from Mark's patch.)

but i fully agree with your other suggestion - there's no problem with
sending the IPI later and outside of the wakeup spinlock. In fact doing so
removes a variable from the wakeup hotpath and cleans up stuff. Hence i'd
suggest to apply the attached patch which implements your suggestion. I've
tested it and it solves the latency problem of the code Mark posted. It
compiles & boots on both UP and SMP x86.

	Ingo

--- linux/include/linux/sched.h.orig	
+++ linux/include/linux/sched.h	
@@ -574,7 +574,11 @@ extern void do_timer(struct pt_regs *);
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
+#ifdef CONFIG_SMP
+ extern void FASTCALL(kick_process(struct task_struct * tsk));
+#else
+ static inline void kick_process(struct task_struct *tsk) { }
+#endif
 extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_exit(task_t * p));
 
--- linux/kernel/sched.c.orig	
+++ linux/kernel/sched.c	
@@ -530,6 +530,15 @@ static inline void resched_task(task_t *
 #endif
 }
 
+/**
+ * task_curr - is this task currently executing on a CPU?
+ * @p: the task in question.
+ */
+inline int task_curr(task_t *p)
+{
+	return cpu_curr(task_cpu(p)) == p;
+}
+
 #ifdef CONFIG_SMP
 
 /*
@@ -568,6 +577,27 @@ repeat:
 	task_rq_unlock(rq, &flags);
 	preempt_enable();
 }
+
+/***
+ * kick_process - kick a running thread to enter/exit the kernel
+ * @p: the to-be-kicked thread
+ *
+ * Cause a process which is running on another CPU to enter
+ * kernel-mode, without any delay. (to get signals handled.)
+ */
+void kick_process(task_t *p)
+{
+	int cpu;
+
+	preempt_disable();
+	cpu = task_cpu(p);
+	if ((cpu != smp_processor_id()) && task_curr(p))
+		smp_send_reschedule(cpu);
+	preempt_enable();
+}
+
+EXPORT_SYMBOL_GPL(kick_process);
+
 #endif
 
 /***
@@ -575,7 +605,6 @@ repeat:
  * @p: the to-be-woken-up thread
  * @state: the mask of task states that can be woken
  * @sync: do a synchronous wakeup?
- * @kick: kick the CPU if the task is already running?
  *
  * Put it on the run-queue if it's not already there. The "current"
  * thread is always on the run-queue (except when the actual
@@ -585,7 +614,7 @@ repeat:
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync, int kick)
+static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 {
 	unsigned long flags;
 	int success = 0;
@@ -626,33 +655,22 @@ repeat_lock_task:
 			}
 			success = 1;
 		}
-#ifdef CONFIG_SMP
-	       	else
-			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
-				smp_send_reschedule(task_cpu(p));
-#endif
 		p->state = TASK_RUNNING;
 	}
 	task_rq_unlock(rq, &flags);
 
 	return success;
 }
-
 int wake_up_process(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
+	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
 }
 
 EXPORT_SYMBOL(wake_up_process);
 
-int wake_up_process_kick(task_t * p)
-{
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
-}
-
 int wake_up_state(task_t *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0, 0);
+	return try_to_wake_up(p, state, 0);
 }
 
 /*
@@ -1621,7 +1639,7 @@ EXPORT_SYMBOL(preempt_schedule);
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
 {
 	task_t *p = curr->task;
-	return try_to_wake_up(p, mode, sync, 0);
+	return try_to_wake_up(p, mode, sync);
 }
 
 EXPORT_SYMBOL(default_wake_function);
@@ -1942,15 +1960,6 @@ int task_nice(task_t *p)
 EXPORT_SYMBOL(task_nice);
 
 /**
- * task_curr - is this task currently executing on a CPU?
- * @p: the task in question.
- */
-int task_curr(task_t *p)
-{
-	return cpu_curr(task_cpu(p)) == p;
-}
-
-/**
  * idle_cpu - is a given cpu idle currently?
  * @cpu: the processor in question.
  */
--- linux/kernel/signal.c.orig	
+++ linux/kernel/signal.c	
@@ -538,8 +538,9 @@ int dequeue_signal(struct task_struct *t
 inline void signal_wake_up(struct task_struct *t, int resume)
 {
 	unsigned int mask;
+	int woken;
 
-	set_tsk_thread_flag(t,TIF_SIGPENDING);
+	set_tsk_thread_flag(t, TIF_SIGPENDING);
 
 	/*
 	 * If resume is set, we want to wake it up in the TASK_STOPPED case.
@@ -551,10 +552,11 @@ inline void signal_wake_up(struct task_s
 	mask = TASK_INTERRUPTIBLE;
 	if (resume)
 		mask |= TASK_STOPPED;
-	if (t->state & mask) {
-		wake_up_process_kick(t);
-		return;
-	}
+	woken = 0;
+	if (t->state & mask)
+		woken = wake_up_state(t, mask);
+	if (!woken)
+		kick_process(t);
 }
 
 /*

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07  1:42     ` Mark Gross
@ 2003-11-07  9:45       ` Ingo Molnar
  2003-11-07 15:43         ` Mark Gross
  0 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2003-11-07  9:45 UTC (permalink / raw)
  To: Mark Gross; +Cc: Linus Torvalds, Kernel Mailing List


On Fri, 6 Nov 2003, Mark Gross wrote:

>  			}
> -			success = 1;
>  		}
> -#ifdef CONFIG_SMP
> -	       	else
> -			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> -				smp_send_reschedule(task_cpu(p));
> -#endif
> +		success = 1;

hm, this i believe is incorrect - you've moved the 'success' case outside
of the 'real wakeup' branch.

to avoid races, we only want to report success if the thread has been
truly placed on the runqueue by this call. The other case (eg. changing
TASK_INTERRUPTIBLE to TASK_RUNNING) does not count as a 'wakeup'. Note
that if the task was in a non-TASK_RUNNING state then we dont have to kick
the process anyway because it's in kernel-mode and will go through the
signal return path soon.

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-06 22:49 [PATCH] SMP signal latency fix up Mark Gross
  2003-11-06 23:20 ` Linus Torvalds
  2003-11-06 23:26 ` Chris Friesen
@ 2003-11-07 10:24 ` Ingo Molnar
  2 siblings, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2003-11-07 10:24 UTC (permalink / raw)
  To: Mark Gross; +Cc: linux-kernel, Linus Torvalds


On Thu, 6 Nov 2003, Mark Gross wrote:

> Running on SMP and the 2.6.0-test9 kernel, it takes about 10000 * 1/HZ
> seconds.  Running this command with maxcpus=1 the command finishes in
> fraction of a second.  Under SMP the signal delivery isn't kicking the
> task if its in the run state on the other CPU.

yeah - good catch - it's a brown paper bag bug.

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07  9:39   ` Ingo Molnar
@ 2003-11-07 15:09     ` Linus Torvalds
  2003-11-07 15:17       ` Ingo Molnar
  2003-11-07 15:29       ` Ingo Molnar
  2003-11-07 17:03     ` Mark Gross
  1 sibling, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2003-11-07 15:09 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Mark Gross, Kernel Mailing List


On Fri, 7 Nov 2003, Ingo Molnar wrote:
> On Thu, 6 Nov 2003, Linus Torvalds wrote:
> 
> > So I've got a feeling that 
> >  - we should remove the "kick" argument from "try_to_wake_up()"
> >  - the signal wakeup case should instead do a _regular_ wakeup.
> >  - we should kick the process if the wakeup _fails_.
> 
> agreed - and this essential mechanism was my intention when i added the
> kick argument originally. The problem is solved the simplest way via the
> patch below - ie. i missed the other !success branch within
> try_to_wake_up().

The thing is, that simple patch does NOT solve the problem, as Mark
already noted. Why? Because while it makes "try_to_waker_up()" do the
rigth thing, it doesn't make "wake_up_process_kick()" do the right thing.

The "mask" argument isn't supported by "wake_up_process_kick()", so the
_caller_ has to do

	if (mask & state)
		wake_up_process_kick()

so now you have _another_ case where the kicking isn't done (in the 
caller).

That's why I think the whole interface is scrogged, and why the "simpler" 
patch is not very workable.

> but i fully agree with your other suggestion - there's no problem with
> sending the IPI later and outside of the wakeup spinlock. In fact doing so
> removes a variable from the wakeup hotpath and cleans up stuff. Hence i'd
> suggest to apply the attached patch which implements your suggestion. I've
> tested it and it solves the latency problem of the code Mark posted. It
> compiles & boots on both UP and SMP x86.

This seems to work for me, and I obviously agree with it. I'll have to 
think for a bit whether I can call this an important enough bug to care 
for 2.6.0, since it's only a performance regression. 

(It _looks_ obvious enough, but can you check that there are no pointers
that we might be following in the "is it running" checks that could be
stale because we don't hold the runqueue lock any more).

			Linus


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07 15:09     ` Linus Torvalds
@ 2003-11-07 15:17       ` Ingo Molnar
  2003-11-07 15:31         ` Ingo Molnar
  2003-11-07 15:29       ` Ingo Molnar
  1 sibling, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2003-11-07 15:17 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Mark Gross, Kernel Mailing List


On Fri, 7 Nov 2003, Linus Torvalds wrote:

> (It _looks_ obvious enough, but can you check that there are no pointers
> that we might be following in the "is it running" checks that could be
> stale because we don't hold the runqueue lock any more).

the 'is it running' check is 'task_curr(p)', which in this circumstance is
equivalent to the following test:

	per_cpu(runqueues, (cpu)).curr == p

where 'cpu' is p->thread_info->cpu. All pointers dereferenced in this test
are stable, because 1) send_signal() is always called within the siglock,
which serializes with task exit 2) p->thread_info->cpu is always valid for
the same reason.

so this seems to be safe to me.

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07 15:09     ` Linus Torvalds
  2003-11-07 15:17       ` Ingo Molnar
@ 2003-11-07 15:29       ` Ingo Molnar
  1 sibling, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2003-11-07 15:29 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Mark Gross, Kernel Mailing List


On Fri, 7 Nov 2003, Linus Torvalds wrote:

> The thing is, that simple patch does NOT solve the problem, [...]

doh, indeed. I was concentrating on the second patch but for a moment
thought that we could get away with a simpler patch, but nope, you are
right, we've got to touch both signal.c and sched.c, and there's no way
around the interface change.

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07 15:17       ` Ingo Molnar
@ 2003-11-07 15:31         ` Ingo Molnar
  0 siblings, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2003-11-07 15:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Mark Gross, Kernel Mailing List


> the 'is it running' check is 'task_curr(p)', which in this circumstance is
> equivalent to the following test:
> 
> 	per_cpu(runqueues, (cpu)).curr == p

btw., the 'is it running right now' test is a necessary thing as well - we
really dont want to flood CPUs with IPIs which just happen to have the
target task in their runqueue (but the task is not executing).

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07  9:45       ` Ingo Molnar
@ 2003-11-07 15:43         ` Mark Gross
  0 siblings, 0 replies; 18+ messages in thread
From: Mark Gross @ 2003-11-07 15:43 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Mark Gross, Linus Torvalds, Kernel Mailing List

On Fri, 2003-11-07 at 01:45, Ingo Molnar wrote:
> On Fri, 6 Nov 2003, Mark Gross wrote:
> 
> >  			}
> > -			success = 1;
> >  		}
> > -#ifdef CONFIG_SMP
> > -	       	else
> > -			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> > -				smp_send_reschedule(task_cpu(p));
> > -#endif
> > +		success = 1;
> 
> hm, this i believe is incorrect - you've moved the 'success' case outside
> of the 'real wakeup' branch.
> 

Yup, I was confusing myself a bit on the return symantics of
try_to_wake_up, and the relationship with race between changing
task->state and scheduling a task off a cpu (the "array" test while
holding the rq lock.).  

The feeling that this was likely wrong was eating at me all evening and
then it came to me around 8pm when I was driving my son to some thing.  

> to avoid races, we only want to report success if the thread has been
> truly placed on the runqueue by this call. The other case (eg. changing
> TASK_INTERRUPTIBLE to TASK_RUNNING) does not count as a 'wakeup'. Note
> that if the task was in a non-TASK_RUNNING state then we dont have to kick
> the process anyway because it's in kernel-mode and will go through the
> signal return path soon.
> 
> 	Ingo




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07  9:39   ` Ingo Molnar
  2003-11-07 15:09     ` Linus Torvalds
@ 2003-11-07 17:03     ` Mark Gross
  2003-11-08  6:48       ` Ingo Molnar
  1 sibling, 1 reply; 18+ messages in thread
From: Mark Gross @ 2003-11-07 17:03 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Mark Gross, Kernel Mailing List


This patch is very close to mine.  It fixes that bug I had with my
change to success, and it adds a test to check if the kick target is
current.  

Its hard for me to tell if its better to being more careful with
throughing around the IPI's at the cost of the extra opperations within
your kick code.

Looks correct, and works good too!  I just verified that it solves my
signal latency issue on both my HT system and my dual PIII box. Where I
first found the problem.

FWIW  I would like to see your patch go into 2.6.  

Thanks!


On Fri, 2003-11-07 at 01:39, Ingo Molnar wrote:

> but i fully agree with your other suggestion - there's no problem with
> sending the IPI later and outside of the wakeup spinlock. In fact doing so
> removes a variable from the wakeup hotpath and cleans up stuff. Hence i'd
> suggest to apply the attached patch which implements your suggestion. I've
> tested it and it solves the latency problem of the code Mark posted. It
> compiles & boots on both UP and SMP x86.
> 
> 	Ingo
> 
> --- linux/include/linux/sched.h.orig	
> +++ linux/include/linux/sched.h	
> @@ -574,7 +574,11 @@ extern void do_timer(struct pt_regs *);
>  
>  extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
>  extern int FASTCALL(wake_up_process(struct task_struct * tsk));
> -extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
> +#ifdef CONFIG_SMP
> + extern void FASTCALL(kick_process(struct task_struct * tsk));
> +#else
> + static inline void kick_process(struct task_struct *tsk) { }
> +#endif
>  extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
>  extern void FASTCALL(sched_exit(task_t * p));
>  
> --- linux/kernel/sched.c.orig	
> +++ linux/kernel/sched.c	
> @@ -530,6 +530,15 @@ static inline void resched_task(task_t *
>  #endif
>  }
>  
> +/**
> + * task_curr - is this task currently executing on a CPU?
> + * @p: the task in question.
> + */
> +inline int task_curr(task_t *p)
> +{
> +	return cpu_curr(task_cpu(p)) == p;
> +}
> +
>  #ifdef CONFIG_SMP
>  
>  /*
> @@ -568,6 +577,27 @@ repeat:
>  	task_rq_unlock(rq, &flags);
>  	preempt_enable();
>  }
> +
> +/***
> + * kick_process - kick a running thread to enter/exit the kernel
> + * @p: the to-be-kicked thread
> + *
> + * Cause a process which is running on another CPU to enter
> + * kernel-mode, without any delay. (to get signals handled.)
> + */
> +void kick_process(task_t *p)
> +{
> +	int cpu;
> +
> +	preempt_disable();
> +	cpu = task_cpu(p);
> +	if ((cpu != smp_processor_id()) && task_curr(p))
> +		smp_send_reschedule(cpu);
> +	preempt_enable();
> +}
> +
> +EXPORT_SYMBOL_GPL(kick_process);
> +
>  #endif
>  
>  /***
> @@ -575,7 +605,6 @@ repeat:
>   * @p: the to-be-woken-up thread
>   * @state: the mask of task states that can be woken
>   * @sync: do a synchronous wakeup?
> - * @kick: kick the CPU if the task is already running?
>   *
>   * Put it on the run-queue if it's not already there. The "current"
>   * thread is always on the run-queue (except when the actual
> @@ -585,7 +614,7 @@ repeat:
>   *
>   * returns failure only if the task is already active.
>   */
> -static int try_to_wake_up(task_t * p, unsigned int state, int sync, int kick)
> +static int try_to_wake_up(task_t * p, unsigned int state, int sync)
>  {
>  	unsigned long flags;
>  	int success = 0;
> @@ -626,33 +655,22 @@ repeat_lock_task:
>  			}
>  			success = 1;
>  		}
> -#ifdef CONFIG_SMP
> -	       	else
> -			if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
> -				smp_send_reschedule(task_cpu(p));
> -#endif
>  		p->state = TASK_RUNNING;
>  	}
>  	task_rq_unlock(rq, &flags);
>  
>  	return success;
>  }
> -
>  int wake_up_process(task_t * p)
>  {
> -	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
> +	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
>  }
>  
>  EXPORT_SYMBOL(wake_up_process);
>  
> -int wake_up_process_kick(task_t * p)
> -{
> -	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
> -}
> -
>  int wake_up_state(task_t *p, unsigned int state)
>  {
> -	return try_to_wake_up(p, state, 0, 0);
> +	return try_to_wake_up(p, state, 0);
>  }
>  
>  /*
> @@ -1621,7 +1639,7 @@ EXPORT_SYMBOL(preempt_schedule);
>  int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
>  {
>  	task_t *p = curr->task;
> -	return try_to_wake_up(p, mode, sync, 0);
> +	return try_to_wake_up(p, mode, sync);
>  }
>  
>  EXPORT_SYMBOL(default_wake_function);
> @@ -1942,15 +1960,6 @@ int task_nice(task_t *p)
>  EXPORT_SYMBOL(task_nice);
>  
>  /**
> - * task_curr - is this task currently executing on a CPU?
> - * @p: the task in question.
> - */
> -int task_curr(task_t *p)
> -{
> -	return cpu_curr(task_cpu(p)) == p;
> -}
> -
> -/**
>   * idle_cpu - is a given cpu idle currently?
>   * @cpu: the processor in question.
>   */
> --- linux/kernel/signal.c.orig	
> +++ linux/kernel/signal.c	
> @@ -538,8 +538,9 @@ int dequeue_signal(struct task_struct *t
>  inline void signal_wake_up(struct task_struct *t, int resume)
>  {
>  	unsigned int mask;
> +	int woken;
>  
> -	set_tsk_thread_flag(t,TIF_SIGPENDING);
> +	set_tsk_thread_flag(t, TIF_SIGPENDING);
>  
>  	/*
>  	 * If resume is set, we want to wake it up in the TASK_STOPPED case.
> @@ -551,10 +552,11 @@ inline void signal_wake_up(struct task_s
>  	mask = TASK_INTERRUPTIBLE;
>  	if (resume)
>  		mask |= TASK_STOPPED;
> -	if (t->state & mask) {
> -		wake_up_process_kick(t);
> -		return;
> -	}
> +	woken = 0;
> +	if (t->state & mask)
> +		woken = wake_up_state(t, mask);
> +	if (!woken)
> +		kick_process(t);
>  }
>  
>  /*


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] SMP signal latency fix up.
  2003-11-07 17:03     ` Mark Gross
@ 2003-11-08  6:48       ` Ingo Molnar
  0 siblings, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2003-11-08  6:48 UTC (permalink / raw)
  To: Mark Gross; +Cc: Linus Torvalds, Kernel Mailing List


On Fri, 7 Nov 2003, Mark Gross wrote:

> Its hard for me to tell if its better to being more careful with
> throughing around the IPI's at the cost of the extra opperations within
> your kick code.

an IPI creates quite some overhead both on the source and on the target
CPU, so i think it's definitely worth this extra check. Also, with
increasingly higher load it's increasingly more likely that we can skip
the IPI (because the task might be on the runqueue but it is not
executing), so further increasing the load via additional IPIs is the
wrong answer.

> Looks correct, and works good too!  I just verified that it solves my
> signal latency issue on both my HT system and my dual PIII box. Where I
> first found the problem.

great!

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] SMP signal latency fix up.
@ 2003-11-06 23:00 Mark Gross
  0 siblings, 0 replies; 18+ messages in thread
From: Mark Gross @ 2003-11-06 23:00 UTC (permalink / raw)
  To: linux-kernel


The program after the patch should execute the following command line within a fraction of a second.
time ./ping_pong -c 10000

Running on SMP and the 2.6.0-test9 kernel, it takes about 10000 * 1/HZ seconds.  Running this 
command with maxcpus=1 the command finishes in fraction of a second.  Under SMP the 
signal delivery isn't kicking the task if its in the run state on the other CPU.

The following patch has been tested and seems to fix the problem.  
I'm confident about the change to sched.c actualy fixes a cut and paste bug.

The change to signal.c IS needed to fix the problem, but I'm not sure there isn't a better way.

Please have take a look

--mgross

diff -urN -X dontdiff linux-2.6.0-test9/kernel/sched.c /opt/linux-2.6.0-test9/kernel/sched.c
--- linux-2.6.0-test9/kernel/sched.c    2003-10-25 11:44:29.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/sched.c       2003-11-06 13:04:03.628116240 -0800
@@ -626,13 +626,13 @@
                        }
                        success = 1;
                }
-#ifdef CONFIG_SMP
-               else
-                       if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
-                               smp_send_reschedule(task_cpu(p));
-#endif
                p->state = TASK_RUNNING;
        }
+#ifdef CONFIG_SMP
+               else
+               if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id()))
+                       smp_send_reschedule(task_cpu(p));
+#endif
        task_rq_unlock(rq, &flags);

        return success;
diff -urN -X dontdiff linux-2.6.0-test9/kernel/signal.c /opt/linux-2.6.0-test9/kernel/signal.c
--- linux-2.6.0-test9/kernel/signal.c   2003-10-25 11:43:27.000000000 -0700
+++ /opt/linux-2.6.0-test9/kernel/signal.c      2003-11-06 12:18:22.000000000 -0800
@@ -555,6 +555,9 @@
                wake_up_process_kick(t);
                return;
        }
+       if (t->state == TASK_RUNNING ) 
+               wake_up_process_kick(t);
+       
 }

 /*


---------------------------------- test program ---------------------------
/* ping_pong.c */

#include <argp.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <pthread.h>
#include <signal.h>

int argp_parse_option(int key, char *arg, struct argp_state *argp_state);

struct state {
	pthread_t receive;
	pthread_t send;
	int signal;
	int count;
	int yield;
        volatile int flags;
	volatile int sync_flags;
};

static struct state _state = {
	.signal = SIGUSR1,
	.flags = 0,
	.sync_flags = 0,
	.yield = 0,
};
static const struct argp_option argp_options[] = {
	{"count", 'c', "COUNT", 0, "Number of signals exchanged"},
	{"signal", 'n', "SIGNUM", 0, "The sended signal"},
	{"yield", 'y', NULL, 0, "Make receiver yield while waiting"},
	{0}
};
static const struct argp argp = {
	.options = argp_options,
	.parser = argp_parse_option,
};

int argp_parse_option(int key, char *arg, struct argp_state *argp_state)
{
	struct state *state = argp_state->input;
	switch (key) {		
	case 'n':
		if (sscanf(arg, "%u", &state->signal) != 1) {
			perror("invalid signal argument");
			return -EINVAL;
		}		
		break;
	case 'c':
		if (sscanf(arg, "%i", &state->count) != 1) {
			perror("invalid count argument");
			return -EINVAL;
		}		
		break;
	case 'y':
		state->yield = 1;
		break;
		
	}

	return 0;
}


static void * sender (void *arg)
{
	struct state * state = (struct state *) arg;
	sigset_t action;
	int c = 0, r = 0;
	int sig;
	
	sigemptyset (&action);
	sigaddset (&action, state->signal);
	sigprocmask (SIG_BLOCK, &action, 0);

	/* wait for the receiver to get ready for receiving signals */
	while (state->sync_flags == 0)
		sched_yield();

	while ( c++ < state->count ) {
		/* ping */
		pthread_kill ( state->receive, state->signal);	

		/* pong */
		if ((r = sigwait(&action, &sig)) != 0) {
			perror("sigwait");
			state->flags = 1;
			pthread_exit(&r);
		}
	}

	state->flags = 1;
 	pthread_exit (&r);	
}
   
static void handler(int signum)
{
	struct state * state = &_state;
	pthread_kill(state->send, state->signal);
}
static void * receiver (void *arg)
{
	struct state * state = (struct state *) arg;
	struct sigaction action;
	int r;
	
	action.sa_handler = handler;
	sigemptyset (&action.sa_mask);
	action.sa_flags = 0;	
	if ((r = sigaction (state->signal, &action, 0)) != 0) {
		perror("sigaction");
		pthread_exit(&r);
	}

	/* let the sender thread know we are ready to start receiving */
	state->sync_flags = 1;

	while ( state->flags == 0 ) { 
		if (state->yield)
			sched_yield();
		else
			/*spin*/;
	}

	/* ...and done */
	pthread_exit(&r);	
}	

int main(int argc, char *argv[])
{
	int result = 0;
	void * thread_result;
	struct state *state = &_state;

	if (argp_parse(&argp, argc, argv, ARGP_IN_ORDER, 0, state) != 0)
		exit(-1);

	result = pthread_create (&state->receive, NULL, receiver, state);
	if ( result != 0 ) {
		perror("pthread_create");
		exit(-1);
	}

	result = pthread_create (&state->send, NULL, sender, state);
	if ( result != 0 ) {
		perror("pthread_create");
		exit(-1);
	}

	result = pthread_join ( state->send, &thread_result);	
	if ( result != 0 ) {
		perror("pthread_join");
		exit(-1);
	}

	result = pthread_join ( state->receive, &thread_result);	
	if ( result != 0 ) {
		perror("pthread_join");
		exit(-1);
	}
	
	return 0;
}


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2003-11-08  6:49 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-11-06 22:49 [PATCH] SMP signal latency fix up Mark Gross
2003-11-06 23:20 ` Linus Torvalds
2003-11-07  1:39   ` Mark Gross
2003-11-07  1:42     ` Mark Gross
2003-11-07  9:45       ` Ingo Molnar
2003-11-07 15:43         ` Mark Gross
2003-11-07  9:39   ` Ingo Molnar
2003-11-07 15:09     ` Linus Torvalds
2003-11-07 15:17       ` Ingo Molnar
2003-11-07 15:31         ` Ingo Molnar
2003-11-07 15:29       ` Ingo Molnar
2003-11-07 17:03     ` Mark Gross
2003-11-08  6:48       ` Ingo Molnar
2003-11-06 23:26 ` Chris Friesen
2003-11-06 23:35   ` Mark Gross
2003-11-07  0:55     ` Nuno Silva
2003-11-07 10:24 ` Ingo Molnar
2003-11-06 23:00 Mark Gross

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).