All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kthread: NUMA aware kthread_create_on_cpu()
@ 2010-11-28 19:33 Eric Dumazet
  2010-11-28 22:40 ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-11-28 19:33 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, netdev, David Miller, Andi Kleen, Tejun Heo, Rusty Russell

Because all kthreads are created from a single helper task, they all use
memory from a single node for their kernel stack and task struct.

This patch creates kthread_create_on_cpu(), adding a 'cpu' parameter to
parameters already used by kthread_create().

This parameter is used to allocate memory for the new kthread on its
memory node, if available.

Users of this new function are : ksoftirqd, kworker, migration,
pktgend...

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
---
 include/linux/kthread.h   |   12 ++++++++----
 include/linux/mempolicy.h |    5 +++++
 kernel/kthread.c          |   29 ++++++++++++++++++++++-------
 kernel/softirq.c          |    3 ++-
 kernel/stop_machine.c     |    4 ++--
 kernel/workqueue.c        |    5 +++--
 mm/mempolicy.c            |    8 ++++++++
 net/core/pktgen.c         |    3 ++-
 8 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 685ea65..032b6ee 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -4,10 +4,14 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 
-struct task_struct *kthread_create(int (*threadfn)(void *data),
-				   void *data,
-				   const char namefmt[], ...)
-	__attribute__((format(printf, 3, 4)));
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+					  void *data,
+					  int cpu,
+					  const char namefmt[], ...)
+	__attribute__((format(printf, 4, 5)));
+
+#define kthread_create(threadfn, data, namefmt, arg...) \
+	kthread_create_on_cpu(threadfn, data, -1, namefmt, ##arg)
 
 /**
  * kthread_run - create and wake a thread.
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 31ac26c..5c66d66 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -200,6 +200,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 					    unsigned long idx);
 
 extern void numa_default_policy(void);
+extern void numa_cpubind_policy(int cpu);
 extern void numa_policy_init(void);
 extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
 				enum mpol_rebind_step step);
@@ -317,6 +318,10 @@ static inline void numa_default_policy(void)
 {
 }
 
+static inline void numa_cpubind_policy(int cpu)
+{
+}
+
 static inline void mpol_rebind_task(struct task_struct *tsk,
 				const nodemask_t *new,
 				enum mpol_rebind_step step)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786..3ddb9ae 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/mempolicy.h>
 #include <linux/freezer.h>
 #include <trace/events/sched.h>
 
@@ -27,6 +28,7 @@ struct kthread_create_info
 	/* Information passed to kthread() from kthreadd. */
 	int (*threadfn)(void *data);
 	void *data;
+	int cpu;
 
 	/* Result passed back to kthread_create() from kthreadd. */
 	struct task_struct *result;
@@ -101,7 +103,15 @@ static int kthread(void *_create)
 static void create_kthread(struct kthread_create_info *create)
 {
 	int pid;
-
+	static int last_cpu_pref = -1;
+
+	if (create->cpu != last_cpu_pref) {
+		if (create->cpu == -1)
+			numa_default_policy();
+		else
+			numa_cpubind_policy(create->cpu);
+		last_cpu_pref = create->cpu;
+	}
 	/* We want our own signal handler (we take no signals by default). */
 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
@@ -111,15 +121,18 @@ static void create_kthread(struct kthread_create_info *create)
 }
 
 /**
- * kthread_create - create a kthread.
+ * kthread_create_on_cpu - create a kthread.
  * @threadfn: the function to run until signal_pending(current).
  * @data: data ptr for @threadfn.
+ * @cpu: cpu number.
  * @namefmt: printf-style name for the thread.
  *
  * Description: This helper function creates and names a kernel
  * thread.  The thread will be stopped: use wake_up_process() to start
  * it.  See also kthread_run().
  *
+ * If thread is going to be bound on a particular cpu, give its number
+ * in @cpu, to get NUMA affinity for kthread stack, or else give -1.
  * When woken, the thread will run @threadfn() with @data as its
  * argument. @threadfn() can either call do_exit() directly if it is a
  * standalone thread for which noone will call kthread_stop(), or
@@ -129,15 +142,17 @@ static void create_kthread(struct kthread_create_info *create)
  *
  * Returns a task_struct or ERR_PTR(-ENOMEM).
  */
-struct task_struct *kthread_create(int (*threadfn)(void *data),
-				   void *data,
-				   const char namefmt[],
-				   ...)
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+					  void *data,
+					  int cpu,
+					  const char namefmt[],
+					  ...)
 {
 	struct kthread_create_info create;
 
 	create.threadfn = threadfn;
 	create.data = data;
+	create.cpu = cpu;
 	init_completion(&create.done);
 
 	spin_lock(&kthread_create_lock);
@@ -164,7 +179,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 	}
 	return create.result;
 }
-EXPORT_SYMBOL(kthread_create);
+EXPORT_SYMBOL(kthread_create_on_cpu);
 
 /**
  * kthread_bind - bind a just-created kthread to a cpu.
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 18f4be0..b2b7044 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -831,7 +831,8 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+		p = kthread_create_on_cpu(run_ksoftirqd, hcpu, hotcpu,
+					  "ksoftirqd/%d", hotcpu);
 		if (IS_ERR(p)) {
 			printk("ksoftirqd for %i failed\n", hotcpu);
 			return notifier_from_errno(PTR_ERR(p));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2df820b..7c0f287 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -301,8 +301,8 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
 	case CPU_UP_PREPARE:
 		BUG_ON(stopper->thread || stopper->enabled ||
 		       !list_empty(&stopper->works));
-		p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
-				   cpu);
+		p = kthread_create_on_cpu(cpu_stopper_thread, stopper, cpu,
+					  "migration/%d", cpu);
 		if (IS_ERR(p))
 			return notifier_from_errno(PTR_ERR(p));
 		get_task_struct(p);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 90db1bd..f054fb9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1318,8 +1318,9 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
 	worker->id = id;
 
 	if (!on_unbound_cpu)
-		worker->task = kthread_create(worker_thread, worker,
-					      "kworker/%u:%d", gcwq->cpu, id);
+		worker->task = kthread_create_on_cpu(worker_thread, worker,
+					gcwq->cpu,
+					"kworker/%u:%d", gcwq->cpu, id);
 	else
 		worker->task = kthread_create(worker_thread, worker,
 					      "kworker/u:%d", id);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4a57f13..f959edc 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2278,6 +2278,14 @@ void numa_default_policy(void)
 	do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
 }
 
+void numa_cpubind_policy(int cpu)
+{
+	nodemask_t mask;
+
+	init_nodemask_of_node(&mask, cpu_to_node(cpu));
+	do_set_mempolicy(MPOL_BIND, 0, &mask);
+}
+
 /*
  * Parse and format mempolicy from/to strings
  */
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 33bc382..c921fe9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3785,7 +3785,8 @@ static int __init pktgen_create_thread(int cpu)
 	list_add_tail(&t->th_list, &pktgen_threads);
 	init_completion(&t->start_done);
 
-	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+	p = kthread_create_on_cpu(pktgen_thread_worker, t, cpu,
+				  "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
 		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
 		list_del(&t->th_list);



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-28 19:33 [PATCH] kthread: NUMA aware kthread_create_on_cpu() Eric Dumazet
@ 2010-11-28 22:40 ` Andi Kleen
  2010-11-28 22:51   ` Eric Dumazet
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2010-11-28 22:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Andi Kleen,
	Tejun Heo, Rusty Russell


On Sun, Nov 28, 2010 at 08:33:53PM +0100, Eric Dumazet wrote:
> @@ -101,7 +103,15 @@ static int kthread(void *_create)
>  static void create_kthread(struct kthread_create_info *create)
>  {
>  	int pid;
> -
> +	static int last_cpu_pref = -1;
> +
> +	if (create->cpu != last_cpu_pref) {

Is that actually thread-safe?

> +void numa_cpubind_policy(int cpu)
> +{
> +	nodemask_t mask;
> +
> +	init_nodemask_of_node(&mask, cpu_to_node(cpu));
> +	do_set_mempolicy(MPOL_BIND, 0, &mask);

You don't want bind, you want preferred, otherwise this
will explode if the node is empty.

Also this messes up the policy of the caller process. You really
need to save/restore it.

And if the slab is configured for slab interleaving in
the cpuset this will be ignored I think.

Also I think the slab fast path ignores the policy anyways,
the policy only acts when slab has to grab new pages.
Are you sure this works at all?

It would be probably better to pass through the node
to the low level allocation functions and use them
there directly.

Problem is that this ends up in architecture specific code
for the stack, so may be a larger patch.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-28 22:40 ` Andi Kleen
@ 2010-11-28 22:51   ` Eric Dumazet
  2010-11-28 23:01     ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-11-28 22:51 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell

Le dimanche 28 novembre 2010 à 23:40 +0100, Andi Kleen a écrit :
> On Sun, Nov 28, 2010 at 08:33:53PM +0100, Eric Dumazet wrote:
> > @@ -101,7 +103,15 @@ static int kthread(void *_create)
> >  static void create_kthread(struct kthread_create_info *create)
> >  {
> >  	int pid;
> > -
> > +	static int last_cpu_pref = -1;
> > +
> > +	if (create->cpu != last_cpu_pref) {
> 
> Is that actually thread-safe?

Yes, we use one dedicated task to create all kthreads.

This task runs kthreadd(void *unused) in kernel/kthread.c

Its only duty is to create tasks.


> 
> > +void numa_cpubind_policy(int cpu)
> > +{
> > +	nodemask_t mask;
> > +
> > +	init_nodemask_of_node(&mask, cpu_to_node(cpu));
> > +	do_set_mempolicy(MPOL_BIND, 0, &mask);
> 
> You don't want bind, you want preferred, otherwise this
> will explode if the node is empty.
> 

OK thanks, I'll test the patch with BIND or PREFERRED in x86_32 mode
since I have one machine with two sockets, 2GB on each socket, so the 2nd
node only has HIGHMEM, no LOWMEM.

> Also this messes up the policy of the caller process. You really
> need to save/restore it.

Well, caller process duty is to create kthreads in a loop.

> 
> And if the slab is configured for slab interleaving in
> the cpuset this will be ignored I think.
> 



> Also I think the slab fast path ignores the policy anyways,
> the policy only acts when slab has to grab new pages.
> Are you sure this works at all?
> 

It works on x86 at least, I tested this patch and got correct stacks for
pktgen and ksoftirqd kthreads for sure.

> It would be probably better to pass through the node
> to the low level allocation functions and use them
> there directly.
> 

It would be difficult, because do_fork() is arch dependent

> Problem is that this ends up in architecture specific code
> for the stack, so may be a larger patch.

I suggest arches that need slab to allocate kthread stacks do the
appropriate changes, because I am not able to make them myself.

On x86, we use page allocator only, so NUMA mempolicy is used.




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-28 22:51   ` Eric Dumazet
@ 2010-11-28 23:01     ` Andi Kleen
  2010-11-28 23:37       ` Eric Dumazet
  2010-11-29  9:03       ` [PATCH] kthread: NUMA aware kthread_create_on_cpu() Américo Wang
  0 siblings, 2 replies; 26+ messages in thread
From: Andi Kleen @ 2010-11-28 23:01 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, Andrew Morton, linux-kernel, netdev, David Miller,
	Tejun Heo, Rusty Russell

On Sun, Nov 28, 2010 at 11:51:51PM +0100, Eric Dumazet wrote:
> > Also this messes up the policy of the caller process. You really
> > need to save/restore it.
> 
> Well, caller process duty is to create kthreads in a loop.

In this case any other allocations it may do are still on those
nodes.

> > Problem is that this ends up in architecture specific code
> > for the stack, so may be a larger patch.
> 
> I suggest arches that need slab to allocate kthread stacks do the
> appropriate changes, because I am not able to make them myself.
> 
> On x86, we use page allocator only, so NUMA mempolicy is used.

task_struct is always allocated from slab.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-28 23:01     ` Andi Kleen
@ 2010-11-28 23:37       ` Eric Dumazet
  2010-11-29  9:05         ` Andi Kleen
  2010-11-29  9:03       ` [PATCH] kthread: NUMA aware kthread_create_on_cpu() Américo Wang
  1 sibling, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-11-28 23:37 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell

Le lundi 29 novembre 2010 à 00:01 +0100, Andi Kleen a écrit :
> On Sun, Nov 28, 2010 at 11:51:51PM +0100, Eric Dumazet wrote:
> > > Also this messes up the policy of the caller process. You really
> > > need to save/restore it.
> > 
> > Well, caller process duty is to create kthreads in a loop.
> 
> In this case any other allocations it may do
>  are still on those
> nodes.

As I said, it does only create_kthread() calls, and no "other
allocations".

while (!list_empty(&kthread_create_list)) {
      struct kthread_create_info *create;

      create = list_entry(kthread_create_list.next,
                          struct kthread_create_info, list);
      list_del_init(&create->list);
      spin_unlock(&kthread_create_lock);

      create_kthread(create);

      spin_lock(&kthread_create_lock);
}





> 
> > > Problem is that this ends up in architecture specific code
> > > for the stack, so may be a larger patch.
> > 
> > I suggest arches that need slab to allocate kthread stacks do the
> > appropriate changes, because I am not able to make them myself.
> > 
> > On x86, we use page allocator only, so NUMA mempolicy is used.
> 
> task_struct is always allocated from slab.

Hmm, I meant stack (the thing that might be trashed a lot in ksoftirqd),
so it is included in struct thread_info

And this one uses __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER) from
alloc_thread_info()


By the way, I re-tested my original patch (MPOL_BIND) on x86_32

# cat /proc/buddyinfo 
Node 0, zone      DMA      0      1      0      1      2      1      1      0      1      1      3 
Node 0, zone   Normal     22     14     10      3      2      3      4      2      3      2    165 
Node 0, zone  HighMem     41     35    346    223    124    140     40     19      2      0    143 
Node 1, zone  HighMem     21      7      8      4    217     97     33     11      3      1    415 

And got correct stacks. Are you sure we must use PREFERRED ?




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-28 23:01     ` Andi Kleen
  2010-11-28 23:37       ` Eric Dumazet
@ 2010-11-29  9:03       ` Américo Wang
  1 sibling, 0 replies; 26+ messages in thread
From: Américo Wang @ 2010-11-29  9:03 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Eric Dumazet, Andrew Morton, linux-kernel, netdev, David Miller,
	Tejun Heo, Rusty Russell

On Mon, Nov 29, 2010 at 12:01:47AM +0100, Andi Kleen wrote:
>
>task_struct is always allocated from slab.
>

IA64 is an exception, it has its own alloc_task_struct()
which is simply __get_free_pages().

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-28 23:37       ` Eric Dumazet
@ 2010-11-29  9:05         ` Andi Kleen
  2010-11-29  9:38           ` Eric Dumazet
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2010-11-29  9:05 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, Andrew Morton, linux-kernel, netdev, David Miller,
	Tejun Heo, Rusty Russell

On Mon, Nov 29, 2010 at 12:37:04AM +0100, Eric Dumazet wrote:
> Le lundi 29 novembre 2010 à 00:01 +0100, Andi Kleen a écrit :
> > On Sun, Nov 28, 2010 at 11:51:51PM +0100, Eric Dumazet wrote:
> > > > Also this messes up the policy of the caller process. You really
> > > > need to save/restore it.
> > > 
> > > Well, caller process duty is to create kthreads in a loop.
> > 
> > In this case any other allocations it may do
> >  are still on those
> > nodes.
> 
> As I said, it does only create_kthread() calls, and no "other
> allocations".

Code changes. Your current setup seems fragile (also
the static variable)

> > > > for the stack, so may be a larger patch.
> > > 
> > > I suggest arches that need slab to allocate kthread stacks do the
> > > appropriate changes, because I am not able to make them myself.
> > > 
> > > On x86, we use page allocator only, so NUMA mempolicy is used.
> > 
> > task_struct is always allocated from slab.
> 
> Hmm, I meant stack (the thing that might be trashed a lot in ksoftirqd),
> so it is included in struct thread_info

task_struct is quite hot too. Also your original mail said
task struct, I believe.

> And got correct stacks. Are you sure we must use PREFERRED ?

Yes.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29  9:05         ` Andi Kleen
@ 2010-11-29  9:38           ` Eric Dumazet
  2010-11-29 15:13             ` [PATCH v2 0/4] " Eric Dumazet
                               ` (5 more replies)
  0 siblings, 6 replies; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29  9:38 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell

Le lundi 29 novembre 2010 à 10:05 +0100, Andi Kleen a écrit :
> On Mon, Nov 29, 2010 at 12:37:04AM +0100, Eric Dumazet wrote:

> > Hmm, I meant stack (the thing that might be trashed a lot in ksoftirqd),
> > so it is included in struct thread_info
> 
> task_struct is quite hot too. Also your original mail said
> task struct i believe.
> 
> > And got correct stacks. Are you sure we must use PREFERRED ?
> 
> Yes.

I'll respin a patch not using mempolicy but direct node, for both
thread_info and task_struct allocations.






^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 0/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29  9:38           ` Eric Dumazet
@ 2010-11-29 15:13             ` Eric Dumazet
  2010-11-29 16:09               ` Andi Kleen
  2010-11-29 17:39               ` David Miller
  2010-11-29 15:13             ` [PATCH v2 1/4] mm: NUMA aware alloc_task_struct_node() Eric Dumazet
                               ` (4 subsequent siblings)
  5 siblings, 2 replies; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29 15:13 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, linux-arch, netdev, David Miller,
	Tejun Heo, Rusty Russell, Tony Luck, Fenghua Yu

Note : compiled and tested on x86_32 and x86_64 only, but these patches
take care of other arches as well.

Cc: linux-arch@vger.kernel.org 

Thanks for your feedback Andi !


[PATCH v2 0/4] kthread: NUMA aware kthread_create_on_cpu()

Because all kthreads are created from a single helper task, they all use
memory from a single node for their kernel stack and task struct.

This patch suite creates kthread_create_on_cpu(), adding a 'cpu'
parameter to parameters already used by kthread_create().

This parameter is used to allocate memory for the new kthread on its
memory node, if available.

Users of this new function are : ksoftirqd, kworker, migration,
pktgend...


Patch 1/4 mm: NUMA aware alloc_task_struct_node()

alloc_task_struct(void) becomes alloc_task_struct_node(int node)

Patch 2/4 mm: NUMA aware alloc_thread_info_node()

alloc_thread_info(struct task_struct *tsk) becomes
alloc_thread_info_node(struct task_struct *tsk, int node)

Patch 3/4 kthread: NUMA aware kthread_create_on_cpu()

Patch 4/4 kthread: use kthread_create_on_cpu() 

ksoftirqd, kworker, migration, and pktgend kthreads can be created with
NUMA aware kthread_create_on_cpu()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-arch@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
---
 arch/cris/include/asm/thread_info.h     |    2 -
 arch/frv/include/asm/processor.h        |    2 -
 arch/frv/include/asm/thread_info.h      |   13 ++------
 arch/frv/kernel/process.c               |    5 ++-
 arch/ia64/include/asm/thread_info.h     |   14 +++++++--
 arch/m32r/include/asm/thread_info.h     |   13 ++------
 arch/mips/include/asm/thread_info.h     |    6 ++--
 arch/mn10300/include/asm/thread_info.h  |    6 ++--
 arch/powerpc/include/asm/thread_info.h  |    2 -
 arch/powerpc/kernel/process.c           |    4 +-
 arch/score/include/asm/thread_info.h    |    2 -
 arch/sh/include/asm/thread_info.h       |    2 -
 arch/sh/kernel/process.c                |   16 ++++++-----
 arch/sparc/include/asm/thread_info_32.h |    6 ++--
 arch/sparc/include/asm/thread_info_64.h |   24 ++++++++--------
 arch/sparc/mm/srmmu.c                   |    4 +-
 arch/sparc/mm/sun4c.c                   |    4 +-
 arch/tile/include/asm/thread_info.h     |    2 -
 arch/tile/kernel/process.c              |    4 +-
 arch/um/include/asm/processor-generic.h |    2 -
 arch/x86/include/asm/thread_info.h      |   10 +++++-
 include/linux/kthread.h                 |   14 ++++++---
 include/linux/sched.h                   |    1 
 kernel/fork.c                           |   20 ++++++++-----
 kernel/kthread.c                        |   32 +++++++++++++++++-----
 kernel/softirq.c                        |    3 +-
 kernel/stop_machine.c                   |    4 +-
 kernel/workqueue.c                      |    4 +-
 net/core/pktgen.c                       |    3 +-
 29 files changed, 135 insertions(+), 89 deletions(-)




^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 1/4] mm: NUMA aware alloc_task_struct_node()
  2010-11-29  9:38           ` Eric Dumazet
  2010-11-29 15:13             ` [PATCH v2 0/4] " Eric Dumazet
@ 2010-11-29 15:13             ` Eric Dumazet
  2010-11-29 15:14             ` [PATCH v2 2/4] mm: NUMA aware alloc_thread_info_node() Eric Dumazet
                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29 15:13 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

Add a node parameter to alloc_task_struct(), and change its name to
alloc_task_struct_node()

This change is needed to allow NUMA aware kthread_create_on_cpu()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-arch@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
---
 arch/frv/include/asm/processor.h        |    2 +-
 arch/frv/kernel/process.c               |    5 +++--
 arch/ia64/include/asm/thread_info.h     |    9 ++++++++-
 arch/um/include/asm/processor-generic.h |    2 +-
 kernel/fork.c                           |   10 ++++++----
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index 3744f2e..4b789ab 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -137,7 +137,7 @@ unsigned long get_wchan(struct task_struct *p);
 #define	KSTK_ESP(tsk)	((tsk)->thread.frame0->sp)
 
 /* Allocation and freeing of basic task resources. */
-extern struct task_struct *alloc_task_struct(void);
+extern struct task_struct *alloc_task_struct_node(int node);
 extern void free_task_struct(struct task_struct *p);
 
 #define cpu_relax()    barrier()
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index efad120..9d35975 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -44,9 +44,10 @@ asmlinkage void ret_from_fork(void);
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
-struct task_struct *alloc_task_struct(void)
+struct task_struct *alloc_task_struct_node(int node)
 {
-	struct task_struct *p = kmalloc(THREAD_SIZE, GFP_KERNEL);
+	struct task_struct *p = kmalloc_node(THREAD_SIZE, GFP_KERNEL, node);
+
 	if (p)
 		atomic_set((atomic_t *)(p+1), 1);
 	return p;
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index b6a5ba2..342004b 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -84,7 +84,14 @@ struct thread_info {
 #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
 
 #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-#define alloc_task_struct()	((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
+#define alloc_task_struct_node(node)						\
+({										\
+	struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP,	\
+					     KERNEL_STACK_SIZE_ORDER);		\
+	struct task_struct *ret = page ? page_address(page) : NULL;		\
+	/* Evaluates to the task_struct address, or NULL on failure. */		\
+	ret;									\
+})
 #define free_task_struct(tsk)	free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
 
 #endif /* !__ASSEMBLY */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index bed6688..d1d1b0d 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -66,7 +66,7 @@ struct thread_struct {
 	.request		= { 0 } \
 }
 
-extern struct task_struct *alloc_task_struct(void);
+extern struct task_struct *alloc_task_struct_node(int node);
 
 static inline void release_thread(struct task_struct *task)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5..9e3c656 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ int nr_processes(void)
 }
 
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
-# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
+# define alloc_task_struct_node(node)		\
+		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
+# define free_task_struct(tsk)			\
+		kmem_cache_free(task_struct_cachep, (tsk))
 static struct kmem_cache *task_struct_cachep;
 #endif
 
@@ -246,12 +248,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	struct task_struct *tsk;
 	struct thread_info *ti;
 	unsigned long *stackend;
-
+	int node = numa_node_id();
 	int err;
 
 	prepare_to_copy(orig);
 
-	tsk = alloc_task_struct();
+	tsk = alloc_task_struct_node(node);
 	if (!tsk)
 		return NULL;
 



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 2/4] mm: NUMA aware alloc_thread_info_node()
  2010-11-29  9:38           ` Eric Dumazet
  2010-11-29 15:13             ` [PATCH v2 0/4] " Eric Dumazet
  2010-11-29 15:13             ` [PATCH v2 1/4] mm: NUMA aware alloc_task_struct_node() Eric Dumazet
@ 2010-11-29 15:14             ` Eric Dumazet
  2010-11-29 15:14             ` [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu() Eric Dumazet
                               ` (2 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29 15:14 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu, linux-arch

Add a node parameter to alloc_thread_info(), and change its name to
alloc_thread_info_node()

This change is needed to allow NUMA aware kthread_create_on_cpu()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-arch@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
---
 arch/cris/include/asm/thread_info.h     |    2 -
 arch/frv/include/asm/thread_info.h      |   13 +++--------
 arch/ia64/include/asm/thread_info.h     |    5 ++--
 arch/m32r/include/asm/thread_info.h     |   13 +++--------
 arch/mips/include/asm/thread_info.h     |    6 +++--
 arch/mn10300/include/asm/thread_info.h  |    6 +++--
 arch/powerpc/include/asm/thread_info.h  |    2 -
 arch/powerpc/kernel/process.c           |    4 +--
 arch/score/include/asm/thread_info.h    |    2 -
 arch/sh/include/asm/thread_info.h       |    2 -
 arch/sh/kernel/process.c                |   16 ++++++++------
 arch/sparc/include/asm/thread_info_32.h |    6 ++---
 arch/sparc/include/asm/thread_info_64.h |   24 +++++++++++-----------
 arch/sparc/mm/srmmu.c                   |    4 +--
 arch/sparc/mm/sun4c.c                   |    4 +--
 arch/tile/include/asm/thread_info.h     |    2 -
 arch/tile/kernel/process.c              |    4 +--
 arch/x86/include/asm/thread_info.h      |   10 +++++++--
 kernel/fork.c                           |    9 +++++---
 19 files changed, 70 insertions(+), 64 deletions(-)

diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 9177606..29b74a1 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -68,7 +68,7 @@ struct thread_info {
 #define init_thread_info	(init_thread_union.thread_info)
 
 /* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
+#define alloc_thread_info_node(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) /* node is ignored */
 #define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index 11f33ea..8582e9c 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -84,16 +84,11 @@ register struct thread_info *__current_thread_info asm("gr15");
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)					\
-	({							\
-		struct thread_info *ret;			\
-								\
-		ret = kzalloc(THREAD_SIZE, GFP_KERNEL);		\
-								\
-		ret;						\
-	})
+#define alloc_thread_info_node(tsk, node)			\
+		kzalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #else
-#define alloc_thread_info(tsk)	kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node)			\
+		kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #endif
 
 #define free_thread_info(info)	kfree(info)
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 342004b..6392908 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -59,11 +59,12 @@ struct thread_info {
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#define alloc_thread_info_node(tsk, node)	\
+		((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
 #define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
 #else
 #define current_thread_info()	((struct thread_info *) 0)
-#define alloc_thread_info(tsk)	((struct thread_info *) 0)
+#define alloc_thread_info_node(tsk, node)	((struct thread_info *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
 #define free_thread_info(ti)	/* nothing */
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 71faff5..0227dba 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -96,16 +96,11 @@ static inline struct thread_info *current_thread_info(void)
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)					\
-	({							\
-		struct thread_info *ret;			\
-	 							\
-	 	ret = kzalloc(THREAD_SIZE, GFP_KERNEL);		\
-								\
-	 	ret;						\
-	 })
+#define alloc_thread_info_node(tsk, node)			\
+		kzalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node)			\
+		kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #endif
 
 #define free_thread_info(info) kfree(info)
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index d309556..d71160d 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -88,9 +88,11 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node) \
+		kzalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node) \
+		kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #endif
 
 #define free_thread_info(info) kfree(info)
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index aa07a4a..8d53f09 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -124,9 +124,11 @@ static inline unsigned long current_stack_pointer(void)
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node)			\
+		kzalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node)			\
+		kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #endif
 
 #define free_thread_info(ti)	kfree((ti))
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 65eb859..d8529ef 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -72,7 +72,7 @@ struct thread_info {
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
-extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
+extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node);
 extern void free_thread_info(struct thread_info *ti);
 
 #endif /* THREAD_SHIFT < PAGE_SHIFT */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 84906d3..1248460 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1216,11 +1216,11 @@ void __ppc64_runlatch_off(void)
 
 static struct kmem_cache *thread_info_cache;
 
-struct thread_info *alloc_thread_info(struct task_struct *tsk)
+struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node)
 {
 	struct thread_info *ti;
 
-	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
+	ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node);
 	if (unlikely(ti == NULL))
 		return NULL;
 #ifdef CONFIG_DEBUG_STACK_USAGE
diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 8570d08..2205c62 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -71,7 +71,7 @@ struct thread_info {
 register struct thread_info *__current_thread_info __asm__("r28");
 #define current_thread_info()	__current_thread_info
 
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info_node(tsk, node) kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #define free_thread_info(info) kfree(info)
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index c228946..ea2d508 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -95,7 +95,7 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
+extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node);
 extern void free_thread_info(struct thread_info *ti);
 extern void arch_task_cache_init(void);
 #define arch_task_cache_init arch_task_cache_init
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index dcb126d..f39ad57 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -32,16 +32,16 @@ void free_thread_xstate(struct task_struct *tsk)
 #if THREAD_SHIFT < PAGE_SHIFT
 static struct kmem_cache *thread_info_cache;
 
-struct thread_info *alloc_thread_info(struct task_struct *tsk)
+struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node)
 {
 	struct thread_info *ti;
-
-	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
-	if (unlikely(ti == NULL))
-		return NULL;
 #ifdef CONFIG_DEBUG_STACK_USAGE
-	memset(ti, 0, THREAD_SIZE);
+	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
+#else
+	gfp_t mask = GFP_KERNEL;
 #endif
+
+	ti = kmem_cache_alloc_node(thread_info_cache, mask, node);
 	return ti;
 }
 
@@ -64,7 +64,9 @@ struct thread_info *alloc_thread_info(struct task_struct *tsk)
 #else
 	gfp_t mask = GFP_KERNEL;
 #endif
-	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+	return page ? page_address(page) : NULL;
 }
 
 void free_thread_info(struct thread_info *ti)
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 9dd0318..fa57532 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -82,8 +82,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
-BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void)
-#define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)()
+BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info_node, int)
+#define alloc_thread_info_node(tsk, node) BTFIXUP_CALL(alloc_thread_info_node)(node)
 
 BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define free_thread_info(ti) BTFIXUP_CALL(free_thread_info)(ti)
@@ -92,7 +92,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 
 /*
  * Size of kernel stack for each process.
- * Observe the order of get_free_pages() in alloc_thread_info().
+ * Observe the order of get_free_pages() in alloc_thread_info_node().
  * The sun4 has 8K stack too, because it's short on memory, and 16K is a waste.
  */
 #define THREAD_SIZE		8192
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index fb2ea77..60d86be 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -146,21 +146,21 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)					\
-({								\
-	struct thread_info *ret;				\
-								\
-	ret = (struct thread_info *)				\
-	  __get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER);	\
-	if (ret)						\
-		memset(ret, 0, PAGE_SIZE<<__THREAD_INFO_ORDER);	\
-	ret;							\
-})
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
 #else
-#define alloc_thread_info(tsk) \
-	((struct thread_info *)__get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER))
+#define THREAD_FLAGS (GFP_KERNEL)
 #endif
 
+#define alloc_thread_info_node(tsk, node)				\
+({									\
+	struct page *page = alloc_pages_node(node, THREAD_FLAGS,	\
+					     __THREAD_INFO_ORDER);	\
+	struct thread_info *ret;					\
+									\
+	ret = page ? page_address(page) : NULL;				\
+	ret;								\
+})
+
 #define free_thread_info(ti) \
 	free_pages((unsigned long)(ti),__THREAD_INFO_ORDER)
 
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 92319aa..fe09fd8 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -650,7 +650,7 @@ static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len)
  * mappings on the kernel stack without any special code as we did
  * need on the sun4c.
  */
-static struct thread_info *srmmu_alloc_thread_info(void)
+static struct thread_info *srmmu_alloc_thread_info_node(int node)
 {
 	struct thread_info *ret;
 
@@ -2271,7 +2271,7 @@ void __init ld_mmu_srmmu(void)
 
 	BTFIXUPSET_CALL(mmu_info, srmmu_mmu_info, BTFIXUPCALL_NORM);
 
-	BTFIXUPSET_CALL(alloc_thread_info, srmmu_alloc_thread_info, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(alloc_thread_info_node, srmmu_alloc_thread_info_node, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(free_thread_info, srmmu_free_thread_info, BTFIXUPCALL_NORM);
 
 	BTFIXUPSET_CALL(pte_to_pgoff, srmmu_pte_to_pgoff, BTFIXUPCALL_NORM);
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index ddd0d86..f6f4c54 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -924,7 +924,7 @@ static inline void garbage_collect(int entry)
 	free_locked_segment(BUCKET_ADDR(entry));
 }
 
-static struct thread_info *sun4c_alloc_thread_info(void)
+static struct thread_info *sun4c_alloc_thread_info_node(int node)
 {
 	unsigned long addr, pages;
 	int entry;
@@ -2157,7 +2157,7 @@ void __init ld_mmu_sun4c(void)
 	BTFIXUPSET_CALL(__swp_offset, sun4c_swp_offset, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(__swp_entry, sun4c_swp_entry, BTFIXUPCALL_NORM);
 
-	BTFIXUPSET_CALL(alloc_thread_info, sun4c_alloc_thread_info, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(alloc_thread_info_node, sun4c_alloc_thread_info_node, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(free_thread_info, sun4c_free_thread_info, BTFIXUPCALL_NORM);
 
 	BTFIXUPSET_CALL(mmu_info, sun4c_mmu_info, BTFIXUPCALL_NORM);
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 3872f2b..145e578 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -83,7 +83,7 @@ register unsigned long stack_pointer __asm__("sp");
   ((struct thread_info *)(stack_pointer & -THREAD_SIZE))
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-extern struct thread_info *alloc_thread_info(struct task_struct *task);
+extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node);
 extern void free_thread_info(struct thread_info *info);
 
 /* Sit on a nap instruction until interrupted. */
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 8430f45..7f7179a 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -109,7 +109,7 @@ void cpu_idle(void)
 	}
 }
 
-struct thread_info *alloc_thread_info(struct task_struct *task)
+struct thread_info *alloc_thread_info_node(struct task_struct *task, int node)
 {
 	struct page *page;
 	gfp_t flags = GFP_KERNEL;
@@ -118,7 +118,7 @@ struct thread_info *alloc_thread_info(struct task_struct *task)
 	flags |= __GFP_ZERO;
 #endif
 
-	page = alloc_pages(flags, THREAD_SIZE_ORDER);
+	page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER);
 	if (!page)
 		return NULL;
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f0b6e5d..1f2e61e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -161,8 +161,14 @@ struct thread_info {
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
-#define alloc_thread_info(tsk)						\
-	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+#define alloc_thread_info_node(tsk, node)				\
+({									\
+	struct page *page = alloc_pages_node(node, THREAD_FLAGS,	\
+					     THREAD_ORDER);		\
+	struct thread_info *ret = page ? page_address(page) : NULL;	\
+									\
+	ret;								\
+})
 
 #ifdef CONFIG_X86_32
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 9e3c656..3ebae16 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -116,14 +116,17 @@ static struct kmem_cache *task_struct_cachep;
 #endif
 
 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+						  int node)
 {
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
 #else
 	gfp_t mask = GFP_KERNEL;
 #endif
-	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
@@ -257,7 +260,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	if (!tsk)
 		return NULL;
 
-	ti = alloc_thread_info(tsk);
+	ti = alloc_thread_info_node(tsk, node);
 	if (!ti) {
 		free_task_struct(tsk);
 		return NULL;



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29  9:38           ` Eric Dumazet
                               ` (2 preceding siblings ...)
  2010-11-29 15:14             ` [PATCH v2 2/4] mm: NUMA aware alloc_thread_info_node() Eric Dumazet
@ 2010-11-29 15:14             ` Eric Dumazet
  2010-12-10  0:44               ` Andrew Morton
  2010-11-29 15:15             ` [PATCH v2 4/4] kthread: use kthread_create_on_cpu() Eric Dumazet
  2010-11-30  9:38             ` David Howells
  5 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29 15:14 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

Because all kthreads are created from a single helper task, they all use
memory from a single node for their kernel stack and task struct.

This patch series creates kthread_create_on_cpu(), adding a 'cpu'
parameter to the parameters already used by kthread_create().

This parameter is used to allocate memory for the new kthread on the
memory node of that cpu, if possible.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-arch@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
---
 include/linux/kthread.h |   14 ++++++++++----
 include/linux/sched.h   |    1 +
 kernel/fork.c           |    3 ++-
 kernel/kthread.c        |   32 ++++++++++++++++++++++++++------
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 685ea65..ad753bc 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -4,10 +4,15 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 
-struct task_struct *kthread_create(int (*threadfn)(void *data),
-				   void *data,
-				   const char namefmt[], ...)
-	__attribute__((format(printf, 3, 4)));
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+					  void *data,
+					  int cpu,
+					  const char namefmt[], ...)
+	__attribute__((format(printf, 4, 5)));
+
+#define kthread_create(threadfn, data, namefmt, arg...) \
+	kthread_create_on_cpu(threadfn, data, -1, namefmt, ##arg)
+
 
 /**
  * kthread_run - create and wake a thread.
@@ -34,6 +39,7 @@ void *kthread_data(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
+extern int tsk_fork_get_node(struct task_struct *tsk);
 
 /*
  * Simple work processor based on kthread.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c79e92..e212776 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1456,6 +1456,7 @@ struct task_struct {
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;	/* Protected by alloc_lock */
 	short il_next;
+	short pref_node_fork;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
diff --git a/kernel/fork.c b/kernel/fork.c
index 3ebae16..4355dd4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
 #include <linux/posix-timers.h>
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
+#include <linux/kthread.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -251,7 +252,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	struct task_struct *tsk;
 	struct thread_info *ti;
 	unsigned long *stackend;
-	int node = numa_node_id();
+	int node = tsk_fork_get_node(orig);
 	int err;
 
 	prepare_to_copy(orig);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786..4aec26d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -27,6 +27,7 @@ struct kthread_create_info
 	/* Information passed to kthread() from kthreadd. */
 	int (*threadfn)(void *data);
 	void *data;
+	int cpu;
 
 	/* Result passed back to kthread_create() from kthreadd. */
 	struct task_struct *result;
@@ -98,10 +99,24 @@ static int kthread(void *_create)
 	do_exit(ret);
 }
 
+/* called from do_fork() to get node information for about to be created task */
+int tsk_fork_get_node(struct task_struct *tsk)
+{
+#ifdef CONFIG_NUMA
+	if (tsk == kthreadd_task)
+		return tsk->pref_node_fork;
+#endif
+	return numa_node_id();
+}
+
 static void create_kthread(struct kthread_create_info *create)
 {
 	int pid;
 
+#ifdef CONFIG_NUMA
+	current->pref_node_fork = (create->cpu != -1) ?
+		cpu_to_node(create->cpu) : -1;
+#endif
 	/* We want our own signal handler (we take no signals by default). */
 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
@@ -111,15 +126,18 @@ static void create_kthread(struct kthread_create_info *create)
 }
 
 /**
- * kthread_create - create a kthread.
+ * kthread_create_on_cpu - create a kthread.
  * @threadfn: the function to run until signal_pending(current).
  * @data: data ptr for @threadfn.
+ * @cpu: cpu number.
  * @namefmt: printf-style name for the thread.
  *
  * Description: This helper function creates and names a kernel
  * thread.  The thread will be stopped: use wake_up_process() to start
  * it.  See also kthread_run().
  *
+ * If thread is going to be bound on a particular cpu, give its number
+ * in @cpu, to get NUMA affinity for kthread stack, or else give -1.
  * When woken, the thread will run @threadfn() with @data as its
  * argument. @threadfn() can either call do_exit() directly if it is a
  * standalone thread for which noone will call kthread_stop(), or
@@ -129,15 +147,17 @@ static void create_kthread(struct kthread_create_info *create)
  *
  * Returns a task_struct or ERR_PTR(-ENOMEM).
  */
-struct task_struct *kthread_create(int (*threadfn)(void *data),
-				   void *data,
-				   const char namefmt[],
-				   ...)
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+					  void *data,
+					  int cpu,
+					  const char namefmt[],
+					  ...)
 {
 	struct kthread_create_info create;
 
 	create.threadfn = threadfn;
 	create.data = data;
+	create.cpu = cpu;
 	init_completion(&create.done);
 
 	spin_lock(&kthread_create_lock);
@@ -164,7 +184,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 	}
 	return create.result;
 }
-EXPORT_SYMBOL(kthread_create);
+EXPORT_SYMBOL(kthread_create_on_cpu);
 
 /**
  * kthread_bind - bind a just-created kthread to a cpu.



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 4/4] kthread: use kthread_create_on_cpu()
  2010-11-29  9:38           ` Eric Dumazet
                               ` (3 preceding siblings ...)
  2010-11-29 15:14             ` [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu() Eric Dumazet
@ 2010-11-29 15:15             ` Eric Dumazet
  2010-11-29 15:19               ` Tejun Heo
  2010-11-30  9:38             ` David Howells
  5 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29 15:15 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, linux-arch

ksoftirqd, kworker, migration, and pktgend kthreads can be created with
kthread_create_on_cpu(), to get proper NUMA affinities for their stack
and task_struct.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-arch@vger.kernel.org
---
 kernel/softirq.c      |    3 ++-
 kernel/stop_machine.c |    4 ++--
 kernel/workqueue.c    |    4 ++--
 net/core/pktgen.c     |    3 ++-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 18f4be0..b2b7044 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -831,7 +831,8 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+		p = kthread_create_on_cpu(run_ksoftirqd, hcpu, hotcpu,
+					  "ksoftirqd/%d", hotcpu);
 		if (IS_ERR(p)) {
 			printk("ksoftirqd for %i failed\n", hotcpu);
 			return notifier_from_errno(PTR_ERR(p));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2df820b..7c0f287 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -301,8 +301,8 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
 	case CPU_UP_PREPARE:
 		BUG_ON(stopper->thread || stopper->enabled ||
 		       !list_empty(&stopper->works));
-		p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
-				   cpu);
+		p = kthread_create_on_cpu(cpu_stopper_thread, stopper, cpu,
+					  "migration/%d", cpu);
 		if (IS_ERR(p))
 			return notifier_from_errno(PTR_ERR(p));
 		get_task_struct(p);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 90db1bd..c8feaf4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1318,8 +1318,8 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
 	worker->id = id;
 
 	if (!on_unbound_cpu)
-		worker->task = kthread_create(worker_thread, worker,
-					      "kworker/%u:%d", gcwq->cpu, id);
+		worker->task = kthread_create_on_cpu(worker_thread, worker, gcwq->cpu,
+						     "kworker/%u:%d", gcwq->cpu, id);
 	else
 		worker->task = kthread_create(worker_thread, worker,
 					      "kworker/u:%d", id);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 33bc382..c921fe9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3785,7 +3785,8 @@ static int __init pktgen_create_thread(int cpu)
 	list_add_tail(&t->th_list, &pktgen_threads);
 	init_completion(&t->start_done);
 
-	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+	p = kthread_create_on_cpu(pktgen_thread_worker, t, cpu,
+				  "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
 		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
 		list_del(&t->th_list);



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 4/4] kthread: use kthread_create_on_cpu()
  2010-11-29 15:15             ` [PATCH v2 4/4] kthread: use kthread_create_on_cpu() Eric Dumazet
@ 2010-11-29 15:19               ` Tejun Heo
  0 siblings, 0 replies; 26+ messages in thread
From: Tejun Heo @ 2010-11-29 15:19 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, Andrew Morton, linux-kernel, netdev, David Miller,
	Rusty Russell, linux-arch

On 11/29/2010 04:15 PM, Eric Dumazet wrote:
> ksoftirqd, kworker, migration, and pktgend kthreads can be created with
> kthread_create_on_cpu(), to get proper NUMA affinities for their stack
> and task_struct.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: David Miller <davem@davemloft.net>
> Cc: Andi Kleen <andi@firstfloor.org>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: linux-arch@vger.kernel.org

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29 15:13             ` [PATCH v2 0/4] " Eric Dumazet
@ 2010-11-29 16:09               ` Andi Kleen
  2010-11-29 17:39               ` David Miller
  1 sibling, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2010-11-29 16:09 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, Andrew Morton, linux-kernel, linux-arch, netdev,
	David Miller, Tejun Heo, Rusty Russell, Tony Luck, Fenghua Yu

On Mon, Nov 29, 2010 at 04:13:23PM +0100, Eric Dumazet wrote:
> Note : compiled and tested on x86_32 and x86_64 only, but these patches
> take care of other arches as well.
> 
> Cc: linux-arch@vger.kernel.org 
> 
> Thanks for your feedback Andi !

Looks good to me now. You can add

Reviewed-by: Andi Kleen <ak@linux.intel.com>

to the four patches.
-Andi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29 15:13             ` [PATCH v2 0/4] " Eric Dumazet
  2010-11-29 16:09               ` Andi Kleen
@ 2010-11-29 17:39               ` David Miller
  2010-11-29 17:59                 ` Eric Dumazet
  1 sibling, 1 reply; 26+ messages in thread
From: David Miller @ 2010-11-29 17:39 UTC (permalink / raw)
  To: eric.dumazet
  Cc: andi, akpm, linux-kernel, linux-arch, netdev, tj, rusty,
	tony.luck, fenghua.yu

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 29 Nov 2010 16:13:23 +0100

> Note : compiled and tested on x86_32 and x86_64 only, but these patches
> take care of other arches as well.
> 
> Cc: linux-arch@vger.kernel.org 
> 
> Thanks for your feedback Andi !

I'm fine with these changes:

Acked-by: David S. Miller <davem@davemloft.net>

Since the majority is non-networking it is pretty clear
that someone other than me should integrate these patches.
:-)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29 17:39               ` David Miller
@ 2010-11-29 17:59                 ` Eric Dumazet
  2010-11-29 23:31                   ` Rusty Russell
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-11-29 17:59 UTC (permalink / raw)
  To: David Miller, Andrew Morton
  Cc: andi, linux-kernel, linux-arch, netdev, tj, rusty, tony.luck, fenghua.yu

Le lundi 29 novembre 2010 à 09:39 -0800, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 29 Nov 2010 16:13:23 +0100
> 
> > Note : compiled and tested on x86_32 and x86_64 only, but these patches
> > take care of other arches as well.
> > 
> > Cc: linux-arch@vger.kernel.org 
> > 
> > Thanks for your feedback Andi !
> 
> I'm fine with these changes:
> 
> Acked-by: David S. Miller <davem@davemloft.net>
> 
> Since the majority is non-networking it is pretty clear
> that someone other than me should integrate these patches.
> :-)

Sure !

I was thinking Andrew was the guy to carry this patch series, once
things settle down of course...

Thanks



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29 17:59                 ` Eric Dumazet
@ 2010-11-29 23:31                   ` Rusty Russell
  0 siblings, 0 replies; 26+ messages in thread
From: Rusty Russell @ 2010-11-29 23:31 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Andrew Morton, andi, linux-kernel, linux-arch,
	netdev, tj, tony.luck, fenghua.yu

On Tue, 30 Nov 2010 04:29:43 am Eric Dumazet wrote:
> Le lundi 29 novembre 2010 à 09:39 -0800, David Miller a écrit :
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Mon, 29 Nov 2010 16:13:23 +0100
> > 
> > > Note : compiled and tested on x86_32 and x86_64 only, but these patches
> > > take care of other arches as well.
> > > 
> > > Cc: linux-arch@vger.kernel.org 
> > > 
> > > Thanks for your feedback Andi !
> > 
> > I'm fine with these changes:
> > 
> > Acked-by: David S. Miller <davem@davemloft.net>
> > 
> > Since the majority is non-networking it is pretty clear
> > that someone other than me should integrate these patches.
> > :-)
> 
> Sure !
> 
> I was thinking Andrew was the guy to carry this patch series, once
> things settle down of course...

I'm happy with that.

Acked-by: Rusty Russell <rusty@rustcorp.com.au>

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 4/4] kthread: use kthread_create_on_cpu()
  2010-11-29  9:38           ` Eric Dumazet
                               ` (4 preceding siblings ...)
  2010-11-29 15:15             ` [PATCH v2 4/4] kthread: use kthread_create_on_cpu() Eric Dumazet
@ 2010-11-30  9:38             ` David Howells
  2010-11-30  9:59               ` Eric Dumazet
  5 siblings, 1 reply; 26+ messages in thread
From: David Howells @ 2010-11-30  9:38 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: dhowells, Andi Kleen, Andrew Morton, linux-kernel, netdev,
	David Miller, Tejun Heo, Rusty Russell, linux-arch

Eric Dumazet <eric.dumazet@gmail.com> wrote:

> +		p = kthread_create_on_cpu(run_ksoftirqd, hcpu, hotcpu,
> +					  "ksoftirqd/%d", hotcpu);

Does kthread_create_on_cpu() need to take hotcpu twice?  Can one of the
arguments be folded into the other?

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 4/4] kthread: use kthread_create_on_cpu()
  2010-11-30  9:38             ` David Howells
@ 2010-11-30  9:59               ` Eric Dumazet
  0 siblings, 0 replies; 26+ messages in thread
From: Eric Dumazet @ 2010-11-30  9:59 UTC (permalink / raw)
  To: David Howells
  Cc: Andi Kleen, Andrew Morton, linux-kernel, netdev, David Miller,
	Tejun Heo, Rusty Russell, linux-arch

Le mardi 30 novembre 2010 à 09:38 +0000, David Howells a écrit :
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > +		p = kthread_create_on_cpu(run_ksoftirqd, hcpu, hotcpu,
> > +					  "ksoftirqd/%d", hotcpu);
> 
> Does kthread_create_on_cpu() need to take hotcpu twice?  Can one of the
> arguments be folded into the other?
> 
> David

The second one is used as a printf()-style argument to build the thread
name; it's not really part of the API.

The caller could instead do:

char name[32];
sprintf(name, "ksoftirqd/%d", whatever_id);
p = kthread_create_on_cpu(run_ksoftirqd, hcpu, hotcpu, name);




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-11-29 15:14             ` [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu() Eric Dumazet
@ 2010-12-10  0:44               ` Andrew Morton
  2010-12-10  5:59                 ` Eric Dumazet
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Morton @ 2010-12-10  0:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

On Mon, 29 Nov 2010 16:14:55 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> All kthreads being created from a single helper task, they all use
> memory from a single node for their kernel stack and task struct.
> 
> This patch suite creates kthread_create_on_cpu(), adding a 'cpu'
> parameter to parameters already used by kthread_create().
> 
> This parameter serves in allocating memory for the new kthread on its
> memory node if possible.

The name "kthread_create_on_cpu" is pretty misleading.

One would expect such a function to create a kthread which is bound to
that CPU.  But what it in fact does is to create a kthread which is
bound to all CPUs and whose stack, task_struct and thread_info were
allocated from the node which contains `cpu'.

Also, a saner interface would be one which takes the numa_node_id, not
the cpu number.

>
> ...
>
>  /**
> - * kthread_create - create a kthread.
> + * kthread_create_on_cpu - create a kthread.
>   * @threadfn: the function to run until signal_pending(current).
>   * @data: data ptr for @threadfn.
> + * @cpu: cpu number.
>   * @namefmt: printf-style name for the thread.
>   *
>   * Description: This helper function creates and names a kernel
>   * thread.  The thread will be stopped: use wake_up_process() to start
>   * it.  See also kthread_run().
>   *
> + * If thread is going to be bound on a particular cpu, give its number
> + * in @cpu, to get NUMA affinity for kthread stack, or else give -1.

This is a bit presumptuous.  The caller might wish to later bind this
thread to some or all of the CPUs on the node, rather than to a single
CPU (eg, kswapd()).


So what to do?  Maybe add a new kthread_create_node() which prepares a
kthread whose memory is bound to that node, then add a
kthread_create_cpu() convenience wrapper around that?

>
> ...
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-12-10  0:44               ` Andrew Morton
@ 2010-12-10  5:59                 ` Eric Dumazet
  2010-12-10  6:32                   ` Andrew Morton
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-12-10  5:59 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

Le jeudi 09 décembre 2010 à 16:44 -0800, Andrew Morton a écrit :

> The name "kthread_create_on_cpu" is pretty misleading.
> 
> One would expect such a function to create a kthread which is bound to
> that CPU.  But what it in fact does is to create a kthread which is
> bound to all CPUs and whose stack, task_struct and thread_info were
> allocated from the node which contains `cpu'.
> 
> Also, a saner interface would be one which takes the numa_node_id, not
> the cpu number.
> 


> >
> > ...
> >
> >  /**
> > - * kthread_create - create a kthread.
> > + * kthread_create_on_cpu - create a kthread.
> >   * @threadfn: the function to run until signal_pending(current).
> >   * @data: data ptr for @threadfn.
> > + * @cpu: cpu number.
> >   * @namefmt: printf-style name for the thread.
> >   *
> >   * Description: This helper function creates and names a kernel
> >   * thread.  The thread will be stopped: use wake_up_process() to start
> >   * it.  See also kthread_run().
> >   *
> > + * If thread is going to be bound on a particular cpu, give its number
> > + * in @cpu, to get NUMA affinity for kthread stack, or else give -1.
> 
> This is a bit presumptuous.  The caller might wish to later bind this
> thread to some or all of the CPUs on the node, rather than to a single
> CPU (eg, kswapd()).
> 
> 
> So what to do?  Maybe add a new kthread_create_node() which prepares a
> kthread whose memory is bound to that node, then add a
> kthread_create_cpu() convenience wrapper around that?
> 

We probably can add the "bind to cpu" as a fifth patch, to avoid one 
kthread_bind(p, cpu);  done by these callers.

My reasoning not including this kthread_bind(p, cpu) in initial patch
series that I was focusing on NUMA properties first, not on scheduling
(this part already runs correctly as far as I know)

Thanks for taking the patch series, I was about to resubmit it today :)




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-12-10  5:59                 ` Eric Dumazet
@ 2010-12-10  6:32                   ` Andrew Morton
  2010-12-10  7:02                     ` Eric Dumazet
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Morton @ 2010-12-10  6:32 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

On Fri, 10 Dec 2010 06:59:58 +0100 Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le jeudi 09 décembre 2010 à 16:44 -0800, Andrew Morton a écrit :
> 
> > The name "kthread_create_on_cpu" is pretty misleading.
> > 
> > One would expect such a function to create a kthread which is bound to
> > that CPU.  But what it in fact does is to create a kthread which is
> > bound to all CPUs and whose stack, task_struct and thread_info were
> > allocated from the node which contains `cpu'.
> > 
> > Also, a saner interface would be one which takes the numa_node_id, not
> > the cpu number.
> > 
> 
> 
> > >
> > > ...
> > >
> > >  /**
> > > - * kthread_create - create a kthread.
> > > + * kthread_create_on_cpu - create a kthread.
> > >   * @threadfn: the function to run until signal_pending(current).
> > >   * @data: data ptr for @threadfn.
> > > + * @cpu: cpu number.
> > >   * @namefmt: printf-style name for the thread.
> > >   *
> > >   * Description: This helper function creates and names a kernel
> > >   * thread.  The thread will be stopped: use wake_up_process() to start
> > >   * it.  See also kthread_run().
> > >   *
> > > + * If thread is going to be bound on a particular cpu, give its number
> > > + * in @cpu, to get NUMA affinity for kthread stack, or else give -1.
> > 
> > This is a bit presumptuous.  The caller might wish to later bind this
> > thread to some or all of the CPUs on the node, rather than to a single
> > CPU (eg, kswapd()).
> > 
> > 
> > So what to do?  Maybe add a new kthread_create_node() which prepares a
> > kthread whose memory is bound to that node, then add a
> > kthread_create_cpu() convenience wrapper around that?
> > 
> 
> We probably can add the "bind to cpu" as a fifth patch, to avoid one 
> kthread_bind(p, cpu);  done by these callers.
> 
> My reasoning not including this kthread_bind(p, cpu) in initial patch
> series that I was focusing on NUMA properties first, not on scheduling
> (this part already runs correctly as far as I know)
> 
> Thanks for taking the patch series, I was about to resubmit it today :)
> 

but but but.  The name "kthread_create_on_cpu" sucks.  It's plain wrong.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-12-10  6:32                   ` Andrew Morton
@ 2010-12-10  7:02                     ` Eric Dumazet
  2010-12-10  7:09                       ` Andrew Morton
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2010-12-10  7:02 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

Le jeudi 09 décembre 2010 à 22:32 -0800, Andrew Morton a écrit :

> but but but.  The name "kthread_create_on_cpu" sucks.  It's plain wrong.

Okay you are right Andrew ;) I dont have better idea for the moment.

Note that all callers I converted really create one kthread per cpu, not
per node. They didnt care of node affinity, only me :)

kthread_create_on_node() seems misleading to me (some cpus run on
memoryless nodes)




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-12-10  7:02                     ` Eric Dumazet
@ 2010-12-10  7:09                       ` Andrew Morton
  2010-12-13  6:26                         ` Thomas Fjellstrom
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Morton @ 2010-12-10  7:09 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, linux-kernel, netdev, David Miller, Tejun Heo,
	Rusty Russell, Tony Luck, Fenghua Yu

On Fri, 10 Dec 2010 08:02:41 +0100 Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le jeudi 09 décembre 2010 à 22:32 -0800, Andrew Morton a écrit :
> 
> > but but but.  The name "kthread_create_on_cpu" sucks.  It's plain wrong.
> 
> Okay you are right Andrew ;) I dont have better idea for the moment.

Dunno.  kthread_create_with_memory_on_node() :)

How's about kthread_create_for_node()?  That's sufficiently vague to
not mislead readers into thinking that it schedules the thread on that
CPU and leaves room in the namespace for a real kthread_create_on_cpu()
(which we could well end up creating).

kthread_create_node_mem()?

> Note that all callers I converted really create one kthread per cpu, not
> per node. They didnt care of node affinity, only me :)
> 
> kthread_create_on_node() seems misleading to me (some cpus run on
> memoryless nodes)

True, but what we're doing here is specifying on which node the
kthread's memory resources should reside - we need to do that even for
CPUs which live on memoryless nodes.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu()
  2010-12-10  7:09                       ` Andrew Morton
@ 2010-12-13  6:26                         ` Thomas Fjellstrom
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Fjellstrom @ 2010-12-13  6:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Eric Dumazet, Andi Kleen, linux-kernel, netdev, David Miller,
	Tejun Heo, Rusty Russell, Tony Luck, Fenghua Yu

On December 10, 2010, Andrew Morton wrote:
> On Fri, 10 Dec 2010 08:02:41 +0100 Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Le jeudi 09 décembre 2010 à 22:32 -0800, Andrew Morton a écrit :
> > > but but but.  The name "kthread_create_on_cpu" sucks.  It's plain
> > > wrong.
> > 
> > Okay you are right Andrew ;) I dont have better idea for the moment.
> 
> Dunno.  kthread_create_with_memory_on_node() :)
> 
> How's about kthread_create_for_node()?  That's sufficiently vague to
> not mislead readers into thinking that it schedules the thread on that
> CPU and leaves room in the namespace for a real kthread_create_on_cpu()
> (which we could well end up creating).
> 
> kthread_create_node_mem()?

Just a kernel newb here, how about kthread_create_from_cpu ? unless that has 
some connotations I'm not aware of.

> > Note that all callers I converted really create one kthread per cpu, not
> > per node. They didnt care of node affinity, only me :)
> > 
> > kthread_create_on_node() seems misleading to me (some cpus run on
> > memoryless nodes)
> 
> True, but what we're doing here is specifying on which node the
> kthread's memory resources should reside - we need to do that even for
> CPUs which live on memoryless nodes.
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


-- 
Thomas Fjellstrom
thomas@fjellstrom.ca

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2010-12-13  6:26 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-28 19:33 [PATCH] kthread: NUMA aware kthread_create_on_cpu() Eric Dumazet
2010-11-28 22:40 ` Andi Kleen
2010-11-28 22:51   ` Eric Dumazet
2010-11-28 23:01     ` Andi Kleen
2010-11-28 23:37       ` Eric Dumazet
2010-11-29  9:05         ` Andi Kleen
2010-11-29  9:38           ` Eric Dumazet
2010-11-29 15:13             ` [PATCH v2 0/4] " Eric Dumazet
2010-11-29 16:09               ` Andi Kleen
2010-11-29 17:39               ` David Miller
2010-11-29 17:59                 ` Eric Dumazet
2010-11-29 23:31                   ` Rusty Russell
2010-11-29 15:13             ` [PATCH v2 1/4] mm: NUMA aware alloc_task_struct_node() Eric Dumazet
2010-11-29 15:14             ` [PATCH v2 2/4] mm: NUMA aware alloc_thread_info_node() Eric Dumazet
2010-11-29 15:14             ` [PATCH v2 3/4] kthread: NUMA aware kthread_create_on_cpu() Eric Dumazet
2010-12-10  0:44               ` Andrew Morton
2010-12-10  5:59                 ` Eric Dumazet
2010-12-10  6:32                   ` Andrew Morton
2010-12-10  7:02                     ` Eric Dumazet
2010-12-10  7:09                       ` Andrew Morton
2010-12-13  6:26                         ` Thomas Fjellstrom
2010-11-29 15:15             ` [PATCH v2 4/4] kthread: use kthread_create_on_cpu() Eric Dumazet
2010-11-29 15:19               ` Tejun Heo
2010-11-30  9:38             ` David Howells
2010-11-30  9:59               ` Eric Dumazet
2010-11-29  9:03       ` [PATCH] kthread: NUMA aware kthread_create_on_cpu() Américo Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.