linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: module changes
       [not found] <20030217213221.3103.qmail@eklektix.com>
@ 2003-02-18  7:19 ` Rusty Russell
  2003-02-18 15:56   ` Mikael Pettersson
  0 siblings, 1 reply; 8+ messages in thread
From: Rusty Russell @ 2003-02-18  7:19 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: linux-kernel, rth, davidm, paulus

In message <20030217213221.3103.qmail@eklektix.com> you write:
> I had a quick question: is the inability to declare per-CPU variables in
> modules going to be permanent?

<sigh>

I'd really like to fix it, but that's *hard*.

<think think think, ask Paulus>

There might be a neater way, and there's definitely a more
space-efficient way, but this is a start (the wastage is small as long
as there are only a few per-cpu vars, as there are at the moment).

Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: per-cpu support inside modules
Author: Rusty Russell
Status: Tested on 2.5.62

D: This adds percpu support for modules.  A module cannot have more
D: percpu data than the base kernel does (on my kernel 5636 bytes).
D: This version is quite wasteful: we could allocate them separately
D: in base-kernel-size chunks, and/or only allocate up to the max
D: cpu_possible() cpu, if we cared enough.

diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.62/include/asm-generic/percpu.h working-2.5.62-percpu-modules/include/asm-generic/percpu.h
--- linux-2.5.62/include/asm-generic/percpu.h	2003-01-02 12:32:47.000000000 +1100
+++ working-2.5.62-percpu-modules/include/asm-generic/percpu.h	2003-02-18 11:40:42.000000000 +1100
@@ -8,10 +8,8 @@
 extern unsigned long __per_cpu_offset[NR_CPUS];
 
 /* Separate out the type, so (int[3], foo) works. */
-#ifndef MODULE
 #define DEFINE_PER_CPU(type, name) \
     __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
-#endif
 
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
@@ -19,11 +17,8 @@ extern unsigned long __per_cpu_offset[NR
 
 #else /* ! SMP */
 
-/* Can't define per-cpu variables in modules.  Sorry --RR */
-#ifndef MODULE
 #define DEFINE_PER_CPU(type, name) \
     __typeof__(type) name##__per_cpu
-#endif
 
 #define per_cpu(var, cpu)			((void)cpu, var##__per_cpu)
 #define __get_cpu_var(var)			var##__per_cpu
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.62/include/linux/module.h working-2.5.62-percpu-modules/include/linux/module.h
--- linux-2.5.62/include/linux/module.h	2003-02-18 11:18:56.000000000 +1100
+++ working-2.5.62-percpu-modules/include/linux/module.h	2003-02-18 13:43:06.000000000 +1100
@@ -264,6 +264,11 @@ struct module
 	char *strtab;
 #endif
 
+#ifdef CONFIG_SMP
+	/* How far into module_core is the per-cpu stuff (it's at the end) */
+	unsigned long percpu_offset;
+#endif
+
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */
 	char *args;
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.62/kernel/module.c working-2.5.62-percpu-modules/kernel/module.c
--- linux-2.5.62/kernel/module.c	2003-02-18 11:18:57.000000000 +1100
+++ working-2.5.62-percpu-modules/kernel/module.c	2003-02-18 17:23:15.000000000 +1100
@@ -980,6 +980,66 @@ static long get_offset(unsigned long *si
 	return ret;
 }
 
+#ifdef CONFIG_SMP
+/* Created by linker magic */
+extern char __per_cpu_start[], __per_cpu_end[];
+
+static int add_percpu_area(Elf_Shdr *sechdrs,
+			   unsigned int pcpuindex,
+			   struct module *mod)
+{
+	unsigned long size;
+
+	if (!pcpuindex)
+		return 0;
+
+	/* We're stuck with the size used by the core kernel. */
+	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+	if (sechdrs[pcpuindex].sh_size > size) {
+		printk(KERN_ERR "%s: too much percpu data: %li vs %li\n",
+		       mod->name, (long)__per_cpu_end - (long)__per_cpu_start,
+		       (long)size);
+		return -EINVAL;
+	}
+	mod->core_size = ALIGN(mod->core_size, SMP_CACHE_BYTES);
+	mod->percpu_offset = mod->core_size;
+	mod->core_size += NR_CPUS * size;
+	return 0;
+}
+
+/* Size of section 0 is 0, so this is a noop if no percpu section */
+static void copy_percpu_area(Elf_Shdr *percpusec, struct module *mod)
+{
+	unsigned long i, size;
+
+	/* Duplicate the sections. */
+	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+	for (i = 0; i < NR_CPUS; i++)
+		memcpy(mod->module_core + mod->percpu_offset + size*i,
+		       (void *)percpusec->sh_addr,
+		       percpusec->sh_size);
+
+	/* Change address of per-cpu section so that address +
+           __per_cpu_offset[0] gives new location */
+	percpusec->sh_addr = (unsigned long)mod->module_core
+		+ mod->percpu_offset
+		- __per_cpu_offset[0];
+}
+#else
+static int add_percpu_area(Elf_Shdr *sechdrs,
+			   unsigned int pcpuindex,
+			   struct module *mod)
+{
+	/* Shouldn't be here on non-SMP, but act normally. */
+	if (pcpuindex)
+		sechdrs[pcpuindex].sh_flags |= SHF_ALLOC;
+	return 0;
+}
+static void copy_percpu_area(Elf_Shdr *percpusec, struct module *mod)
+{
+}
+#endif
+
 /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld
    might -- code, read-only data, read-write data, small data.  Tally
    sizes, and place the offsets into sh_entsize fields: high bit means it
@@ -1069,7 +1131,7 @@ static struct module *load_module(void *
 	char *secstrings, *args;
 	unsigned int i, symindex, exportindex, strindex, setupindex, exindex,
 		modindex, obsparmindex, licenseindex, gplindex, vmagindex,
-		crcindex, gplcrcindex, versindex;
+		crcindex, gplcrcindex, versindex, pcpuindex;
 	long arglen;
 	struct module *mod;
 	long err = 0;
@@ -1106,7 +1168,7 @@ static struct module *load_module(void *
 	/* May not export symbols, or have setup params, so these may
            not exist */
 	exportindex = setupindex = obsparmindex = gplindex = licenseindex 
-		= crcindex = gplcrcindex = versindex = 0;
+		= crcindex = gplcrcindex = versindex = pcpuindex = 0;
 
 	/* And these should exist, but gcc whinges if we don't init them */
 	symindex = strindex = exindex = modindex = vmagindex = 0;
@@ -1171,6 +1233,12 @@ static struct module *load_module(void *
 			licenseindex = i;
 			sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
 		} else if (strcmp(secstrings+sechdrs[i].sh_name,
+				  ".data.percpu") == 0) {
+			/* Per-cpu data. */
+			DEBUGP("Per-cpu data found in section %u\n", i);
+			pcpuindex = i;
+			sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
+		} else if (strcmp(secstrings+sechdrs[i].sh_name,
 				  "__vermagic") == 0 &&
 			   (sechdrs[i].sh_flags & SHF_ALLOC)) {
 			/* Version magic. */
@@ -1255,6 +1323,10 @@ static struct module *load_module(void *
 	   special cases for the architectures. */
 	layout_sections(mod, hdr, sechdrs, secstrings);
 
+	err = add_percpu_area(sechdrs, pcpuindex, mod);
+	if (err < 0)
+		goto free_mod;
+
 	/* Do the allocs. */
 	ptr = module_alloc(mod->core_size);
 	if (!ptr) {
@@ -1294,6 +1366,9 @@ static struct module *load_module(void *
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+	/* Copy the per-cpu variables across for each CPU. */
+	copy_percpu_area(&sechdrs[pcpuindex], mod);
+
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-18  7:19 ` module changes Rusty Russell
@ 2003-02-18 15:56   ` Mikael Pettersson
  2003-02-18 16:05     ` John Levon
  2003-02-18 22:24     ` Rusty Russell
  0 siblings, 2 replies; 8+ messages in thread
From: Mikael Pettersson @ 2003-02-18 15:56 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel

Rusty Russell writes:
 > In message <20030217213221.3103.qmail@eklektix.com> you write:
 > > I had a quick question: is the inability to declare per-CPU variables in
 > > modules going to be permanent?
 > 
 > <sigh>
 > 
 > I'd really like to fix it, but that's *hard*.
 > 
 > <think think think, ask Paulus>
 > 
 > There might be a neater way, and there's definitely a more
 > space-efficient way, but this is a start (the wastage is small as long
 > as there are only a few per-cpu vars, as there are at the moment).
 > 
 > Rusty.
 > --
 >   Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
 > 
 > Name: per-cpu support inside modules
 > Author: Rusty Russell
 > Status: Tested on 2.5.62
 > 
 > D: This adds percpu support for modules.  A module cannot have more
 > D: percpu data than the base kernel does (on my kernel 5636 bytes).

This limitation is quite horrible.

Does the implementation have to be perfect? The per_cpu API can easily
be simulated using good old NR_CPUS arrays:

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,8) && !defined(MODULE)
#include <linux/percpu.h>	/* why doesn't this work for modules? */
#else
/* Simulate per_cpu() for older kernels or modular builds. */
#define DEFINE_PER_CPU(type, name) \
	__typeof__(type) name[NR_CPUS] __cacheline_aligned
#define per_cpu(var, cpu)	((var)[(cpu)])
#endif

Yes it wastes space. Big deal. Rather that than arbitrary limitations.

/Mikael

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-18 15:56   ` Mikael Pettersson
@ 2003-02-18 16:05     ` John Levon
  2003-02-18 22:24     ` Rusty Russell
  1 sibling, 0 replies; 8+ messages in thread
From: John Levon @ 2003-02-18 16:05 UTC (permalink / raw)
  To: Mikael Pettersson; +Cc: Rusty Russell, linux-kernel

On Tue, Feb 18, 2003 at 04:56:11PM +0100, Mikael Pettersson wrote:

> Does the implementation have to be perfect? The per_cpu API can easily
> be simulated using good old NR_CPUS arrays:

I agree, the main advantage of having per-cpu API available to modules
is that code can just use it and not worry about module vs. monolithic.
Some obscure limitation doesn't sound like a good idea in that respect
:)

regards
john

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-18 15:56   ` Mikael Pettersson
  2003-02-18 16:05     ` John Levon
@ 2003-02-18 22:24     ` Rusty Russell
  2003-02-19  3:51       ` Andrew Morton
  2003-02-19 12:52       ` Mikael Pettersson
  1 sibling, 2 replies; 8+ messages in thread
From: Rusty Russell @ 2003-02-18 22:24 UTC (permalink / raw)
  To: Mikael Pettersson; +Cc: linux-kernel

In message <15954.22427.557293.353363@gargle.gargle.HOWL> you write:
> Rusty Russell writes:
>  > D: This adds percpu support for modules.  A module cannot have more
>  > D: percpu data than the base kernel does (on my kernel 5636 bytes).
> 
> This limitation is quite horrible.
> 
> Does the implementation have to be perfect? The per_cpu API can easily
> be simulated using good old NR_CPUS arrays:

The problem is that then you have to have to know whether this is a
per-cpu thing created in a module, or not, when you use it 8(

There are two things we can use to alleviate the problem.  The first
would be to put a minimal cap on the per-cpu data size (eg. 8k).  The
other possibility is to allocate on an object granularity, in which
case the rule becomes "no single per-cpu object can be larger than
XXX", but the cost is to write a mini allocator.

I agree with you (and John) about disliking the limitation, but is it
worse than the current no per-cpu stuff in modules at all?

Thanks!
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-18 22:24     ` Rusty Russell
@ 2003-02-19  3:51       ` Andrew Morton
  2003-02-19  4:28         ` Rusty Russell
  2003-02-19 12:52       ` Mikael Pettersson
  1 sibling, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2003-02-19  3:51 UTC (permalink / raw)
  To: Rusty Russell; +Cc: mikpe, linux-kernel

Rusty Russell <rusty@rustcorp.com.au> wrote:
>
> In message <15954.22427.557293.353363@gargle.gargle.HOWL> you write:
> > Rusty Russell writes:
> >  > D: This adds percpu support for modules.  A module cannot have more
> >  > D: percpu data than the base kernel does (on my kernel 5636 bytes).
> > 
> > This limitation is quite horrible.
> > 
> > Does the implementation have to be perfect? The per_cpu API can easily
> > be simulated using good old NR_CPUS arrays:
> 
> The problem is that then you have to have to know whether this is a
> per-cpu thing created in a module, or not, when you use it 8(
> 
> There are two things we can use to alleviate the problem.  The first
> would be to put a minimal cap on the per-cpu data size (eg. 8k).  The
> other possibility is to allocate on an object granularity, in which
> case the rule becomes "no single per-cpu object can be larger than
> XXX", but the cost is to write a mini allocator.
> 

Is kmalloc_percpu() not suitable?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-19  3:51       ` Andrew Morton
@ 2003-02-19  4:28         ` Rusty Russell
  0 siblings, 0 replies; 8+ messages in thread
From: Rusty Russell @ 2003-02-19  4:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: mikpe, linux-kernel

In message <20030218195140.27b0798f.akpm@digeo.com> you write:
> Rusty Russell <rusty@rustcorp.com.au> wrote:
> > The problem is that then you have to have to know whether this is a
> > per-cpu thing created in a module, or not, when you use it 8(
> > 
> > There are two things we can use to alleviate the problem.  The first
> > would be to put a minimal cap on the per-cpu data size (eg. 8k).  The
> > other possibility is to allocate on an object granularity, in which
> > case the rule becomes "no single per-cpu object can be larger than
> > XXX", but the cost is to write a mini allocator.
> > 
> 
> Is kmalloc_percpu() not suitable?

Unfortunately, no.  You have to use the same offsets as the in-kernel
DEFINE_PER_CPU declarations, meaning that each cpu's stuff needs to be
"sizeof kernel per-cpu-section" apart.

Which means an allocator which keeps a linked list of NR_CPUS * sizeof
kernel-per-cpu-section things, and allocs and frees from that.

Hence this patch just tacks it on the end of the module, rather than
deal with an allocator.  A minor improvement would be only to allocate
for the maximum possible CPU, which means about 6k * numcpus per
module which declares per-cpu data, which is probably fine.

Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-18 22:24     ` Rusty Russell
  2003-02-19  3:51       ` Andrew Morton
@ 2003-02-19 12:52       ` Mikael Pettersson
  2003-02-20  0:15         ` Rusty Russell
  1 sibling, 1 reply; 8+ messages in thread
From: Mikael Pettersson @ 2003-02-19 12:52 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel

Rusty Russell writes:
 > In message <15954.22427.557293.353363@gargle.gargle.HOWL> you write:
 > > Rusty Russell writes:
 > >  > D: This adds percpu support for modules.  A module cannot have more
 > >  > D: percpu data than the base kernel does (on my kernel 5636 bytes).
 > > 
 > > This limitation is quite horrible.
 > > 
 > > Does the implementation have to be perfect? The per_cpu API can easily
 > > be simulated using good old NR_CPUS arrays:
 > 
 > The problem is that then you have to have to know whether this is a
 > per-cpu thing created in a module, or not, when you use it 8(

Ah yes. I totally missed that. (Shakes head in disbelief.)

 > I agree with you (and John) about disliking the limitation, but is it
 > worse than the current no per-cpu stuff in modules at all?

In my case (perfctr driver) it means not being able to use per-cpu
stuff at all since I need to be able to build it modular. Or I have
to hide per_cpu() behind private macros that fall back to an [NR_CPUS]
implementation in the modular case. I can live with that.

/Mikael

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: module changes
  2003-02-19 12:52       ` Mikael Pettersson
@ 2003-02-20  0:15         ` Rusty Russell
  0 siblings, 0 replies; 8+ messages in thread
From: Rusty Russell @ 2003-02-20  0:15 UTC (permalink / raw)
  To: Mikael Pettersson; +Cc: linux-kernel

In message <15955.32295.830237.912@gargle.gargle.HOWL> you write:
> Rusty Russell writes:
>  > The problem is that then you have to have to know whether this is a
>  > per-cpu thing created in a module, or not, when you use it 8(
> 
> Ah yes. I totally missed that. (Shakes head in disbelief.)

<shrug> It's an easy thing to miss.

>  > I agree with you (and John) about disliking the limitation, but is it
>  > worse than the current no per-cpu stuff in modules at all?
> 
> In my case (perfctr driver) it means not being able to use per-cpu
> stuff at all since I need to be able to build it modular. Or I have
> to hide per_cpu() behind private macros that fall back to an [NR_CPUS]
> implementation in the modular case. I can live with that.

Well, there's kmalloc_percpu() there already.  But perfctr certainly
won't hit the "more per-cpu data than the core kernel" case, from my
brief reading of the code.  If something does, then the core kernel
minimum can be increased (which is a little hacky, but hey).

Thanks,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2003-02-20  0:25 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20030217213221.3103.qmail@eklektix.com>
2003-02-18  7:19 ` module changes Rusty Russell
2003-02-18 15:56   ` Mikael Pettersson
2003-02-18 16:05     ` John Levon
2003-02-18 22:24     ` Rusty Russell
2003-02-19  3:51       ` Andrew Morton
2003-02-19  4:28         ` Rusty Russell
2003-02-19 12:52       ` Mikael Pettersson
2003-02-20  0:15         ` Rusty Russell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).