linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Fake NUMA emulation for PowerPC (Take 2)
@ 2007-12-07 22:37 Balbir Singh
  2007-12-10 19:36 ` Balbir Singh
                   ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Balbir Singh @ 2007-12-07 22:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Balbir Singh, LKML


Changelog

1. Get rid of the constant 5 (based on comments from
                                Geert.Uytterhoeven@sonycom.com)
2. Implement suggestions from Olof Johansson
3. Check if cmdline is NULL in fake_numa_create_new_node()

Tested with additional parameters from Olof

numa=debug,fake=
numa=foo,fake=bar


Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
NUMA nodes can be specified using the following command line option

numa=fake=<node range>

node range is of the format <range1>,<range2>,...<rangeN>

Each of the rangeX parameters is parsed using memparse(). I find the patch
useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
on a non-numa box with the following arguments

numa=fake=1G
numa=fake=1G,2G
numa=fake=1G,512M,2G
numa=fake=1500M,2800M mem=3500M
numa=fake=1G mem=512M
numa=fake=1G mem=1G

This patch applies on top of 2.6.24-rc4.

Although I've tried my best to handle some of the architecture specific
details of PowerPC, I might have overlooked something obvious, like the usage
of an API or some architecture tweaks. The patch depends on CONFIG_NUMA and
I decided against creating a separate config option for fake NUMA to keep
the code simple.

Comments are as always welcome!

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 arch/powerpc/mm/numa.c |   59 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 5 deletions(-)

diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
--- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
+++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 03:19:46.000000000 +0530
@@ -24,6 +24,8 @@
 
 static int numa_enabled = 1;
 
+static char *cmdline __initdata;
+
 static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
@@ -39,6 +41,43 @@ static bootmem_data_t __initdata plat_no
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
+static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+						unsigned int *nid)
+{
+	unsigned long long mem;
+	char *p = cmdline;
+	static unsigned int fake_nid = 0;
+	static unsigned long long curr_boundary = 0;
+
+	*nid = fake_nid;
+	if (!p)
+		return 0;
+
+	mem = memparse(p, &p);
+	if (!mem)
+		return 0;
+
+	if (mem < curr_boundary)
+		return 0;
+
+	curr_boundary = mem;
+
+	if ((end_pfn << PAGE_SHIFT) > mem) {
+		/*
+		 * Skip commas and spaces
+		 */
+		while (*p == ',' || *p == ' ' || *p == '\t')
+			p++;
+
+		cmdline = p;
+		fake_nid++;
+		*nid = fake_nid;
+		dbg("created new fake_node with id %d\n", fake_nid);
+		return 1;
+	}
+	return 0;
+}
+
 static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
@@ -344,12 +383,14 @@ static void __init parse_drconf_memory(s
 			if (nid == 0xffff || nid >= MAX_NUMNODES)
 				nid = default_nid;
 		}
-		node_set_online(nid);
 
 		size = numa_enforce_memory_limit(start, lmb_size);
 		if (!size)
 			continue;
 
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+		node_set_online(nid);
+
 		add_active_range(nid, start >> PAGE_SHIFT,
 				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
 	}
@@ -429,7 +470,6 @@ new_range:
 		nid = of_node_to_nid_single(memory);
 		if (nid < 0)
 			nid = default_nid;
-		node_set_online(nid);
 
 		if (!(size = numa_enforce_memory_limit(start, size))) {
 			if (--ranges)
@@ -438,6 +478,9 @@ new_range:
 				continue;
 		}
 
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+		node_set_online(nid);
+
 		add_active_range(nid, start >> PAGE_SHIFT,
 				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
 
@@ -461,7 +504,7 @@ static void __init setup_nonnuma(void)
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long total_ram = lmb_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
-	unsigned int i;
+	unsigned int i, nid = 0;
 
 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
@@ -471,9 +514,11 @@ static void __init setup_nonnuma(void)
 	for (i = 0; i < lmb.memory.cnt; ++i) {
 		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
 		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
-		add_active_range(0, start_pfn, end_pfn);
+
+		fake_numa_create_new_node(end_pfn, &nid);
+		add_active_range(nid, start_pfn, end_pfn);
+		node_set_online(nid);
 	}
-	node_set_online(0);
 }
 
 void __init dump_numa_cpu_topology(void)
@@ -702,6 +747,10 @@ static int __init early_numa(char *p)
 	if (strstr(p, "debug"))
 		numa_debug = 1;
 
+	p = strstr(p, "fake=");
+	if (p)
+		cmdline = p + strlen("fake=");
+
 	return 0;
 }
 early_param("numa", early_numa);
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2007-12-07 22:37 [PATCH] Fake NUMA emulation for PowerPC (Take 2) Balbir Singh
@ 2007-12-10 19:36 ` Balbir Singh
  2007-12-10 23:07 ` Olof Johansson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2007-12-10 19:36 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: LKML, Andrew Morton

Balbir Singh wrote:
> Changelog
> 
> 1. Get rid of the constant 5 (based on comments from
>                                 Geert.Uytterhoeven@sonycom.com)
> 2. Implement suggestions from Olof Johannson
> 3. Check if cmdline is NULL in fake_numa_create_new_node()
> 
> Tested with additional parameters from Olof
> 
> numa=debug,fake=
> numa=foo,fake=bar
> 
> 
> Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
> NUMA nodes can be specified using the following command line option
> 
> numa=fake=<node range>
> 
> node range is of the format <range1>,<range2>,...<rangeN>
> 
> Each of the rangeX parameters is passed using memparse(). I find the patch
> useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
> on a non-numa box with the following arguments
> 
> numa=fake=1G
> numa=fake=1G,2G
> name=fake=1G,512M,2G
> numa=fake=1500M,2800M mem=3500M
> numa=fake=1G mem=512M
> numa=fake=1G mem=1G
> 
> This patch applies on top of 2.6.24-rc4.
> 
> All though I've tried my best to handle some of the architecture specific
> details of PowerPC, I might have overlooked something obvious, like the usage
> of an API or some architecture tweaks. The patch depends on CONFIG_NUMA and
> I decided against creating a separate config option for fake NUMA to keep
> the code simple.
> 
> Comments are as always welcome!
> 
> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
> ---
> 
>  arch/powerpc/mm/numa.c |   59 ++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 54 insertions(+), 5 deletions(-)
> 
> diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
> --- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
> +++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 03:19:46.000000000 +0530
> @@ -24,6 +24,8 @@
> 
>  static int numa_enabled = 1;
> 
> +static char *cmdline __initdata;
> +
>  static int numa_debug;
>  #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
> 
> @@ -39,6 +41,43 @@ static bootmem_data_t __initdata plat_no
>  static int min_common_depth;
>  static int n_mem_addr_cells, n_mem_size_cells;
> 
> +static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
> +						unsigned int *nid)
> +{
> +	unsigned long long mem;
> +	char *p = cmdline;
> +	static unsigned int fake_nid = 0;
> +	static unsigned long long curr_boundary = 0;
> +
> +	*nid = fake_nid;
> +	if (!p)
> +		return 0;
> +
> +	mem = memparse(p, &p);
> +	if (!mem)
> +		return 0;
> +
> +	if (mem < curr_boundary)
> +		return 0;
> +
> +	curr_boundary = mem;
> +
> +	if ((end_pfn << PAGE_SHIFT) > mem) {
> +		/*
> +		 * Skip commas and spaces
> +		 */
> +		while (*p == ',' || *p == ' ' || *p == '\t')
> +			p++;
> +
> +		cmdline = p;
> +		fake_nid++;
> +		*nid = fake_nid;
> +		dbg("created new fake_node with id %d\n", fake_nid);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
>  static void __cpuinit map_cpu_to_node(int cpu, int node)
>  {
>  	numa_cpu_lookup_table[cpu] = node;
> @@ -344,12 +383,14 @@ static void __init parse_drconf_memory(s
>  			if (nid == 0xffff || nid >= MAX_NUMNODES)
>  				nid = default_nid;
>  		}
> -		node_set_online(nid);
> 
>  		size = numa_enforce_memory_limit(start, lmb_size);
>  		if (!size)
>  			continue;
> 
> +		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> +		node_set_online(nid);
> +
>  		add_active_range(nid, start >> PAGE_SHIFT,
>  				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
>  	}
> @@ -429,7 +470,6 @@ new_range:
>  		nid = of_node_to_nid_single(memory);
>  		if (nid < 0)
>  			nid = default_nid;
> -		node_set_online(nid);
> 
>  		if (!(size = numa_enforce_memory_limit(start, size))) {
>  			if (--ranges)
> @@ -438,6 +478,9 @@ new_range:
>  				continue;
>  		}
> 
> +		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> +		node_set_online(nid);
> +
>  		add_active_range(nid, start >> PAGE_SHIFT,
>  				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
> 
> @@ -461,7 +504,7 @@ static void __init setup_nonnuma(void)
>  	unsigned long top_of_ram = lmb_end_of_DRAM();
>  	unsigned long total_ram = lmb_phys_mem_size();
>  	unsigned long start_pfn, end_pfn;
> -	unsigned int i;
> +	unsigned int i, nid = 0;
> 
>  	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
>  	       top_of_ram, total_ram);
> @@ -471,9 +514,11 @@ static void __init setup_nonnuma(void)
>  	for (i = 0; i < lmb.memory.cnt; ++i) {
>  		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
>  		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
> -		add_active_range(0, start_pfn, end_pfn);
> +
> +		fake_numa_create_new_node(end_pfn, &nid);
> +		add_active_range(nid, start_pfn, end_pfn);
> +		node_set_online(nid);
>  	}
> -	node_set_online(0);
>  }
> 
>  void __init dump_numa_cpu_topology(void)
> @@ -702,6 +747,10 @@ static int __init early_numa(char *p)
>  	if (strstr(p, "debug"))
>  		numa_debug = 1;
> 
> +	p = strstr(p, "fake=");
> +	if (p)
> +		cmdline = p + strlen("fake=");
> +
>  	return 0;
>  }
>  early_param("numa", early_numa);
> _
> 


If there are no other major objections, could we get this infrastructure
into -mm?


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2007-12-07 22:37 [PATCH] Fake NUMA emulation for PowerPC (Take 2) Balbir Singh
  2007-12-10 19:36 ` Balbir Singh
@ 2007-12-10 23:07 ` Olof Johansson
  2008-01-18  5:34 ` Michael Ellerman
  2008-01-18  5:55 ` [PATCH] Fake NUMA emulation for PowerPC (Take 2) Michael Ellerman
  3 siblings, 0 replies; 18+ messages in thread
From: Olof Johansson @ 2007-12-10 23:07 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, LKML

On Sat, Dec 08, 2007 at 04:07:14AM +0530, Balbir Singh wrote:

> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>

Looks good to me. Sure, it could be fleshed out to something more
generic and in common code, but this is small and simple and doesn't
bloat the kernel much as it stands, and it has value for debugging.

Acked-by: Olof Johansson <olof@lixom.net>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2007-12-07 22:37 [PATCH] Fake NUMA emulation for PowerPC (Take 2) Balbir Singh
  2007-12-10 19:36 ` Balbir Singh
  2007-12-10 23:07 ` Olof Johansson
@ 2008-01-18  5:34 ` Michael Ellerman
  2008-01-18  5:41   ` Balbir Singh
  2008-01-18  5:44   ` Michael Ellerman
  2008-01-18  5:55 ` [PATCH] Fake NUMA emulation for PowerPC (Take 2) Michael Ellerman
  3 siblings, 2 replies; 18+ messages in thread
From: Michael Ellerman @ 2008-01-18  5:34 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, LKML

[-- Attachment #1: Type: text/plain, Size: 803 bytes --]

On Sat, 2007-12-08 at 04:07 +0530, Balbir Singh wrote:
> Changelog
> 
> 1. Get rid of the constant 5 (based on comments from
>                                 Geert.Uytterhoeven@sonycom.com)
> 2. Implement suggestions from Olof Johannson
> 3. Check if cmdline is NULL in fake_numa_create_new_node()
> 
> Tested with additional parameters from Olof
> 
> numa=debug,fake=
> numa=foo,fake=bar


I'm not sure why yet, but git bisect tells me it's this patch that's
causing the for-2.6.25 tree to explode on boot on cell machines.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-18  5:34 ` Michael Ellerman
@ 2008-01-18  5:41   ` Balbir Singh
  2008-01-18  5:44   ` Michael Ellerman
  1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2008-01-18  5:41 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, LKML

* Michael Ellerman <michael@ellerman.id.au> [2008-01-18 16:34:53]:

> On Sat, 2007-12-08 at 04:07 +0530, Balbir Singh wrote:
> > Changelog
> > 
> > 1. Get rid of the constant 5 (based on comments from
> >                                 Geert.Uytterhoeven@sonycom.com)
> > 2. Implement suggestions from Olof Johannson
> > 3. Check if cmdline is NULL in fake_numa_create_new_node()
> > 
> > Tested with additional parameters from Olof
> > 
> > numa=debug,fake=
> > numa=foo,fake=bar
> 
> 
> I'm not sure why yet, but git bisect tells me it's this patch that's
> causing the for-2.6.25 tree to explode on boot on cell machines.
>

Hi,

Do you boot with numa=<options> on your machine? Could I have your
machine configuration? Any OOPS/log would be helpful.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-18  5:34 ` Michael Ellerman
  2008-01-18  5:41   ` Balbir Singh
@ 2008-01-18  5:44   ` Michael Ellerman
  2008-01-18  7:08     ` Balbir Singh
  2008-01-26  7:13     ` Balbir Singh
  1 sibling, 2 replies; 18+ messages in thread
From: Michael Ellerman @ 2008-01-18  5:44 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, LKML, Paul Mackerras

[-- Attachment #1: Type: text/plain, Size: 1358 bytes --]

On Fri, 2008-01-18 at 16:34 +1100, Michael Ellerman wrote:
> On Sat, 2007-12-08 at 04:07 +0530, Balbir Singh wrote:
> > Changelog
> > 
> > 1. Get rid of the constant 5 (based on comments from
> >                                 Geert.Uytterhoeven@sonycom.com)
> > 2. Implement suggestions from Olof Johannson
> > 3. Check if cmdline is NULL in fake_numa_create_new_node()
> > 
> > Tested with additional parameters from Olof
> > 
> > numa=debug,fake=
> > numa=foo,fake=bar
> 
> 
> I'm not sure why yet, but git bisect tells me it's this patch that's
> causing the for-2.6.25 tree to explode on boot on cell machines.

This fixes it, although I'm a little worried about some of the
removals/movings of node_set_online() in the patch.


diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 1666e7d..dcedc26 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -49,7 +49,6 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
 	static unsigned int fake_nid = 0;
 	static unsigned long long curr_boundary = 0;
 
-	*nid = fake_nid;
 	if (!p)
 		return 0;
 
@@ -60,6 +59,7 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
 	if (mem < curr_boundary)
 		return 0;
 
+	*nid = fake_nid;
 	curr_boundary = mem;
 
 	if ((end_pfn << PAGE_SHIFT) > mem) {


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2007-12-07 22:37 [PATCH] Fake NUMA emulation for PowerPC (Take 2) Balbir Singh
                   ` (2 preceding siblings ...)
  2008-01-18  5:34 ` Michael Ellerman
@ 2008-01-18  5:55 ` Michael Ellerman
  2008-01-18  6:51   ` Balbir Singh
  3 siblings, 1 reply; 18+ messages in thread
From: Michael Ellerman @ 2008-01-18  5:55 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, LKML, Paul Mackerras

[-- Attachment #1: Type: text/plain, Size: 3288 bytes --]

On Sat, 2007-12-08 at 04:07 +0530, Balbir Singh wrote:
> Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
> NUMA nodes can be specified using the following command line option
> 

> 
> Comments are as always welcome!

Here's some :)

> diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
> --- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
> +++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 03:19:46.000000000 +0530
> @@ -24,6 +24,8 @@
>  
>  static int numa_enabled = 1;
>  
> +static char *cmdline __initdata;

Can you call this fake_numa_args or something, cmdline is a bit generic.


> @@ -39,6 +41,43 @@ static bootmem_data_t __initdata plat_no
>  static int min_common_depth;
>  static int n_mem_addr_cells, n_mem_size_cells;
>  
> +static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
> +						unsigned int *nid)
> +{
> +	unsigned long long mem;
> +	char *p = cmdline;
> +	static unsigned int fake_nid = 0;
> +	static unsigned long long curr_boundary = 0;
> +
> +	*nid = fake_nid;

As I mentioned in my other email I think this is broken, you
unconditionally overwrite *nid, even if no fake numa was specified?

> +	if (!p)
> +		return 0;
> +
> +	mem = memparse(p, &p);
> +	if (!mem)
> +		return 0;
> +
> +	if (mem < curr_boundary)
> +		return 0;
> +
> +	curr_boundary = mem;
> +
> +	if ((end_pfn << PAGE_SHIFT) > mem) {
> +		/*
> +		 * Skip commas and spaces
> +		 */
> +		while (*p == ',' || *p == ' ' || *p == '\t')
> +			p++;
> +
> +		cmdline = p;
> +		fake_nid++;
> +		*nid = fake_nid;
> +		dbg("created new fake_node with id %d\n", fake_nid);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
>  static void __cpuinit map_cpu_to_node(int cpu, int node)
>  {
>  	numa_cpu_lookup_table[cpu] = node;
> @@ -344,12 +383,14 @@ static void __init parse_drconf_memory(s
>  			if (nid == 0xffff || nid >= MAX_NUMNODES)
>  				nid = default_nid;
>  		}
> -		node_set_online(nid);
>  
>  		size = numa_enforce_memory_limit(start, lmb_size);
>  		if (!size)
>  			continue;
>  
> +		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> +		node_set_online(nid);

I can't convince myself that this is 100% ok, the moving of
node_set_online(). At the very least it's a change in behaviour,
previously we would online the node regardless of the memory limit.

>  		add_active_range(nid, start >> PAGE_SHIFT,
>  				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
>  	}
> @@ -429,7 +470,6 @@ new_range:
>  		nid = of_node_to_nid_single(memory);
>  		if (nid < 0)
>  			nid = default_nid;
> -		node_set_online(nid);
>  
>  		if (!(size = numa_enforce_memory_limit(start, size))) {
>  			if (--ranges)
> @@ -438,6 +478,9 @@ new_range:
>  				continue;
>  		}
>  
> +		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> +		node_set_online(nid);

Ditto previous comment.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-18  5:55 ` [PATCH] Fake NUMA emulation for PowerPC (Take 2) Michael Ellerman
@ 2008-01-18  6:51   ` Balbir Singh
  0 siblings, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2008-01-18  6:51 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, LKML, Paul Mackerras

* Michael Ellerman <michael@ellerman.id.au> [2008-01-18 16:55:03]:

> On Sat, 2007-12-08 at 04:07 +0530, Balbir Singh wrote:
> > Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
> > NUMA nodes can be specified using the following command line option
> > 
> 
> > 
> > Comments are as always welcome!
> 
> Here's some :)
> 

Thanks!

> > diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
> > --- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
> > +++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 03:19:46.000000000 +0530
> > @@ -24,6 +24,8 @@
> >  
> >  static int numa_enabled = 1;
> >  
> > +static char *cmdline __initdata;
> 
> Can you call this fake_numa_args or something, cmdline is a bit generic.
> 


I could if it makes code easier to understand. Will put it in my TODO
list.

> 
> > @@ -39,6 +41,43 @@ static bootmem_data_t __initdata plat_no
> >  static int min_common_depth;
> >  static int n_mem_addr_cells, n_mem_size_cells;
> >  
> > +static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
> > +						unsigned int *nid)
> > +{
> > +	unsigned long long mem;
> > +	char *p = cmdline;
> > +	static unsigned int fake_nid = 0;
> > +	static unsigned long long curr_boundary = 0;
> > +
> > +	*nid = fake_nid;
> 
> As I mentioned in my other email I think this is broken, you
> unconditionally overwrite *nid, even if no fake numa was specified?
> 

Aah.. OK.. looks like a BUG. I'll also respond to your other email.


> > +	if (!p)
> > +		return 0;
> > +
> > +	mem = memparse(p, &p);
> > +	if (!mem)
> > +		return 0;
> > +
> > +	if (mem < curr_boundary)
> > +		return 0;
> > +
> > +	curr_boundary = mem;
> > +
> > +	if ((end_pfn << PAGE_SHIFT) > mem) {
> > +		/*
> > +		 * Skip commas and spaces
> > +		 */
> > +		while (*p == ',' || *p == ' ' || *p == '\t')
> > +			p++;
> > +
> > +		cmdline = p;
> > +		fake_nid++;
> > +		*nid = fake_nid;
> > +		dbg("created new fake_node with id %d\n", fake_nid);
> > +		return 1;
> > +	}
> > +	return 0;
> > +}
> > +
> >  static void __cpuinit map_cpu_to_node(int cpu, int node)
> >  {
> >  	numa_cpu_lookup_table[cpu] = node;
> > @@ -344,12 +383,14 @@ static void __init parse_drconf_memory(s
> >  			if (nid == 0xffff || nid >= MAX_NUMNODES)
> >  				nid = default_nid;
> >  		}
> > -		node_set_online(nid);
> >  
> >  		size = numa_enforce_memory_limit(start, lmb_size);
> >  		if (!size)
> >  			continue;
> >  
> > +		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> > +		node_set_online(nid);
> 
> I can't convince myself that this is 100% ok, the moving of
> node_set_online(). At the very least it's a change in behaviour,
> previously we would online the node regardless of the memory limit.
> 

Hmm.. this can be reverted, but do we gain anything by enabling nodes,
even though we are over the memory limit?


> >  		add_active_range(nid, start >> PAGE_SHIFT,
> >  				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
> >  	}
> > @@ -429,7 +470,6 @@ new_range:
> >  		nid = of_node_to_nid_single(memory);
> >  		if (nid < 0)
> >  			nid = default_nid;
> > -		node_set_online(nid);
> >  
> >  		if (!(size = numa_enforce_memory_limit(start, size))) {
> >  			if (--ranges)
> > @@ -438,6 +478,9 @@ new_range:
> >  				continue;
> >  		}
> >  
> > +		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> > +		node_set_online(nid);
> 
> Ditto previous comment.
> 

Yes, point noted.

Thanks for your review and problem report.

> cheers
> 
> -- 
> Michael Ellerman
> OzLabs, IBM Australia Development Lab
> 
> wwweb: http://michael.ellerman.id.au
> phone: +61 2 6212 1183 (tie line 70 21183)
> 
> We do not inherit the earth from our ancestors,
> we borrow it from our children. - S.M.A.R.T Person



-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-18  5:44   ` Michael Ellerman
@ 2008-01-18  7:08     ` Balbir Singh
  2008-01-26  7:13     ` Balbir Singh
  1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2008-01-18  7:08 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, LKML, Paul Mackerras

* Michael Ellerman <michael@ellerman.id.au> [2008-01-18 16:44:58]:

> On Fri, 2008-01-18 at 16:34 +1100, Michael Ellerman wrote:
> > On Sat, 2007-12-08 at 04:07 +0530, Balbir Singh wrote:
> > > Changelog
> > > 
> > > 1. Get rid of the constant 5 (based on comments from
> > >                                 Geert.Uytterhoeven@sonycom.com)
> > > 2. Implement suggestions from Olof Johannson
> > > 3. Check if cmdline is NULL in fake_numa_create_new_node()
> > > 
> > > Tested with additional parameters from Olof
> > > 
> > > numa=debug,fake=
> > > numa=foo,fake=bar
> > 
> > 
> > I'm not sure why yet, but git bisect tells me it's this patch that's
> > causing the for-2.6.25 tree to explode on boot on cell machines.
> 
> This fixes it, although I'm a little worried about some of the
> removals/movings of node_set_online() in the patch.
> 
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 1666e7d..dcedc26 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -49,7 +49,6 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
>  	static unsigned int fake_nid = 0;
>  	static unsigned long long curr_boundary = 0;
>  
> -	*nid = fake_nid;
>  	if (!p)
>  		return 0;
>  
> @@ -60,6 +59,7 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
>  	if (mem < curr_boundary)
>  		return 0;
>  
> +	*nid = fake_nid;
>  	curr_boundary = mem;
>  
>  	if ((end_pfn << PAGE_SHIFT) > mem) {
> 

This patch makes sense, ideally fake_numa_create_new_node() should
just be a no-op in the case of machines with real NUMA nodes.


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-18  5:44   ` Michael Ellerman
  2008-01-18  7:08     ` Balbir Singh
@ 2008-01-26  7:13     ` Balbir Singh
  2008-01-27 11:55       ` Paul Mackerras
  1 sibling, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2008-01-26  7:13 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, LKML, Paul Mackerras

* Michael Ellerman <michael@ellerman.id.au> [2008-01-18 16:44:58]:

> 
> This fixes it, although I'm a little worried about some of the
> removals/movings of node_set_online() in the patch.
> 
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 1666e7d..dcedc26 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -49,7 +49,6 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
>  	static unsigned int fake_nid = 0;
>  	static unsigned long long curr_boundary = 0;
>  
> -	*nid = fake_nid;
>  	if (!p)
>  		return 0;
>  
> @@ -60,6 +59,7 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
>  	if (mem < curr_boundary)
>  		return 0;
>  
> +	*nid = fake_nid;
>  	curr_boundary = mem;
>  
>  	if ((end_pfn << PAGE_SHIFT) > mem) {
> 

Hi, Michael,

Here's a better and more complete fix for the problem. Could you
please see if it works for you? I tested it on a real NUMA box and it
seemed to work fine there.

Description
-----------

This patch provides a fix for the problem found by
Michael Ellerman <michael@ellerman.id.au> while using fake NUMA nodes
on a cell box. The code modifies node id iff (as in if and only if)
fake NUMA nodes are created.

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 arch/powerpc/mm/numa.c |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff -puN arch/powerpc/mm/numa.c~fix-fake-numa-nid-on-numa arch/powerpc/mm/numa.c
--- linux-2.6.24-rc8/arch/powerpc/mm/numa.c~fix-fake-numa-nid-on-numa	2008-01-26 12:20:29.000000000 +0530
+++ linux-2.6.24-rc8-balbir/arch/powerpc/mm/numa.c	2008-01-26 12:27:53.000000000 +0530
@@ -49,7 +49,12 @@ static int __cpuinit fake_numa_create_ne
 	static unsigned int fake_nid = 0;
 	static unsigned long long curr_boundary = 0;
 
-	*nid = fake_nid;
+	/*
+	 * If we did enable fake nodes and cross a node,
+	 * remember the last node and start from there.
+	 */
+	if (fake_nid)
+		*nid = fake_nid;
 	if (!p)
 		return 0;
 
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-26  7:13     ` Balbir Singh
@ 2008-01-27 11:55       ` Paul Mackerras
  2008-01-27 15:01         ` Balbir Singh
  2008-01-28 12:52         ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3) Balbir Singh
  0 siblings, 2 replies; 18+ messages in thread
From: Paul Mackerras @ 2008-01-27 11:55 UTC (permalink / raw)
  To: balbir; +Cc: Michael Ellerman, linuxppc-dev, LKML

Balbir Singh writes:

> Here's a better and more complete fix for the problem. Could you
> please see if it works for you? I tested it on a real NUMA box and it
> seemed to work fine there.

There are a couple of other changes in behaviour that your patch
introduces, and I'd like to understand them better before taking the
patch.  First, with your patch we don't set nodes online if they end
up having no memory in them because of the memory limit, whereas
previously we did.  Secondly, in the case where we don't have NUMA
information, we now set node 0 online after adding each LMB, whereas
previously we only set it online once.

If in fact these changes are benign, then your patch description
should mention them and explain why they are benign.

Paul.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-27 11:55       ` Paul Mackerras
@ 2008-01-27 15:01         ` Balbir Singh
  2008-01-27 20:22           ` Nish Aravamudan
  2008-01-28 12:52         ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3) Balbir Singh
  1 sibling, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2008-01-27 15:01 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, LKML

* Paul Mackerras <paulus@samba.org> [2008-01-27 22:55:43]:

> Balbir Singh writes:
> 
> > Here's a better and more complete fix for the problem. Could you
> > please see if it works for you? I tested it on a real NUMA box and it
> > seemed to work fine there.
> 
> There are a couple of other changes in behaviour that your patch
> introduces, and I'd like to understand them better before taking the
> patch.  First, with your patch we don't set nodes online if they end
> up having no memory in them because of the memory limit, whereas
> previously we did.  Secondly, in the case where we don't have NUMA
> information, we now set node 0 online after adding each LMB, whereas
> previously we only set it online once.
> 
> If in fact these changes are benign, then your patch description
> should mention them and explain why they are benign.
>

Yes, they are. I'll try and justify the changes with a good detailed
changelog. If people prefer it, I can hide fake NUMA nodes under a
config option, so that it does not come enabled by default.

Thanks for keeping me honest.
 
> Paul.
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
> 

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-27 15:01         ` Balbir Singh
@ 2008-01-27 20:22           ` Nish Aravamudan
  2008-01-28  9:41             ` Balbir Singh
  0 siblings, 1 reply; 18+ messages in thread
From: Nish Aravamudan @ 2008-01-27 20:22 UTC (permalink / raw)
  To: balbir, Paul Mackerras, linuxppc-dev, LKML; +Cc: Mel Gorman

On 1/27/08, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> * Paul Mackerras <paulus@samba.org> [2008-01-27 22:55:43]:
>
> > Balbir Singh writes:
> >
> > > Here's a better and more complete fix for the problem. Could you
> > > please see if it works for you? I tested it on a real NUMA box and it
> > > seemed to work fine there.
> >
> > There are a couple of other changes in behaviour that your patch
> > introduces, and I'd like to understand them better before taking the
> > patch.  First, with your patch we don't set nodes online if they end
> > up having no memory in them because of the memory limit, whereas
> > previously we did.  Secondly, in the case where we don't have NUMA
> > information, we now set node 0 online after adding each LMB, whereas
> > previously we only set it online once.
> >
> > If in fact these changes are benign, then your patch description
> > should mention them and explain why they are benign.
> >
>
> Yes, they are. I'll try and justify the changes with a good detailed
> changelog. If people prefer it, I can hide fake NUMA nodes under a
> config option, so that it does not come enabled by default.

Sigh, there already *is* a fake NUMA config option: CONFIG_NUMA_EMU.

"CONFIG_NUMA_EMU:
  Enable NUMA emulation. A flat machine will be split
  into virtual nodes when booted with "numa=fake=N", where N is the
  number of nodes. This is only useful for debugging."

I have to assume your patch is implementing the same feature for
powerpc (really just extending the x86_64 one), and thus should share
the config option.

Any chance you can just make some of that code common? Maybe as a
follow-on patch. I expect that some of Mel's (added to Cc) work to
allow NUMA to be set on x86 more easily will flow quite simply into
adding fake NUMA support there as well. So moving the code to a common
place (at least the parsing) makes sense.

I also feel like you want to be able to online memoryless nodes --
that's where we've been hitting a number of bugs lately in the VM. I
can't tell from Paul's comment if your patch prevents that from being
faked or not.

Thanks,
Nish

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fake NUMA emulation for PowerPC (Take 2)
  2008-01-27 20:22           ` Nish Aravamudan
@ 2008-01-28  9:41             ` Balbir Singh
  0 siblings, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2008-01-28  9:41 UTC (permalink / raw)
  To: Nish Aravamudan; +Cc: Paul Mackerras, linuxppc-dev, LKML, Mel Gorman

* Nish Aravamudan <nish.aravamudan@gmail.com> [2008-01-27 12:22:54]:

> On 1/27/08, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> > * Paul Mackerras <paulus@samba.org> [2008-01-27 22:55:43]:
> >
> > > Balbir Singh writes:
> > >
> > > > Here's a better and more complete fix for the problem. Could you
> > > > please see if it works for you? I tested it on a real NUMA box and it
> > > > seemed to work fine there.
> > >
> > > There are a couple of other changes in behaviour that your patch
> > > introduces, and I'd like to understand them better before taking the
> > > patch.  First, with your patch we don't set nodes online if they end
> > > up having no memory in them because of the memory limit, whereas
> > > previously we did.  Secondly, in the case where we don't have NUMA
> > > information, we now set node 0 online after adding each LMB, whereas
> > > previously we only set it online once.
> > >
> > > If in fact these changes are benign, then your patch description
> > > should mention them and explain why they are benign.
> > >
> >
> > Yes, they are. I'll try and justify the changes with a good detailed
> > changelog. If people prefer it, I can hide fake NUMA nodes under a
> > config option, so that it does not come enabled by default.
> 
> Sigh, there already *is* a fake NUMA config option: CONFIG_NUMA_EMU.
>
> "CONFIG_NUMA_EMU:
>   Enable NUMA emulation. A flat machine will be split
>   into virtual nodes when booted with "numa=fake=N", where N is the
>   number of nodes. This is only useful for debugging."
> 
> I have to assume your patch is implementing the same feature for
> powerpc (really just extending the x86_64 one), and thus should share
> the config option.
>
> Any chance you can just make some of that code common? Maybe as a
> follow-on patch. I expect that some of Mel's (added to Cc) work to
> allow NUMA to be set on x86 more easily will flow quite simply into
> adding fake NUMA support there as well. So moving the code to a common
> place (at least the parsing) makes sense.
>

That's the long term plan and we discussed using common code in the
discussion thread for fake NUMA (for PowerPC). We'll get there in
steps. My patch is the basic initial, simple method for implementing
fake NUMA nodes.
 
> I also feel like you want to be able to online memoryless nodes --
> that's where we've been hitting a number of bugs lately in the VM. I
> can't tell from Paul's comment if your patch prevents that from being
> faked or not.
> 

My patch prevents nodes from being enabled if we cross the memory
limit. Earlier they were being enabled.


> Thanks,
> Nish
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
> 

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3)
  2008-01-27 11:55       ` Paul Mackerras
  2008-01-27 15:01         ` Balbir Singh
@ 2008-01-28 12:52         ` Balbir Singh
  2008-01-29 13:04           ` Michael Ellerman
  1 sibling, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2008-01-28 12:52 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: Michael Ellerman, linuxppc-dev, LKML

* Paul Mackerras <paulus@samba.org> [2008-01-27 22:55:43]:

> Balbir Singh writes:
> 
> > Here's a better and more complete fix for the problem. Could you
> > please see if it works for you? I tested it on a real NUMA box and it
> > seemed to work fine there.
> 
> There are a couple of other changes in behaviour that your patch
> introduces, and I'd like to understand them better before taking the
> patch.  First, with your patch we don't set nodes online if they end
> up having no memory in them because of the memory limit, whereas
> previously we did.  Secondly, in the case where we don't have NUMA
> information, we now set node 0 online after adding each LMB, whereas
> previously we only set it online once.
> 
> If in fact these changes are benign, then your patch description
> should mention them and explain why they are benign.
> 
> Paul.
>

Hi, Paul,

Here's version 3 of the patch. I've commented the side-effect of
repeatedly setting node 0 online (as to why that is done) and I've
removed the side effect of not creating memoryless nodes
(when we hit the memory limit).

I've described all my tests below
 
Changelog v3
1. Remove the side-effect of not setting nodes online if they end
   up having no memory in them because of the memory limit.

Changelog v2

1. Get rid of the constant 5 (based on comments from
                                Geert.Uytterhoeven@sonycom.com)
2. Implement suggestions from Olof Johansson
3. Check if cmdline is NULL in fake_numa_create_new_node()

Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
NUMA nodes can be specified using the following command line option

numa=fake=<node range>

node range is of the format <range1>,<range2>,...<rangeN>

Each of the rangeX parameters is parsed using memparse(). I find the patch
useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
on a numa box with the following arguments

numa=fake=512M
numa=fake=512M,768M
numa=fake=256M,512M mem=512M
numa=fake=1G mem=768M
numa=fake=
without any numa= argument

The other side effect introduced by this patch is that, in the case where we
don't have NUMA information, we now set a node online after adding each LMB.
This node could very well be node 0, but in the case that we enable fake
NUMA nodes, when we cross node boundaries, we need to set the new node online.


Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 arch/powerpc/mm/numa.c |   60 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 3 deletions(-)

diff -puN arch/powerpc/mm/numa.c~fakenumappc arch/powerpc/mm/numa.c
--- linux-2.6.24-rc8/arch/powerpc/mm/numa.c~fakenumappc	2008-01-28 17:05:34.000000000 +0530
+++ linux-2.6.24-rc8-balbir/arch/powerpc/mm/numa.c	2008-01-28 18:15:41.000000000 +0530
@@ -24,6 +24,8 @@
 
 static int numa_enabled = 1;
 
+static char *cmdline __initdata;
+
 static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
@@ -39,6 +41,47 @@ static bootmem_data_t __initdata plat_no
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
+static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+						unsigned int *nid)
+{
+	unsigned long long mem;
+	char *p = cmdline;
+	static unsigned int fake_nid;
+	static unsigned long long curr_boundary;
+
+	/*
+	 * Modify node id, iff we started creating NUMA nodes
+	 */
+	if (fake_nid)
+		*nid = fake_nid;
+	if (!p)
+		return 0;
+
+	mem = memparse(p, &p);
+	if (!mem)
+		return 0;
+
+	if (mem < curr_boundary)
+		return 0;
+
+	curr_boundary = mem;
+
+	if ((end_pfn << PAGE_SHIFT) > mem) {
+		/*
+		 * Skip commas and spaces
+		 */
+		while (*p == ',' || *p == ' ' || *p == '\t')
+			p++;
+
+		cmdline = p;
+		fake_nid++;
+		*nid = fake_nid;
+		dbg("created new fake_node with id %d\n", fake_nid);
+		return 1;
+	}
+	return 0;
+}
+
 static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
@@ -344,6 +387,9 @@ static void __init parse_drconf_memory(s
 			if (nid == 0xffff || nid >= MAX_NUMNODES)
 				nid = default_nid;
 		}
+
+		fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT),
+						&nid);
 		node_set_online(nid);
 
 		size = numa_enforce_memory_limit(start, lmb_size);
@@ -429,6 +475,8 @@ new_range:
 		nid = of_node_to_nid_single(memory);
 		if (nid < 0)
 			nid = default_nid;
+
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
 		node_set_online(nid);
 
 		if (!(size = numa_enforce_memory_limit(start, size))) {
@@ -461,7 +509,7 @@ static void __init setup_nonnuma(void)
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long total_ram = lmb_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
-	unsigned int i;
+	unsigned int i, nid = 0;
 
 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
@@ -471,9 +519,11 @@ static void __init setup_nonnuma(void)
 	for (i = 0; i < lmb.memory.cnt; ++i) {
 		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
 		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
-		add_active_range(0, start_pfn, end_pfn);
+
+		fake_numa_create_new_node(end_pfn, &nid);
+		add_active_range(nid, start_pfn, end_pfn);
+		node_set_online(nid);
 	}
-	node_set_online(0);
 }
 
 void __init dump_numa_cpu_topology(void)
@@ -702,6 +752,10 @@ static int __init early_numa(char *p)
 	if (strstr(p, "debug"))
 		numa_debug = 1;
 
+	p = strstr(p, "fake=");
+	if (p)
+		cmdline = p + strlen("fake=");
+
 	return 0;
 }
 early_param("numa", early_numa);
_
 

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3)
  2008-01-28 12:52         ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3) Balbir Singh
@ 2008-01-29 13:04           ` Michael Ellerman
  2008-01-29 13:50             ` Balbir Singh
  2008-02-01  4:57             ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 4) Balbir Singh
  0 siblings, 2 replies; 18+ messages in thread
From: Michael Ellerman @ 2008-01-29 13:04 UTC (permalink / raw)
  To: balbir; +Cc: Paul Mackerras, linuxppc-dev, LKML

[-- Attachment #1: Type: text/plain, Size: 2113 bytes --]

On Mon, 2008-01-28 at 18:22 +0530, Balbir Singh wrote:
> Hi, Paul,
> 
> Here's version 3 of the patch. I've commented the side-effect of
> repeatedly setting node 0 online (as to why that is done) and I've
> removed the side effect of not creating memory less nodes
> (when we hit the memory limit).
> 
> I've described all my tests below
>  
> Changelog v3
> 1. Remove the side-effect of not setting nodes online if they end
>    up having no memory in them because of the memory limit.
> 
> Changelog v2
> 
> 1. Get rid of the constant 5 (based on comments from
>                                 Geert.Uytterhoeven@sonycom.com)
> 2. Implement suggestions from Olof Johannson
> 3. Check if cmdline is NULL in fake_numa_create_new_node()

> diff -puN arch/powerpc/mm/numa.c~fakenumappc arch/powerpc/mm/numa.c
> --- linux-2.6.24-rc8/arch/powerpc/mm/numa.c~fakenumappc	2008-01-28 17:05:34.000000000 +0530
> +++ linux-2.6.24-rc8-balbir/arch/powerpc/mm/numa.c	2008-01-28 18:15:41.000000000 +0530
> @@ -39,6 +41,47 @@ static bootmem_data_t __initdata plat_no
>  static int min_common_depth;
>  static int n_mem_addr_cells, n_mem_size_cells;
>  
> +static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
> +						unsigned int *nid)
> +{
> +	unsigned long long mem;
> +	char *p = cmdline;
> +	static unsigned int fake_nid;
> +	static unsigned long long curr_boundary;
> +
> +	/*
> +	 * Modify node id, iff we started creating NUMA nodes
> +	 */
> +	if (fake_nid)
> +		*nid = fake_nid;
> +	if (!p)
> +		return 0;

Why do you check !p after assigning to nid? I assume it's because we
might have reached the end of the command line, ie. p == NULL, but we're
still adding memory to the last node? If so, it's a little subtle
and deserves a comment I think.

Otherwise this looks pretty good.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3)
  2008-01-29 13:04           ` Michael Ellerman
@ 2008-01-29 13:50             ` Balbir Singh
  2008-02-01  4:57             ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 4) Balbir Singh
  1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2008-01-29 13:50 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Paul Mackerras, linuxppc-dev, LKML

* Michael Ellerman <michael@ellerman.id.au> [2008-01-30 00:04:58]:

> Why do you check !p after assigning to nid? I assume it's because we
> might have reached the end of the command line, ie. p == NULL, but we're
> still adding memory to the last node? If so, it's a little subtle
> and deserves a comment I think.
>

The reason that we check for !p after assigning node id is that, in
case we create fake NUMA nodes, we want nid to be the fake numa node
id and not the real node id or in the non NUMA case, node id 0.

The if (!p) checks to see if we do have more arguments to parse.
 
> Otherwise this looks pretty good.
> 

Thanks!

> cheers
> 
> -- 
> Michael Ellerman
> OzLabs, IBM Australia Development Lab
> 
> wwweb: http://michael.ellerman.id.au
> phone: +61 2 6212 1183 (tie line 70 21183)
> 
> We do not inherit the earth from our ancestors,
> we borrow it from our children. - S.M.A.R.T Person



-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 4)
  2008-01-29 13:04           ` Michael Ellerman
  2008-01-29 13:50             ` Balbir Singh
@ 2008-02-01  4:57             ` Balbir Singh
  1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2008-02-01  4:57 UTC (permalink / raw)
  To: Michael Ellerman, Paul Mackerras; +Cc: linuxppc-dev, LKML

* Michael Ellerman <michael@ellerman.id.au> [2008-01-30 00:04:58]:

> Why do you check !p after assigning to nid? I assume it's because we
> might have reached the end of the command line, ie. p == NULL, but we're
> still adding memory to the last node? If so, it's a little subtle
> and deserves a comment I think.
> 
> Otherwise this looks pretty good.
> 
> cheers
> 

Hi, Paul,

Could you please consider version 4 for inclusion?


Changelog v4

1. Add more comments around the checks for command line arguments.

Changelog v3
1. Remove the side-effect of not setting nodes online if they end
   up having no memory in them because of the memory limit.

Changelog v2

1. Get rid of the constant 5 (based on comments from
                                Geert.Uytterhoeven@sonycom.com)
2. Implement suggestions from Olof Johansson
3. Check if cmdline is NULL in fake_numa_create_new_node()

Tested with additional parameters from Olof

numa=debug,fake=
numa=foo,fake=bar


Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
NUMA nodes can be specified using the following command line option

numa=fake=<node range>

node range is of the format <range1>,<range2>,...<rangeN>

Each of the rangeX parameters is parsed using memparse(). I find the patch
useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
on a numa box with the following arguments

numa=fake=512M
numa=fake=512M,768M
numa=fake=256M,512M mem=512M
numa=fake=1G mem=768M
numa=fake=
without any numa= argument

The other side effect introduced by this patch is that, in the case where we
don't have NUMA information, we now set a node online after adding each LMB.
This node could very well be node 0, but in the case that we enable fake
NUMA nodes, when we cross node boundaries, we need to set the new node online.


Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 arch/powerpc/mm/numa.c |   66 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 3 deletions(-)

diff -puN arch/powerpc/mm/numa.c~fakenumappc arch/powerpc/mm/numa.c
--- linux-2.6.24-rc8/arch/powerpc/mm/numa.c~fakenumappc	2008-01-28 17:05:34.000000000 +0530
+++ linux-2.6.24-rc8-balbir/arch/powerpc/mm/numa.c	2008-02-01 10:24:57.000000000 +0530
@@ -24,6 +24,8 @@
 
 static int numa_enabled = 1;
 
+static char *cmdline __initdata;
+
 static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
@@ -39,6 +41,53 @@ static bootmem_data_t __initdata plat_no
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
+static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+						unsigned int *nid)
+{
+	unsigned long long mem;
+	char *p = cmdline;
+	static unsigned int fake_nid;
+	static unsigned long long curr_boundary;
+
+	/*
+	 * Modify node id, iff we started creating NUMA nodes
+	 * We want to continue from where we left of the last time
+	 */
+	if (fake_nid)
+		*nid = fake_nid;
+	/*
+	 * In case there are no more arguments to parse, the
+	 * node_id should be the same as the last fake node id
+	 * (we've handled this above).
+	 */
+	if (!p)
+		return 0;
+
+	mem = memparse(p, &p);
+	if (!mem)
+		return 0;
+
+	if (mem < curr_boundary)
+		return 0;
+
+	curr_boundary = mem;
+
+	if ((end_pfn << PAGE_SHIFT) > mem) {
+		/*
+		 * Skip commas and spaces
+		 */
+		while (*p == ',' || *p == ' ' || *p == '\t')
+			p++;
+
+		cmdline = p;
+		fake_nid++;
+		*nid = fake_nid;
+		dbg("created new fake_node with id %d\n", fake_nid);
+		return 1;
+	}
+	return 0;
+}
+
 static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
@@ -344,6 +393,9 @@ static void __init parse_drconf_memory(s
 			if (nid == 0xffff || nid >= MAX_NUMNODES)
 				nid = default_nid;
 		}
+
+		fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT),
+						&nid);
 		node_set_online(nid);
 
 		size = numa_enforce_memory_limit(start, lmb_size);
@@ -429,6 +481,8 @@ new_range:
 		nid = of_node_to_nid_single(memory);
 		if (nid < 0)
 			nid = default_nid;
+
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
 		node_set_online(nid);
 
 		if (!(size = numa_enforce_memory_limit(start, size))) {
@@ -461,7 +515,7 @@ static void __init setup_nonnuma(void)
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long total_ram = lmb_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
-	unsigned int i;
+	unsigned int i, nid = 0;
 
 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
@@ -471,9 +525,11 @@ static void __init setup_nonnuma(void)
 	for (i = 0; i < lmb.memory.cnt; ++i) {
 		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
 		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
-		add_active_range(0, start_pfn, end_pfn);
+
+		fake_numa_create_new_node(end_pfn, &nid);
+		add_active_range(nid, start_pfn, end_pfn);
+		node_set_online(nid);
 	}
-	node_set_online(0);
 }
 
 void __init dump_numa_cpu_topology(void)
@@ -702,6 +758,10 @@ static int __init early_numa(char *p)
 	if (strstr(p, "debug"))
 		numa_debug = 1;
 
+	p = strstr(p, "fake=");
+	if (p)
+		cmdline = p + strlen("fake=");
+
 	return 0;
 }
 early_param("numa", early_numa);
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2008-02-01  5:04 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-07 22:37 [PATCH] Fake NUMA emulation for PowerPC (Take 2) Balbir Singh
2007-12-10 19:36 ` Balbir Singh
2007-12-10 23:07 ` Olof Johansson
2008-01-18  5:34 ` Michael Ellerman
2008-01-18  5:41   ` Balbir Singh
2008-01-18  5:44   ` Michael Ellerman
2008-01-18  7:08     ` Balbir Singh
2008-01-26  7:13     ` Balbir Singh
2008-01-27 11:55       ` Paul Mackerras
2008-01-27 15:01         ` Balbir Singh
2008-01-27 20:22           ` Nish Aravamudan
2008-01-28  9:41             ` Balbir Singh
2008-01-28 12:52         ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 3) Balbir Singh
2008-01-29 13:04           ` Michael Ellerman
2008-01-29 13:50             ` Balbir Singh
2008-02-01  4:57             ` [PATCH powerpc] Fake NUMA emulation for PowerPC (Take 4) Balbir Singh
2008-01-18  5:55 ` [PATCH] Fake NUMA emulation for PowerPC (Take 2) Michael Ellerman
2008-01-18  6:51   ` Balbir Singh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).