linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] allow x86 NUMA architecture detection to fail
@ 2003-09-11  1:14 Dave Hansen
  2003-09-11  1:22 ` William Lee Irwin III
  0 siblings, 1 reply; 3+ messages in thread
From: Dave Hansen @ 2003-09-11  1:14 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: Martin J. Bligh, Patricia Gaughen, James Cleverdon, Andi Kleen

[-- Attachment #1: Type: text/plain, Size: 786 bytes --]

As described in this bug: http://bugme.osdl.org/show_bug.cgi?id=653 , if
you enable Summit support and NUMA Discontigmem support but boot on a
non-Summit box, the kernel will fail to boot.  The problem is that the
Summit code can not correctly get the NUMA memory configuration of a
flat box.  The code to do that is in get_memcfg_numa_flat(), but it
never gets called.  

This patch implements a fallback to the generic NUMA code in
get_memcfg_numa_flat() if the Summit detection fails.  The patch also
adds the necessary bits to the Summit code so that it *knows* when it
fails.  

BTW, this doesn't address NUMA-Q.  I think I have possession of more than
50% of the NUMA-Q's running Linux on the planet, and I'm too lazy to fix
it for just myself.
-- 
Dave Hansen
haveblue@us.ibm.com

[-- Attachment #2: get_memcfg_numa-2.6.0-test5-0.patch --]
[-- Type: text/x-patch, Size: 7537 bytes --]

diff -rup linux-2.6.0-test5-summit-include/arch/i386/kernel/numaq.c linux-2.6.0-test5-summit/arch/i386/kernel/numaq.c
--- linux-2.6.0-test5-summit-include/arch/i386/kernel/numaq.c	Wed Sep 10 17:50:14 2003
+++ linux-2.6.0-test5-summit/arch/i386/kernel/numaq.c	Wed Sep 10 17:45:07 2003
@@ -99,8 +99,14 @@ static void __init initialize_physnode_m
 	}
 }
 
-void __init get_memcfg_numaq(void)
+/*
+ * Unlike Summit, we don't really care to let the NUMA-Q
+ * fall back to flat mode.  Don't compile for NUMA-Q
+ * unless you really need it!
+ */
+int __init get_memcfg_numaq(void)
 {
 	smp_dump_qct();
 	initialize_physnode_map();
+	return 1;
 }
diff -rup linux-2.6.0-test5-summit-include/arch/i386/kernel/srat.c linux-2.6.0-test5-summit/arch/i386/kernel/srat.c
--- linux-2.6.0-test5-summit-include/arch/i386/kernel/srat.c	Wed Sep 10 17:50:14 2003
+++ linux-2.6.0-test5-summit/arch/i386/kernel/srat.c	Wed Sep 10 17:31:59 2003
@@ -239,6 +239,11 @@ static int __init acpi20_parse_srat(stru
 		}
 	}
 
+	if (num_memory_chunks == 0) {
+		printk("could not finy any ACPI SRAT memory areas.\n");
+		goto out_fail;
+	}
+	
 	/* Calculate total number of nodes in system from PXM bitmap and create
 	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
 	 * to specify the range of _PXM values.)
@@ -295,10 +300,12 @@ static int __init acpi20_parse_srat(stru
 			}
 		}
 	}
+	return 1;
+out_fail:
 	return 0;
 }
 
-void __init get_memcfg_from_srat(void)
+int __init get_memcfg_from_srat(void)
 {
 	struct acpi_table_header *header = NULL;
 	struct acpi_table_rsdp *rsdp = NULL;
@@ -316,11 +323,11 @@ void __init get_memcfg_from_srat(void)
 				(u32)rsdp_address->pointer.physical;
 	} else {
 		printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__);
-		return;
+		goto out_err;
 	}
 	if (!rsdp) {
 		printk("%s: Didn't find ACPI root!\n", __FUNCTION__);
-		return;
+		goto out_err;
 	}
 
 	printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
@@ -328,7 +335,7 @@ void __init get_memcfg_from_srat(void)
 
 	if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
 		printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__);
-		return;
+		goto out_err;
 	}
 
 	rsdt = (struct acpi_table_rsdt *)
@@ -338,14 +345,14 @@ void __init get_memcfg_from_srat(void)
 		printk(KERN_WARNING
 		       "%s: ACPI: Invalid root system description tables (RSDT)\n",
 		       __FUNCTION__);
-		return;
+		goto out_err;
 	}
 
 	header = & rsdt->header;
 
 	if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
 		printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
-		return;
+		goto out_err;
 	}
 
 	/* 
@@ -356,15 +363,18 @@ void __init get_memcfg_from_srat(void)
 	 */
 	tables = (header->length - sizeof(struct acpi_table_header)) / 4;
 
+	if (!tables)
+		goto out_err;
+	
 	memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
 
 	if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
 		printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
 		       saved_rsdt.header.length);
-		return;
+		goto out_err;
 	}
 
-printk("Begin table scan....\n");
+	printk("Begin SRAT table scan....\n");
 
 	for (i = 0; i < tables; i++) {
 		/* Map in header, then map in full table length. */
@@ -379,10 +389,13 @@ printk("Begin table scan....\n");
 
 		if (strncmp((char *) &header->signature, "SRAT", 4))
 			continue;
-		acpi20_parse_srat((struct acpi_table_srat *)header);
+		
 		/* we've found the srat table. don't need to look at any more tables */
-		break;
+		return acpi20_parse_srat((struct acpi_table_srat *)header);
 	}
+out_err:
+	printk("failed to get NUMA memory information from SRAT table\n");
+	return 0;
 }
 
 /* For each node run the memory list to determine whether there are
diff -rup linux-2.6.0-test5-summit-include/arch/i386/mm/discontig.c linux-2.6.0-test5-summit/arch/i386/mm/discontig.c
--- linux-2.6.0-test5-summit-include/arch/i386/mm/discontig.c	Wed Sep 10 17:50:14 2003
+++ linux-2.6.0-test5-summit/arch/i386/mm/discontig.c	Wed Sep 10 17:42:52 2003
@@ -30,6 +30,7 @@
 #include <linux/initrd.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
+#include <asm/mmzone.h>
 
 struct pglist_data *node_data[MAX_NUMNODES];
 bootmem_data_t node0_bdata;
@@ -84,7 +85,7 @@ void set_pmd_pfn(unsigned long vaddr, un
  *        a single node with all available processors in it with a flat
  *        memory map.
  */
-void __init get_memcfg_numa_flat(void)
+int __init get_memcfg_numa_flat(void)
 {
 	int pfn;
 
@@ -107,6 +108,7 @@ void __init get_memcfg_numa_flat(void)
          /* Indicate there is one node available. */
 	node_set_online(0);
 	numnodes = 1;
+	return 1;
 }
 
 /*
@@ -351,21 +353,24 @@ void __init zone_sizes_init(void)
 		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
 		unsigned long *zholes_size;
 		unsigned int max_dma;
-
+		
 		unsigned long low = max_low_pfn;
 		unsigned long start = node_start_pfn[nid];
 		unsigned long high = node_end_pfn[nid];
-		
+
 		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
 		if (start > low) {
 #ifdef CONFIG_HIGHMEM
-		  zones_size[ZONE_HIGHMEM] = high - start;
+			BUG_ON(start > high);
+			zones_size[ZONE_HIGHMEM] = high - start;
 #endif
 		} else {
 			if (low < max_dma)
 				zones_size[ZONE_DMA] = low;
 			else {
+				BUG_ON(max_dma > low);
+				BUG_ON(low > high);
 				zones_size[ZONE_DMA] = max_dma;
 				zones_size[ZONE_NORMAL] = low - max_dma;
 #ifdef CONFIG_HIGHMEM
diff -rup linux-2.6.0-test5-summit-include/include/asm-i386/mmzone.h linux-2.6.0-test5-summit/include/asm-i386/mmzone.h
--- linux-2.6.0-test5-summit-include/include/asm-i386/mmzone.h	Wed Sep 10 17:54:26 2003
+++ linux-2.6.0-test5-summit/include/asm-i386/mmzone.h	Wed Sep 10 17:48:37 2003
@@ -122,11 +122,29 @@ static inline struct pglist_data *pfn_to
 #elif CONFIG_ACPI_SRAT
 #include <asm/srat.h>
 #elif CONFIG_X86_PC
-#define get_memcfg_numa get_memcfg_numa_flat
 #define get_zholes_size(n) (0)
 #else
 #define pfn_to_nid(pfn)		(0)
 #endif /* CONFIG_X86_NUMAQ */
+
+extern int get_memcfg_numa_flat(void );
+/*
+ * This allows any one NUMA architecture to be compiled
+ * for, and still fall back to the flat function if it
+ * fails.
+ */
+static inline void get_memcfg_numa(void)
+{
+#ifdef CONFIG_X86_NUMAQ
+	if (get_memcfg_numaq())
+		return;
+#elif CONFIG_ACPI_SRAT
+	if (get_memcfg_from_srat())
+		return;
+#endif
+
+	get_memcfg_numa_flat();
+}
 
 #endif /* CONFIG_DISCONTIGMEM */
 #endif /* _ASM_MMZONE_H_ */
diff -rup linux-2.6.0-test5-summit-include/include/asm-i386/numaq.h linux-2.6.0-test5-summit/include/asm-i386/numaq.h
--- linux-2.6.0-test5-summit-include/include/asm-i386/numaq.h	Wed Sep 10 17:50:11 2003
+++ linux-2.6.0-test5-summit/include/asm-i386/numaq.h	Wed Sep 10 17:19:04 2003
@@ -29,8 +29,7 @@
 #ifdef CONFIG_X86_NUMAQ
 
 #define MAX_NUMNODES		16
-extern void get_memcfg_numaq(void);
-#define get_memcfg_numa() get_memcfg_numaq()
+extern int get_memcfg_numaq(void);
 
 /*
  * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the
diff -rup linux-2.6.0-test5-summit-include/include/asm-i386/srat.h linux-2.6.0-test5-summit/include/asm-i386/srat.h
--- linux-2.6.0-test5-summit-include/include/asm-i386/srat.h	Wed Sep 10 17:54:26 2003
+++ linux-2.6.0-test5-summit/include/asm-i386/srat.h	Wed Sep 10 17:48:37 2003
@@ -32,8 +32,7 @@
 #endif
 
 #define MAX_NUMNODES		8
-extern void get_memcfg_from_srat(void);
+extern int get_memcfg_from_srat(void);
 extern unsigned long *get_zholes_size(int);
-#define get_memcfg_numa() get_memcfg_from_srat()
 
 #endif /* _ASM_SRAT_H_ */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] allow x86 NUMA architecture detection to fail
  2003-09-11  1:14 [PATCH] allow x86 NUMA architecture detection to fail Dave Hansen
@ 2003-09-11  1:22 ` William Lee Irwin III
  2003-09-11  2:35   ` Martin J. Bligh
  0 siblings, 1 reply; 3+ messages in thread
From: William Lee Irwin III @ 2003-09-11  1:22 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Linux Kernel Mailing List, Martin J. Bligh, Patricia Gaughen,
	James Cleverdon, Andi Kleen

On Wed, Sep 10, 2003 at 06:14:48PM -0700, Dave Hansen wrote:
> BTW, this doesn't address NUMA-Q.  I think I have posession of more than
> 50% of the NUMA-Q's running Linux on the planet, and I'm too lazy to fix
> it for just myself.

I think we can let this slide until we can run with unpatched firmware
and by some catastrophe an external person running Linux on one
materializes, of which the latter is rather extremely unlikely.


-- wli

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] allow x86 NUMA architecture detection to fail
  2003-09-11  1:22 ` William Lee Irwin III
@ 2003-09-11  2:35   ` Martin J. Bligh
  0 siblings, 0 replies; 3+ messages in thread
From: Martin J. Bligh @ 2003-09-11  2:35 UTC (permalink / raw)
  To: William Lee Irwin III, Dave Hansen
  Cc: Linux Kernel Mailing List, Patricia Gaughen, James Cleverdon, Andi Kleen



--William Lee Irwin III <wli@holomorphy.com> wrote (on Wednesday, September 10, 2003 18:22:00 -0700):

> On Wed, Sep 10, 2003 at 06:14:48PM -0700, Dave Hansen wrote:
>> BTW, this doesn't address NUMA-Q.  I think I have posession of more than
>> 50% of the NUMA-Q's running Linux on the planet, and I'm too lazy to fix
>> it for just myself.
> 
> I think we can let this slide until we can run with unpatched firmware
> and by some catastrophe an external person running Linux on one
> materializes, of which the latter is rather extremely unlikely.

Yeah, NUMA-Q doesn't matter for this - it's a static compile time option.
We need Summit dynamically to get one core kernel for the distros.

M.


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2003-09-11  2:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-09-11  1:14 [PATCH] allow x86 NUMA architecture detection to fail Dave Hansen
2003-09-11  1:22 ` William Lee Irwin III
2003-09-11  2:35   ` Martin J. Bligh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).