* [PATCH 0 of 2] x86/numa logical fixes
@ 2012-07-11 12:49 Andrew Cooper
  2012-07-11 12:49 ` [PATCH 1 of 2] x86/numa: Correct assumption that each NUMA node has memory Andrew Cooper
  2012-07-11 12:49 ` [PATCH 2 of 2] x86/numa: Remove warning about small NUMA nodes Andrew Cooper
  0 siblings, 2 replies; 3+ messages in thread
From: Andrew Cooper @ 2012-07-11 12:49 UTC
  To: xen-devel; +Cc: dario.faggioli, keir, jbeulich

These patches attempt to correct some of the Xen NUMA logic with respect to NUMA
nodes without attached memory.  They are a follow-up to my earlier RFC patch of
27/06/2012.

Given the nature of the issue, I would recommend that they be considered for 4.2.


* [PATCH 1 of 2] x86/numa: Correct assumption that each NUMA node has memory
  2012-07-11 12:49 [PATCH 0 of 2] x86/numa logical fixes Andrew Cooper
@ 2012-07-11 12:49 ` Andrew Cooper
  2012-07-11 12:49 ` [PATCH 2 of 2] x86/numa: Remove warning about small NUMA nodes Andrew Cooper
  1 sibling, 0 replies; 3+ messages in thread
From: Andrew Cooper @ 2012-07-11 12:49 UTC
  To: xen-devel; +Cc: dario.faggioli, keir, jbeulich

[-- Attachment #1: numa-init.patch --]
[-- Type: text/x-patch, Size: 3550 bytes --]

It is now quite easy to buy servers with incorrectly populated DIMMs, especially
with AMD Magny-Cours and Interlagos systems, which have two NUMA nodes per socket.

Currently, Xen assigns all CPUs on nodes without memory to node 0, which leads
to incorrect NUMA information and causes NUMA-aware functionality such as
alloc_domheap_pages() to make very poor placement decisions.

This patch splits the node-tracking logic into separate processor and memory
node masks so that NUMA nodes without memory are accepted, which corrects the
accounting of CPUs to online NUMA nodes.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
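
To illustrate the bookkeeping this patch introduces, here is a minimal
standalone C model (not Xen code -- Xen's real nodemask_t and its helpers
support more than 64 nodes; a plain bitmap is enough to show the idea).  SRAT
processor and memory affinity entries are tracked in separate masks, and their
union drives node registration, so a node with CPUs but no memory is no longer
lost:

#include <stdio.h>

typedef unsigned long nodemask_t;               /* simplified: <= 64 nodes */

static void node_set(int node, nodemask_t *mask)  { *mask |= 1UL << node; }
static int  node_isset(int node, nodemask_t mask) { return (mask >> node) & 1UL; }

int main(void)
{
    nodemask_t memory_nodes_parsed = 0, processor_nodes_parsed = 0;

    /* Hypothetical SRAT: memory on node 0 only, but CPUs on nodes 0 and 1
     * (e.g. a Magny-Cours socket with DIMMs behind one controller only). */
    node_set(0, &memory_nodes_parsed);
    node_set(0, &processor_nodes_parsed);
    node_set(1, &processor_nodes_parsed);

    /* Equivalent of nodes_or(all_nodes_parsed, memory..., processor...). */
    nodemask_t all_nodes_parsed = memory_nodes_parsed | processor_nodes_parsed;

    /* Before this patch, only the single memory-derived mask was walked,
     * so node 1 was never registered and its CPUs were folded into node 0. */
    for (int node = 0; node < 2; node++)
        printf("node %d: registered=%d has_memory=%d\n", node,
               node_isset(node, all_nodes_parsed),
               node_isset(node, memory_nodes_parsed));

    return 0;
}

Built with e.g. gcc -std=c99, this prints registered=1 for both nodes and
has_memory=0 for node 1, which is exactly the case the new zero-size warning
below reports.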

diff -r 7b0dc7f3ddfe -r a2dbed3582e5 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -195,8 +195,10 @@ void __devinit srat_detect_node(int cpu)
     u32 apicid = x86_cpu_to_apicid[cpu];
 
     node = apicid_to_node[apicid];
-    if ( node == NUMA_NO_NODE || !node_online(node) )
+    if ( node == NUMA_NO_NODE )
         node = 0;
+
+    node_set_online(node);
     numa_set_node(cpu, node);
 
     if ( opt_cpu_info && acpi_numa > 0 )
diff -r 7b0dc7f3ddfe -r a2dbed3582e5 xen/arch/x86/srat.c
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -23,7 +23,8 @@
 
 static struct acpi_table_slit *__read_mostly acpi_slit;
 
-static nodemask_t nodes_parsed __initdata;
+static nodemask_t memory_nodes_parsed __initdata;
+static nodemask_t processor_nodes_parsed __initdata;
 static nodemask_t nodes_found __initdata;
 static struct node nodes[MAX_NUMNODES] __initdata;
 static u8 __read_mostly pxm2node[256] = { [0 ... 255] = NUMA_NO_NODE };
@@ -221,6 +222,7 @@ acpi_numa_processor_affinity_init(struct
 		return;
 	}
 	apicid_to_node[pa->apic_id] = node;
+	node_set(node, processor_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
 	       pxm, pa->apic_id, node);
@@ -287,7 +289,7 @@ acpi_numa_memory_affinity_init(struct ac
 		return;
 	}
 	nd = &nodes[node];
-	if (!node_test_and_set(node, nodes_parsed)) {
+	if (!node_test_and_set(node, memory_nodes_parsed)) {
 		nd->start = start;
 		nd->end = end;
 	} else {
@@ -324,7 +326,7 @@ static int nodes_cover_memory(void)
 
 		do {
 			found = 0;
-			for_each_node_mask(j, nodes_parsed)
+			for_each_node_mask(j, memory_nodes_parsed)
 				if (start < nodes[j].end
 				    && end > nodes[j].start) {
 					if (start >= nodes[j].start) {
@@ -418,6 +420,7 @@ void __init srat_parse_regions(u64 addr)
 int __init acpi_scan_nodes(u64 start, u64 end)
 {
 	int i;
+	nodemask_t all_nodes_parsed;
 
 	/* First clean up the node list */
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -441,17 +444,26 @@ int __init acpi_scan_nodes(u64 start, u6
 		return -1;
 	}
 
+	nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed);
+
 	/* Finally register nodes */
-	for_each_node_mask(i, nodes_parsed)
+	for_each_node_mask(i, all_nodes_parsed)
 	{
-		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
-			continue;
+		u64 size = nodes[i].end - nodes[i].start;
+		if ( size == 0 )
+			printk(KERN_WARNING "SRAT: Node %u has no memory. "
+			       "BIOS Bug or mis-configured hardware?\n", i);
+
+		else if (size < NODE_MIN_SIZE)
+			printk(KERN_WARNING "SRAT: Node %u has only %"PRIu64
+			       " bytes of memory. BIOS Bug?\n", i, size);
+
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 	for (i = 0; i < nr_cpu_ids; i++) {
 		if (cpu_to_node[i] == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(cpu_to_node[i], nodes_parsed))
+		if (!node_isset(cpu_to_node[i], processor_nodes_parsed))
 			numa_set_node(i, NUMA_NO_NODE);
 	}
 	numa_init_array();


* [PATCH 2 of 2] x86/numa: Remove warning about small NUMA nodes
  2012-07-11 12:49 [PATCH 0 of 2] x86/numa logical fixes Andrew Cooper
  2012-07-11 12:49 ` [PATCH 1 of 2] x86/numa: Correct assumption that each NUMA node has memory Andrew Cooper
@ 2012-07-11 12:49 ` Andrew Cooper
  1 sibling, 0 replies; 3+ messages in thread
From: Andrew Cooper @ 2012-07-11 12:49 UTC
  To: xen-devel; +Cc: dario.faggioli, keir, jbeulich

[-- Attachment #1: numa-remove-warning.patch --]
[-- Type: text/x-patch, Size: 1195 bytes --]

This logic was imported along with the rest of the NUMA code from Linux 2.6.16
in c/s 11893:f312c2d01d8b.  The Xen memory management subsystem does not appear
to suffer from the problem the removed comment describes (small nodes confusing
the VM).  Furthermore, NUMA nodes with no memory are now quite commonly
encountered, and do not indicate a bug in the BIOS's SRAT ACPI table.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

diff -r a2dbed3582e5 -r d099f26f6e99 xen/arch/x86/srat.c
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -34,9 +34,6 @@ static int num_node_memblks;
 static struct node node_memblk_range[NR_NODE_MEMBLKS];
 static int memblk_nodeid[NR_NODE_MEMBLKS];
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
 
 static int node_to_pxm(int n);
 
@@ -454,10 +451,6 @@ int __init acpi_scan_nodes(u64 start, u6
 			printk(KERN_WARNING "SRAT: Node %u has no memory. "
 			       "BIOS Bug or mis-configured hardware?\n", i);
 
-		else if (size < NODE_MIN_SIZE)
-			printk(KERN_WARNING "SRAT: Node %u has only %"PRIu64
-			       " bytes of memory. BIOS Bug?\n", i, size);
-
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 	for (i = 0; i < nr_cpu_ids; i++) {
