All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code
@ 2009-05-09  6:45 Yinghai Lu
  2009-05-09  6:48 ` [PATCH 2/3] x86: add numa_move_cpus_to_node Yinghai Lu
                   ` (4 more replies)
  0 siblings, 5 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-09  6:45 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, Christoph Lameter
  Cc: linux-kernel, Al Viro, Rusty Russell


after
| commit b263295dbffd33b0fbff670720fa178c30e3392a
| Author: Christoph Lameter <clameter@sgi.com>
| Date:   Wed Jan 30 13:30:47 2008 +0100
|
|    x86: 64-bit, make sparsemem vmemmap the only memory model

we don't have MEMORY_HOTPLUG_RESERVE anymore.

remove related dead code.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/include/asm/numa_64.h |    3 -
 arch/x86/mm/numa_64.c          |    5 --
 arch/x86/mm/srat_64.c          |   63 +++++++------------------------------
 include/linux/mm.h             |    2 -
 mm/page_alloc.c                |   69 -----------------------------------------
 5 files changed, 12 insertions(+), 130 deletions(-)

Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -608,8 +605,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
Index: linux-2.6/arch/x86/mm/srat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/srat_64.c
+++ linux-2.6/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long s
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long s
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long s
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct ac
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a,
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned in
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -150,10 +150,6 @@ static unsigned long __meminitdata dma_r
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3121,64 +3117,6 @@ void __init sparse_memory_present_with_a
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3203,9 +3141,6 @@ void __meminit get_pfn_range_for_nid(uns
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3810,10 +3745,6 @@ void __init remove_all_active_ranges(voi
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-09  6:45 [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Yinghai Lu
@ 2009-05-09  6:48 ` Yinghai Lu
  2009-05-09  7:05   ` Justin P. Mattock
  2009-05-12  1:27   ` Christoph Lameter
  2009-05-09  6:50 ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Yinghai Lu
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-09  6:48 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, Christoph Lameter
  Cc: linux-kernel, Al Viro, Rusty Russell, Pekka Enberg



when a node only has a hot add range and does not have any other static range,
that node will not be onlined, and the cpus on it will be linked to a nearby
node with memory.
then when that hot add range is added later, we need to link those cpus
back to that node.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/include/asm/numa_64.h |   10 ++++---
 arch/x86/mm/init_64.c          |    3 ++
 arch/x86/mm/numa_64.c          |   52 +++++++++++++++++++++++++++++++++++------
 3 files changed, 54 insertions(+), 11 deletions(-)

Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -25,16 +25,18 @@ extern void setup_node_bootmem(int nodei
 
 #ifdef CONFIG_NUMA
 extern void __init init_cpu_to_node(void);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
+extern void numa_set_node(int cpu, int node);
+extern void numa_clear_node(int cpu);
+extern void numa_add_cpu(int cpu);
+extern void numa_remove_cpu(int cpu);
+extern void numa_move_cpus_to_node(int nid);
 #else
 static inline void init_cpu_to_node(void)		{ }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
 static inline void numa_remove_cpu(int cpu)		{ }
+static inline void numa_move_cpus_to_node(int nid)	{ }
 #endif
 
 #endif /* _ASM_X86_NUMA_64_H */
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/sched.h>
+#include <linux/node.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -660,7 +661,7 @@ void __init init_cpu_to_node(void)
 #endif
 
 
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
 
@@ -683,19 +684,56 @@ void __cpuinit numa_set_node(int cpu, in
 		per_cpu(node_number, cpu) = node;
 }
 
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
 {
 	numa_set_node(cpu, NUMA_NO_NODE);
 }
 
+static int real_cpu_to_node(int cpu)
+{
+	int apicid, nodeid = -1;
+
+	/*
+	 * when the node doesn't have memory before, cpu_to_node(cpu) is
+	 * point to other node, but apicid_to_node still hold the real nodeid
+	 */
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	if (apicid == BAD_APICID)
+		return nodeid;
+
+	nodeid = apicid_to_node[apicid];
+	return nodeid;
+}
+
+void numa_move_cpus_to_node(int nid)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		int nodeid;
+
+		nodeid = real_cpu_to_node(cpu);
+		if (nodeid != nid)
+			continue;
+
+		nodeid = cpu_to_node(cpu);
+		if (nodeid != nid) {
+			unregister_cpu_under_node(cpu, nodeid);
+			numa_remove_cpu(cpu);
+			numa_set_node(cpu, nid);
+			numa_add_cpu(cpu);
+		}
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
-void __cpuinit numa_add_cpu(int cpu)
+void numa_add_cpu(int cpu)
 {
 	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
-void __cpuinit numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
 {
 	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
@@ -705,7 +743,7 @@ void __cpuinit numa_remove_cpu(int cpu)
 /*
  * --------- debug versions of the numa functions ---------
  */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void numa_set_cpumask(int cpu, int enable)
 {
 	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
@@ -728,12 +766,12 @@ static void __cpuinit numa_set_cpumask(i
 		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
 }
 
-void __cpuinit numa_add_cpu(int cpu)
+void numa_add_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 1);
 }
 
-void __cpuinit numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 0);
 }
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -631,6 +631,9 @@ int arch_add_memory(int nid, u64 start,
 	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 	WARN_ON_ONCE(ret);
 
+	if (!ret)
+		numa_move_cpus_to_node(nid);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-09  6:45 [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Yinghai Lu
  2009-05-09  6:48 ` [PATCH 2/3] x86: add numa_move_cpus_to_node Yinghai Lu
@ 2009-05-09  6:50 ` Yinghai Lu
  2009-05-11 17:53   ` Jack Steiner
  2009-05-12  1:02 ` [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Christoph Lameter
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-09  6:50 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jack Steiner, David Rientjes
  Cc: linux-kernel


recently there have been some changes to the meaning of node_possible_map,

and the result is somewhat strange:
a node without memory is set in node_possible_map,
but a node with less than NODE_MIN_SIZE of memory is kicked out of node_possible_map.

try to fix it by adding strict_setup_node_bootmem.
also remove unparse_node.

so the result will be:
1. cpu_to_node will return online nodes only (the nearest one)
2. apicid_to_node will still return the real node, which may not be online
   but is set in node_possible_map.
3. node_possible_map will include nodes whose memory is less than NODE_MIN_SIZE

v2: after move_cpus_to_node change.

[ Impact: get node_possible_map right ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/include/asm/numa_64.h |    4 ++++
 arch/x86/mm/numa_64.c          |    7 +++++++
 arch/x86/mm/srat_64.c          |   29 ++---------------------------
 3 files changed, 13 insertions(+), 27 deletions(-)

Index: linux-2.6/arch/x86/mm/srat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/srat_64.c
+++ linux-2.6/arch/x86/mm/srat_64.c
@@ -36,10 +36,6 @@ static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -338,17 +334,6 @@ static int __init nodes_cover_memory(con
 	return 1;
 }
 
-static void __init unparse_node(int node)
-{
-	int i;
-	node_clear(node, nodes_parsed);
-	node_clear(node, cpu_nodes_parsed);
-	for (i = 0; i < MAX_LOCAL_APIC; i++) {
-		if (apicid_to_node[i] == node)
-			apicid_to_node[i] = NUMA_NO_NODE;
-	}
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long
 		return -1;
 
 	/* First clean up the node list */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
-		/*
-		 * don't confuse VM with a node that doesn't have the
-		 * minimum memory.
-		 */
-		if (nodes[i].end &&
-			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-			unparse_node(i);
-			node_set_offline(i);
-		}
-	}
 
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
@@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long
 
 		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(node, node_possible_map))
+		if (!node_online(node))
 			numa_clear_node(i);
 	}
 	numa_init_array();
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -192,6 +192,13 @@ void __init setup_node_bootmem(int nodei
 	if (!end)
 		return;
 
+	/*
+	 * don't confuse VM with a node that doesn't have the
+	 * minimum memory.
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -24,6 +24,10 @@ extern void setup_node_bootmem(int nodei
 			       unsigned long end);
 
 #ifdef CONFIG_NUMA
+/* Too small nodes confuse the VM badly. Usually they result
+   from BIOS bugs. */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void numa_set_node(int cpu, int node);
 extern void numa_clear_node(int cpu);

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-09  6:48 ` [PATCH 2/3] x86: add numa_move_cpus_to_node Yinghai Lu
@ 2009-05-09  7:05   ` Justin P. Mattock
  2009-05-12  1:27   ` Christoph Lameter
  1 sibling, 0 replies; 102+ messages in thread
From: Justin P. Mattock @ 2009-05-09  7:05 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, Christoph Lameter, linux-kernel, Al Viro,
	Rusty Russell, Pekka Enberg

On Fri, 2009-05-08 at 23:48 -0700, Yinghai Lu wrote:

> 
> when node only have hot add range and don't have other static range.

when the node only has "hot add range", then don't have other "static
ranges".

> that node will not be onlined, and cpus on that will be linked to nearby
> node with memory.
> then when that host add range is added later, we need to linked those cpus
Then
> back.
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  arch/x86/include/asm/numa_64.h |   10 ++++---
>  arch/x86/mm/init_64.c          |    3 ++
>  arch/x86/mm/numa_64.c          |   52 +++++++++++++++++++++++++++++++++++------
>  3 files changed, 54 insertions(+), 11 deletions(-)
> 
> Index: linux-2.6/arch/x86/include/asm/numa_64.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/numa_64.h
> +++ linux-2.6/arch/x86/include/asm/numa_64.h
> @@ -25,16 +25,18 @@ extern void setup_node_bootmem(int nodei
>  
>  #ifdef CONFIG_NUMA
>  extern void __init init_cpu_to_node(void);
> -extern void __cpuinit numa_set_node(int cpu, int node);
> -extern void __cpuinit numa_clear_node(int cpu);
> -extern void __cpuinit numa_add_cpu(int cpu);
> -extern void __cpuinit numa_remove_cpu(int cpu);
> +extern void numa_set_node(int cpu, int node);
> +extern void numa_clear_node(int cpu);
> +extern void numa_add_cpu(int cpu);
> +extern void numa_remove_cpu(int cpu);
> +extern void numa_move_cpus_to_node(int nid);
>  #else
>  static inline void init_cpu_to_node(void)		{ }
>  static inline void numa_set_node(int cpu, int node)	{ }
>  static inline void numa_clear_node(int cpu)		{ }
>  static inline void numa_add_cpu(int cpu, int node)	{ }
>  static inline void numa_remove_cpu(int cpu)		{ }
> +static inline void numa_move_cpus_to_node(int nid)	{ }
>  #endif
>  
>  #endif /* _ASM_X86_NUMA_64_H */
> Index: linux-2.6/arch/x86/mm/numa_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/numa_64.c
> +++ linux-2.6/arch/x86/mm/numa_64.c
> @@ -12,6 +12,7 @@
>  #include <linux/module.h>
>  #include <linux/nodemask.h>
>  #include <linux/sched.h>
> +#include <linux/node.h>
>  
>  #include <asm/e820.h>
>  #include <asm/proto.h>
> @@ -660,7 +661,7 @@ void __init init_cpu_to_node(void)
>  #endif
>  
> 
> -void __cpuinit numa_set_node(int cpu, int node)
> +void numa_set_node(int cpu, int node)
>  {
>  	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
>  
> @@ -683,19 +684,56 @@ void __cpuinit numa_set_node(int cpu, in
>  		per_cpu(node_number, cpu) = node;
>  }
>  
> -void __cpuinit numa_clear_node(int cpu)
> +void numa_clear_node(int cpu)
>  {
>  	numa_set_node(cpu, NUMA_NO_NODE);
>  }
>  
> +static int real_cpu_to_node(int cpu)
> +{
> +	int apicid, nodeid = -1;
> +
> +	/*
> +	 * when the node doesn't have memory before, cpu_to_node(cpu) is
> +	 * point to other node, but apicid_to_node still hold the real nodeid
> +	 */
> +	apicid = per_cpu(x86_cpu_to_apicid, cpu);
> +	if (apicid == BAD_APICID)
> +		return nodeid;
> +
> +	nodeid = apicid_to_node[apicid];
> +	return nodeid;
> +}
> +
> +void numa_move_cpus_to_node(int nid)
> +{
> +	int cpu;
> +
> +	for_each_present_cpu(cpu) {
> +		int nodeid;
> +
> +		nodeid = real_cpu_to_node(cpu);
> +		if (nodeid != nid)
> +			continue;
> +
> +		nodeid = cpu_to_node(cpu);
> +		if (nodeid != nid) {
> +			unregister_cpu_under_node(cpu, nodeid);
> +			numa_remove_cpu(cpu);
> +			numa_set_node(cpu, nid);
> +			numa_add_cpu(cpu);
> +		}
> +	}
> +}
> +
>  #ifndef CONFIG_DEBUG_PER_CPU_MAPS
>  
> -void __cpuinit numa_add_cpu(int cpu)
> +void numa_add_cpu(int cpu)
>  {
>  	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
>  }
>  
> -void __cpuinit numa_remove_cpu(int cpu)
> +void numa_remove_cpu(int cpu)
>  {
>  	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
>  }
> @@ -705,7 +743,7 @@ void __cpuinit numa_remove_cpu(int cpu)
>  /*
>   * --------- debug versions of the numa functions ---------
>   */
> -static void __cpuinit numa_set_cpumask(int cpu, int enable)
> +static void numa_set_cpumask(int cpu, int enable)
>  {
>  	int node = early_cpu_to_node(cpu);
>  	struct cpumask *mask;
> @@ -728,12 +766,12 @@ static void __cpuinit numa_set_cpumask(i
>  		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
>  }
>  
> -void __cpuinit numa_add_cpu(int cpu)
> +void numa_add_cpu(int cpu)
>  {
>  	numa_set_cpumask(cpu, 1);
>  }
>  
> -void __cpuinit numa_remove_cpu(int cpu)
> +void numa_remove_cpu(int cpu)
>  {
>  	numa_set_cpumask(cpu, 0);
>  }
> Index: linux-2.6/arch/x86/mm/init_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/init_64.c
> +++ linux-2.6/arch/x86/mm/init_64.c
> @@ -631,6 +631,9 @@ int arch_add_memory(int nid, u64 start,
>  	ret = __add_pages(nid, zone, start_pfn, nr_pages);
>  	WARN_ON_ONCE(ret);
>  
> +	if (!ret)
> +		numa_move_cpus_to_node(nid);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(arch_add_memory);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

This way it sounds better(correct me if I'm wrong).

regards,

Justin P. Mattock


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-09  6:50 ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Yinghai Lu
@ 2009-05-11 17:53   ` Jack Steiner
  2009-05-11 19:15     ` Yinghai Lu
  2009-05-11 19:27     ` David Rientjes
  0 siblings, 2 replies; 102+ messages in thread
From: Jack Steiner @ 2009-05-11 17:53 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	David Rientjes, linux-kernel

On Fri, May 08, 2009 at 11:50:51PM -0700, Yinghai Lu wrote:
> 
> recently there are some changes to about meaning of node_possible_map
> 
> and it is some strange:
> the node without memory would be set in node_possible_map
> but some node with less NODE_MIN_SIZE will be kicked out of node_possible_map.
> 
> try to fix it by adding strict_setup_node_bootmem.
> also remove unparse_node.

I still see the same panic. Entry 0 of the node_data array is NULL &
it is dereferenced building the zonelists.

I'm sure that you are way ahead of me in diagnosing this problem but
this is a regression from previous behavior. For example, in 2.6.27, node_data
is created for both nodes but node 0 contains no memory:

	(2.6.27)
	<6>SRAT: PXM 0 -> APIC 0 -> Node 0
	<6>SRAT: PXM 1 -> APIC 128 -> Node 1
	<6>SRAT: Node 1 PXM 1 0-fff6c000
	<7>NUMA: Using 63 for the hash shift.
	<6>Bootmem setup node 0 0000000000000000-0000000000000000
	<3>Cannot find 212992 bytes in node 0
	<6>Bootmem setup node 1 0000000000000000-0000000010000000
	<6>  NODE_DATA [000000000139be80 - 00000000013cfe7f]
	<6>  bootmap [00000000013d0000 -  00000000013d1fff] pages 2
	<6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
	<6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
	<6>  #2 [0000200000 - 000139be38]    TEXT DATA BSS ==> [0000200000 - 000139be38]
	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
	<6>Bootmem setup node 0 0000000000000000-0000000000000000
	<6>  NODE_DATA [00000000013d2000 - 0000000001405fff]
	<6>  bootmap [0000000000000000 -  ffffffffffffffff] pages 0
	<6>(7 early reservations) ==> bootmem [0000000000 - 0000000000]
	<6>  #0 [0000000000 - 0000001000]   BIOS data page
	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE
	<6>  #2 [0000200000 - 000139be38]    TEXT DATA BSS
	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved
	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved
	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap
	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT
	<6>    NODE_DATA(0) on node 1
	<6>    bootmap(0) on node 1
	<7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001600000-ffff8800019fffff] on node 1
	<4>Zone PFN ranges:
	<4>  DMA      0x00000000 -> 0x00001000
	<4>  DMA32    0x00001000 -> 0x00100000
	<4>  Normal   0x00100000 -> 0x00100000
	<4>Movable zone start PFN for each node
	<4>early_node_map[2] active PFN ranges
	<4>    1: 0x00000000 -> 0x00000006
	<4>    1: 0x00000200 -> 0x00010000
	<4>Could not find start_pfn for node 0
	<7>On node 0 totalpages: 0
	<7>On node 1 totalpages: 65030
	<7>  DMA zone: 3427 pages, LIFO batch:0
	<7>  DMA32 zone: 60480 pages, LIFO batch:15

I have not seen any problems running on 2.6.27 using nodes that have no memory.


Do we have a clear and unambiguous definition of what a node really is?
In this case, is a board (socket) with cpus and a unique PXM, but no memory,
considered a node? Even though it has no memory, it is a node (depending on the
definition of "node") for purposes such as scheduling. The memoryless node also
has local IO buses that want to direct interrupts to node-local cpus.



> 
> so result will be:
> 1. cpu_to_node will return online node only (nearest one)
> 2. apicid_to_node still return the node that could be not online but is set
>    in node_possible_map.
> 3. node_possible_map will include nodes that mem on it are less NODE_MIN_SIZE
> 
> v2: after move_cpus_to_node change.
> 
> [ Impact: get node_possible_map right ]
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  arch/x86/include/asm/numa_64.h |    4 ++++
>  arch/x86/mm/numa_64.c          |    7 +++++++
>  arch/x86/mm/srat_64.c          |   29 ++---------------------------
>  3 files changed, 13 insertions(+), 27 deletions(-)
> 
> Index: linux-2.6/arch/x86/mm/srat_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/srat_64.c
> +++ linux-2.6/arch/x86/mm/srat_64.c
> @@ -36,10 +36,6 @@ static int num_node_memblks __initdata;
>  static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
>  static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
>  
> -/* Too small nodes confuse the VM badly. Usually they result
> -   from BIOS bugs. */
> -#define NODE_MIN_SIZE (4*1024*1024)
> -
>  static __init int setup_node(int pxm)
>  {
>  	return acpi_map_pxm_to_node(pxm);
> @@ -338,17 +334,6 @@ static int __init nodes_cover_memory(con
>  	return 1;
>  }
>  
> -static void __init unparse_node(int node)
> -{
> -	int i;
> -	node_clear(node, nodes_parsed);
> -	node_clear(node, cpu_nodes_parsed);
> -	for (i = 0; i < MAX_LOCAL_APIC; i++) {
> -		if (apicid_to_node[i] == node)
> -			apicid_to_node[i] = NUMA_NO_NODE;
> -	}
> -}
> -
>  void __init acpi_numa_arch_fixup(void) {}
>  
>  /* Use the information discovered above to actually set up the nodes. */
> @@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long
>  		return -1;
>  
>  	/* First clean up the node list */
> -	for (i = 0; i < MAX_NUMNODES; i++) {
> +	for (i = 0; i < MAX_NUMNODES; i++)
>  		cutoff_node(i, start, end);
> -		/*
> -		 * don't confuse VM with a node that doesn't have the
> -		 * minimum memory.
> -		 */
> -		if (nodes[i].end &&
> -			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
> -			unparse_node(i);
> -			node_set_offline(i);
> -		}
> -	}
>  
>  	if (!nodes_cover_memory(nodes)) {
>  		bad_srat();
> @@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long
>  
>  		if (node == NUMA_NO_NODE)
>  			continue;
> -		if (!node_isset(node, node_possible_map))
> +		if (!node_online(node))
>  			numa_clear_node(i);
>  	}
>  	numa_init_array();
> Index: linux-2.6/arch/x86/mm/numa_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/numa_64.c
> +++ linux-2.6/arch/x86/mm/numa_64.c
> @@ -192,6 +192,13 @@ void __init setup_node_bootmem(int nodei
>  	if (!end)
>  		return;
>  
> +	/*
> +	 * don't confuse VM with a node that doesn't have the
> +	 * minimum memory.
> +	 */
> +	if (end && (end - start) < NODE_MIN_SIZE)
> +		return;
> +
>  	start = roundup(start, ZONE_ALIGN);
>  
>  	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
> Index: linux-2.6/arch/x86/include/asm/numa_64.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/numa_64.h
> +++ linux-2.6/arch/x86/include/asm/numa_64.h
> @@ -24,6 +24,10 @@ extern void setup_node_bootmem(int nodei
>  			       unsigned long end);
>  
>  #ifdef CONFIG_NUMA
> +/* Too small nodes confuse the VM badly. Usually they result
> +   from BIOS bugs. */
> +#define NODE_MIN_SIZE (4*1024*1024)
> +
>  extern void __init init_cpu_to_node(void);
>  extern void numa_set_node(int cpu, int node);
>  extern void numa_clear_node(int cpu);

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 17:53   ` Jack Steiner
@ 2009-05-11 19:15     ` Yinghai Lu
  2009-05-11 19:36       ` Yinghai Lu
  2009-05-11 19:27     ` David Rientjes
  1 sibling, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-11 19:15 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	David Rientjes, linux-kernel

Jack Steiner wrote:
> On Fri, May 08, 2009 at 11:50:51PM -0700, Yinghai Lu wrote:
>> recently there are some changes to about meaning of node_possible_map
>>
>> and it is some strange:
>> the node without memory would be set in node_possible_map
>> but some node with less NODE_MIN_SIZE will be kicked out of node_possible_map.
>>
>> try to fix it by adding strict_setup_node_bootmem.
>> also remove unparse_node.
> 
> I still see the same panic. Entry 0 of the node_data array is NULL &
> it is dereferenced building the zonelists.
> 
> I'm sure that you are way ahead of me in diagnosing this problem but
> this is a regression from previous behavior. For example, in 2.6.27, node_data
> is created for both nodes but node 0 contains no memory:
> 
> 	(2.6.27)
> 	<6>SRAT: PXM 0 -> APIC 0 -> Node 0
> 	<6>SRAT: PXM 1 -> APIC 128 -> Node 1
> 	<6>SRAT: Node 1 PXM 1 0-fff6c000
> 	<7>NUMA: Using 63 for the hash shift.
> 	<6>Bootmem setup node 0 0000000000000000-0000000000000000
> 	<3>Cannot find 212992 bytes in node 0
> 	<6>Bootmem setup node 1 0000000000000000-0000000010000000
> 	<6>  NODE_DATA [000000000139be80 - 00000000013cfe7f]
> 	<6>  bootmap [00000000013d0000 -  00000000013d1fff] pages 2
> 	<6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
> 	<6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
> 	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
> 	<6>  #2 [0000200000 - 000139be38]    TEXT DATA BSS ==> [0000200000 - 000139be38]
> 	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
> 	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
> 	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
> 	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
> 	<6>Bootmem setup node 0 0000000000000000-0000000000000000
> 	<6>  NODE_DATA [00000000013d2000 - 0000000001405fff]
> 	<6>  bootmap [0000000000000000 -  ffffffffffffffff] pages 0
> 	<6>(7 early reservations) ==> bootmem [0000000000 - 0000000000]
> 	<6>  #0 [0000000000 - 0000001000]   BIOS data page
> 	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE
> 	<6>  #2 [0000200000 - 000139be38]    TEXT DATA BSS
> 	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved
> 	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved
> 	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap
> 	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT
> 	<6>    NODE_DATA(0) on node 1
> 	<6>    bootmap(0) on node 1
> 	<7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001600000-ffff8800019fffff] on node 1
> 	<4>Zone PFN ranges:
> 	<4>  DMA      0x00000000 -> 0x00001000
> 	<4>  DMA32    0x00001000 -> 0x00100000
> 	<4>  Normal   0x00100000 -> 0x00100000
> 	<4>Movable zone start PFN for each node
> 	<4>early_node_map[2] active PFN ranges
> 	<4>    1: 0x00000000 -> 0x00000006
> 	<4>    1: 0x00000200 -> 0x00010000
> 	<4>Could not find start_pfn for node 0
> 	<7>On node 0 totalpages: 0
> 	<7>On node 1 totalpages: 65030
> 	<7>  DMA zone: 3427 pages, LIFO batch:0
> 	<7>  DMA32 zone: 60480 pages, LIFO batch:15
> 
> I have not seen any problems running on 2.6.27 using nodes that have no memory.
> 
> 
> Do we have a clear and unambiguous definition of what a node really is?
> In this case, is a board (socket) with cpus, a unique PXM but no memory
> considered a node. Even though it has no memory, it is a node (depending on the
> definition of "node") for purposes such as scheduling. The memoryless node also
> has local IO buses that want to direct interrupts to node-local cpus.
> 

how about 2.6.28, 29, and current linus tree?

we should not have NODE_DATA for a node that doesn't have memory.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 17:53   ` Jack Steiner
  2009-05-11 19:15     ` Yinghai Lu
@ 2009-05-11 19:27     ` David Rientjes
  2009-05-11 21:12       ` H. Peter Anvin
                         ` (2 more replies)
  1 sibling, 3 replies; 102+ messages in thread
From: David Rientjes @ 2009-05-11 19:27 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Andi Kleen, linux-kernel

On Mon, 11 May 2009, Jack Steiner wrote:

> Do we have a clear and unambiguous definition of what a node really is?
> In this case, is a board (socket) with cpus, a unique PXM but no memory
> considered a node. Even though it has no memory, it is a node (depending on the
> definition of "node") for purposes such as scheduling. The memoryless node also
> has local IO buses that want to direct interrupts to node-local cpus.
> 

In your example of two cpus (0-1) that are remote to the system's only 
memory and two cpus (2-3) that have affinity to that memory, it appears as 
though the kernel is considering cpus 2-3 and the memory to be a node and 
cpus 0-1 to be a memoryless node.

That's a pretty useless scenario for memoryless node support, actually, 
unless there's a third node with memory that cpus 0-1 have a different 
distance to.  cpus 0-1 have no memory that is local, so the "remote" 
memory should be considered local to them.

I don't know who has been pushing the memoryless node support, but it 
appears as though it hasn't been fully tested yet.  The NULL pglist_data 
here for node 0 seems appropriate since you don't need it unless you're 
describing memory, but the kernel implies that if a bit is set in 
node_online_map or node_possible_map that it has this associated data.

Added Andi Kleen to the cc list.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 19:15     ` Yinghai Lu
@ 2009-05-11 19:36       ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-11 19:36 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	David Rientjes, linux-kernel

Yinghai Lu wrote:
> Jack Steiner wrote:
>> On Fri, May 08, 2009 at 11:50:51PM -0700, Yinghai Lu wrote:
>>> recently there are some changes to about meaning of node_possible_map
>>>
>>> and it is some strange:
>>> the node without memory would be set in node_possible_map
>>> but some node with less NODE_MIN_SIZE will be kicked out of node_possible_map.
>>>
>>> try to fix it by adding strict_setup_node_bootmem.
>>> also remove unparse_node.
>> I still see the same panic. Entry 0 of the node_data array is NULL &
>> it is dereferenced building the zonelists.
>>
>> I'm sure that you are way ahead of me in diagnosing this problem but
>> this is a regression from previous behavior. For example, in 2.6.27, node_data
>> is created for both nodes but node 0 contains no memory:
>>
>> 	(2.6.27)
>> 	<6>SRAT: PXM 0 -> APIC 0 -> Node 0
>> 	<6>SRAT: PXM 1 -> APIC 128 -> Node 1
>> 	<6>SRAT: Node 1 PXM 1 0-fff6c000
>> 	<7>NUMA: Using 63 for the hash shift.
>> 	<6>Bootmem setup node 0 0000000000000000-0000000000000000
>> 	<3>Cannot find 212992 bytes in node 0
>> 	<6>Bootmem setup node 1 0000000000000000-0000000010000000
>> 	<6>  NODE_DATA [000000000139be80 - 00000000013cfe7f]
>> 	<6>  bootmap [00000000013d0000 -  00000000013d1fff] pages 2
>> 	<6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
>> 	<6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
>> 	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
>> 	<6>  #2 [0000200000 - 000139be38]    TEXT DATA BSS ==> [0000200000 - 000139be38]
>> 	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
>> 	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
>> 	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
>> 	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
>> 	<6>Bootmem setup node 0 0000000000000000-0000000000000000
>> 	<6>  NODE_DATA [00000000013d2000 - 0000000001405fff]
>> 	<6>  bootmap [0000000000000000 -  ffffffffffffffff] pages 0
>> 	<6>(7 early reservations) ==> bootmem [0000000000 - 0000000000]
>> 	<6>  #0 [0000000000 - 0000001000]   BIOS data page
>> 	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE
>> 	<6>  #2 [0000200000 - 000139be38]    TEXT DATA BSS
>> 	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved
>> 	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved
>> 	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap
>> 	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT
>> 	<6>    NODE_DATA(0) on node 1
>> 	<6>    bootmap(0) on node 1
>> 	<7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001600000-ffff8800019fffff] on node 1
>> 	<4>Zone PFN ranges:
>> 	<4>  DMA      0x00000000 -> 0x00001000
>> 	<4>  DMA32    0x00001000 -> 0x00100000
>> 	<4>  Normal   0x00100000 -> 0x00100000
>> 	<4>Movable zone start PFN for each node
>> 	<4>early_node_map[2] active PFN ranges
>> 	<4>    1: 0x00000000 -> 0x00000006
>> 	<4>    1: 0x00000200 -> 0x00010000
>> 	<4>Could not find start_pfn for node 0
>> 	<7>On node 0 totalpages: 0
>> 	<7>On node 1 totalpages: 65030
>> 	<7>  DMA zone: 3427 pages, LIFO batch:0
>> 	<7>  DMA32 zone: 60480 pages, LIFO batch:15
>>
>> I have not seen any problems running on 2.6.27 using nodes that have no memory.
>>
>>
>> Do we have a clear and unambiguous definition of what a node really is?
>> In this case, is a board (socket) with cpus, a unique PXM but no memory
>> considered a node. Even though it has no memory, it is a node (depending on the
>> definition of "node") for purposes such as scheduling. The memoryless node also
>> has local IO buses that want to direct interrupts to node-local cpus.
>>
> 
> how about 2.6.28, 29, and current linus tree?
> 
> we should not have NODE_DATA for a node that doesn't have memory.
> 

also, if memory is hot-added to that node later, it will get NODE_DATA on the node at that point.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 19:27     ` David Rientjes
@ 2009-05-11 21:12       ` H. Peter Anvin
  2009-05-11 21:26         ` Alan Cox
                           ` (2 more replies)
  2009-05-11 21:33       ` Jack Steiner
  2009-05-12  7:09       ` Andi Kleen
  2 siblings, 3 replies; 102+ messages in thread
From: H. Peter Anvin @ 2009-05-11 21:12 UTC (permalink / raw)
  To: David Rientjes
  Cc: Jack Steiner, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

David Rientjes wrote:
> 
> In your example of two cpus (0-1) that are remote to the system's only 
> memory and two cpus (2-3) that have affinity to that memory, it appears as 
> though the kernel is considering cpus 2-3 and the memory to be a node and 
> cpus 0-1 to be a memoryless node.
> 
> That's a pretty useless scenario for memoryless node support, actually, 
> unless there's a third node with memory that cpus 0-1 have a different 
> distance to.  cpus 0-1 have no memory that is local, so the "remote" 
> memory should be considered local to them.
> 

Should it?  It seems to me that CPUs 0-1 should be antipreferentially 
scheduled, since they will have slower access to the memory than CPUs 
2-3.  Since in this case all the memory is in the same place you could 
argue that SMP distances could do the same job, which is of course true.

However, consider now:

CPU [0-1]	- no memory
CPU [2-3]	- memory
CPU [4-5]	- memory

Each node is equidistant, but for the memory nodes there are differences 
between their own local memory and the remote memory.

CPU [0-1] cannot be considered local in either node, since they are 
further away from the memory than either, and furthermore, unlike either 
of the memory nodes, they have no preference for memory from either of 
the other two nodes (quite on the contrary; they would probably benefit 
from drawing from both.)

 > I don't know who has been pushing the memoryless node support, but it
 > appears as though it hasn't been fully tested yet.  The NULL
 > pglist_data here for node 0 seems appropriate since you don't need it
 > unless you're describing memory, but the kernel implies that if a bit
 > is set in node_online_map or node_possible_map that it has this
 > associated data.

No doubt there are still bugs.

	-hpa

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 21:12       ` H. Peter Anvin
@ 2009-05-11 21:26         ` Alan Cox
  2009-05-11 22:25         ` David Rientjes
  2009-05-12  7:15         ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Andi Kleen
  2 siblings, 0 replies; 102+ messages in thread
From: Alan Cox @ 2009-05-11 21:26 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: David Rientjes, Jack Steiner, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, Andrew Morton, Andi Kleen, linux-kernel

> CPU [0-1] cannot be considered local in either node, since they are 
> further away from the memory than either, and furthermore, unlike either 
> of the memory nodes, they have no preference for memory from either of 
> the other two nodes (quite on the contrary; they would probably benefit 
> from drawing from both.)

Surely you should schedule based on the memory bandwidth at that point ?
Assuming the data collection overhead is acceptable. A long time ago
someone did a paper on a related topic (Scheduling by memory bandwidth on
the grounds that memory not CPU bandwidth was the resource most
constrained) and that demonstrated that for quite a few processors the
memory bandwidth data is cheaply available in the profiling registers.

Alan

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 19:27     ` David Rientjes
  2009-05-11 21:12       ` H. Peter Anvin
@ 2009-05-11 21:33       ` Jack Steiner
  2009-05-11 22:56         ` David Rientjes
  2009-05-12  7:09       ` Andi Kleen
  2 siblings, 1 reply; 102+ messages in thread
From: Jack Steiner @ 2009-05-11 21:33 UTC (permalink / raw)
  To: David Rientjes
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Andi Kleen, linux-kernel

On Mon, May 11, 2009 at 12:27:49PM -0700, David Rientjes wrote:
> On Mon, 11 May 2009, Jack Steiner wrote:
> 
> > Do we have a clear and unambiguous definition of what a node really is?
> > In this case, is a board (socket) with cpus, a unique PXM but no memory
> > considered a node. Even though it has no memory, it is a node (depending on the
> > definition of "node") for purposes such as scheduling. The memoryless node also
> > has local IO buses that want to direct interrupts to node-local cpus.
> > 
> 
> In your example of two cpus (0-1) that are remote to the system's only 
> memory and two cpus (2-3) that have affinity to that memory, it appears as 
> though the kernel is considering cpus 2-3 and the memory to be a node and 
> cpus 0-1 to be a memoryless node.

Correct.


> 
> That's a pretty useless scenario for memoryless node support, actually, 
> unless there's a third node with memory that cpus 0-1 have a different 
> distance to. 

Yes, a large number of nodes exist. Most have memory but some do not.


> cpus 0-1 have no memory that is local, so the "remote" 
> memory should be considered local to them.

The cpus without local memory will obviously have to use memory from other
nodes. But the problem seems to be more complex. 

Cpus also belong to nodes. The cpu_to_node_map[] provides the mapping.
I have not tried it, but I wonder what happens if you offline all of the
memory of a node (probably not possible so this may be hypothetical for now).
Should offlining all node memory change the node that a cpu on the node
is associated with? That does not seem right.

Does offlining all node memory clear the entry in the node_data[] array?


> 
> I don't know who has been pushing the memoryless node support, but it 
> appears as though it hasn't been fully tested yet.

Agree. FWIW, it works ok in 2.6.27. I need to bisect to find where the regression
occurred.


> The NULL pglist_data 
> here for node 0 seems appropriate since you don't need it unless you're 
> describing memory, but the kernel implies that if a bit is set in 
> node_online_map or node_possible_map that it has this associated data.

> 
> Added Andi Kleen to the cc list.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-12  1:27   ` Christoph Lameter
@ 2009-05-11 21:53     ` Yinghai Lu
  2009-05-12 20:59       ` Christoph Lameter
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-11 21:53 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

Christoph Lameter wrote:
> On Fri, 8 May 2009, Yinghai Lu wrote:
> 
>> when node only have hot add range and don't have other static range.
>> that node will not be onlined, and cpus on that will be linked to nearby
>> node with memory.
>> then when that host add range is added later, we need to linked those cpus
>> back.
> 
> This going to be fun for the allocators that have put their queues on the
> respective nodes. How are the various OS structures that were allocated on
> the node that is downed / upped relocated?

NODE_DATA is newly allocated on that node when it is brought online for the first time.

per_cpu will still stay with the old nearby node, because per_cpu is preallocated for all possible cpus.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 21:12       ` H. Peter Anvin
  2009-05-11 21:26         ` Alan Cox
@ 2009-05-11 22:25         ` David Rientjes
  2009-05-12 15:06           ` Jack Steiner
  2009-05-12  7:15         ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Andi Kleen
  2 siblings, 1 reply; 102+ messages in thread
From: David Rientjes @ 2009-05-11 22:25 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Jack Steiner, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Mon, 11 May 2009, H. Peter Anvin wrote:

> > In your example of two cpus (0-1) that are remote to the system's only
> > memory and two cpus (2-3) that have affinity to that memory, it appears as
> > though the kernel is considering cpus 2-3 and the memory to be a node and
> > cpus 0-1 to be a memoryless node.
> > 
> > That's a pretty useless scenario for memoryless node support, actually,
> > unless there's a third node with memory that cpus 0-1 have a different
> > distance to.  cpus 0-1 have no memory that is local, so the "remote" memory
> > should be considered local to them.
> > 
> 
> Should it?  It seems to me that CPUs 0-1 should be antipreferentially
> scheduled, since they will have slower access to the memory than CPUs 2-3.
> Since in this case all the memory is in the same place you could argue that
> SMP distances could do the same job, which is of course true.
> 
> However, consider now:
> 
> CPU [0-1]	- no memory
> CPU [2-3]	- memory
> CPU [4-5]	- memory
> 
> Each node is equidistant, but for the memory nodes there is differences
> between their own local memory and the remote memory.
> 
> CPU [0-1] cannot be considered local in either node, since they are further
> away from the memory than either, and furthermore, unlike either of the memory
> nodes, they have no preference for memory from either of the other two nodes
> (quite on the contrary; they would probably benefit from drawing from both.)
> 

Right, there's no difference from Jack's scenario if the three nodes are 
equidistant.  I was thinking of a topology where cpu 0-1 was closer to, 
for example, cpu 2-3's memory than cpu 4-5's.

The particular topology you're referring to should have a slit that 
describes the relative distances in each direction differently.  The pxms 
that these cpus belong to will always be local to itself, but ACPI 3.0 
allows distances for different directions between the same pxms to be 
different.

That means it's possible that cpus 0-1 above have local distance to all 
memory and cpus 2-3 (and cpus 4-5) have remote distance to all nodes other 
than itself.

numactl --hardware would show something like this:

		0	1	2
	0	10	10	10
	1	20	10	20
	2	20	20	10

which is valid according to the ACPI specification.  This is based on the 
pxms to which the cpus belong so this topology would describe all members 
of those pxms and not just memory.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 21:33       ` Jack Steiner
@ 2009-05-11 22:56         ` David Rientjes
  2009-05-11 23:00           ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: David Rientjes @ 2009-05-11 22:56 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Andi Kleen, linux-kernel

On Mon, 11 May 2009, Jack Steiner wrote:

> Cpus also belong to nodes. The cpu_to_node_map[] provides the mapping.
> I have not tried it, but I wonder what happens if you offline all of the
> memory of a node (probably not possible so this may be hypothetical for now).
> Should offlining all node memory change the node that a cpu on the node
> are associated with? That does not seem right.
> 

No, it would simply become a memoryless node and NODE_DATA() should 
probably be freed or just represent spanned_pages of 0.

Memoryless node support has already been merged (and needs to be fixed), 
but the same topologies could be represented with memory-only nodes which 
may have been a better approach considering your point about memory 
offline and our lack of node hot-remove.

> Agree. FWIW, it works ok in 2.6.27. I need to bisect to find where the regression
> occurred.
> 

I'm not so sure it will help to identify which patch caused the issue, it 
depends on whether we want to support memoryless nodes or not and then 
address the issues as they arise when there is no memory attached, which 
you've done in this case.

We can fix your particular issue by deciding how node_online_map and its 
superset, node_possible_map, are handled for memoryless nodes.  Such users 
need to iterate over N_NORMAL_MEMORY instead.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 22:56         ` David Rientjes
@ 2009-05-11 23:00           ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-11 23:00 UTC (permalink / raw)
  To: David Rientjes
  Cc: Jack Steiner, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Andi Kleen, linux-kernel

David Rientjes wrote:
> On Mon, 11 May 2009, Jack Steiner wrote:
> 
>> Cpus also belong to nodes. The cpu_to_node_map[] provides the mapping.
>> I have not tried it, but I wonder what happens if you offline all of the
>> memory of a node (probably not possible so this may be hypothetical for now).
>> Should offlining all node memory change the node that a cpu on the node
>> are associated with? That does not seem right.
>>
> 
> No, it would simply become a memoryless node and NODE_DATA() should 
> probably be freed or just represent spanned_pages of 0.
> 
> Memoryless node support has already been merged (and needs to be fixed), 
> but the same topologies could be represented with memory-only nodes which 
> may have been a better approach considering your point about memory 
> offline and our lack of node hot-remove.
> 
>> Agree. FWIW, it works ok in 2.6.27. I need to bisect to find where the regression
>> occurred.
>>
> 
> I'm not so sure it will help to identify which patch caused the issue, it 
> depends on whether we want to support memoryless nodes or not and then 
> address the issues as they arise when there is no memory attached, which 
> you've done in this case.
> 
> We can fix your particular issue by deciding how node_online_map and its 
> superset, node_possible_map, are handled for memoryless nodes.  Such users 
> need to iterate over N_NORMAL_MEMORY instead.

not sure what his problem is. it seems 2.6.28, 29, 30 all don't work for him.

and current tip works on all my test systems with node that does have mem.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code
  2009-05-09  6:45 [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Yinghai Lu
  2009-05-09  6:48 ` [PATCH 2/3] x86: add numa_move_cpus_to_node Yinghai Lu
  2009-05-09  6:50 ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Yinghai Lu
@ 2009-05-12  1:02 ` Christoph Lameter
  2009-05-12 11:16 ` Mel Gorman
       [not found] ` <20090511095022.GA23121@elte.hu>
  4 siblings, 0 replies; 102+ messages in thread
From: Christoph Lameter @ 2009-05-12  1:02 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell



Reviewed-by: Christoph Lameter <cl@linux-foundation.org>



^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-09  6:48 ` [PATCH 2/3] x86: add numa_move_cpus_to_node Yinghai Lu
  2009-05-09  7:05   ` Justin P. Mattock
@ 2009-05-12  1:27   ` Christoph Lameter
  2009-05-11 21:53     ` Yinghai Lu
  1 sibling, 1 reply; 102+ messages in thread
From: Christoph Lameter @ 2009-05-12  1:27 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

On Fri, 8 May 2009, Yinghai Lu wrote:

> when node only have hot add range and don't have other static range.
> that node will not be onlined, and cpus on that will be linked to nearby
> node with memory.
> then when that host add range is added later, we need to linked those cpus
> back.

This going to be fun for the allocators that have put their queues on the
respective nodes. How are the various OS structures that were allocated on
the node that is downed / upped relocated?


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 19:27     ` David Rientjes
  2009-05-11 21:12       ` H. Peter Anvin
  2009-05-11 21:33       ` Jack Steiner
@ 2009-05-12  7:09       ` Andi Kleen
  2 siblings, 0 replies; 102+ messages in thread
From: Andi Kleen @ 2009-05-12  7:09 UTC (permalink / raw)
  To: David Rientjes
  Cc: Jack Steiner, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, Andrew Morton, Andi Kleen, linux-kernel

> I don't know who has been pushing the memoryless node support, but it 
> appears as though it hasn't been fully tested yet. 

Yep.

Every subsystem that accesses or specifies nodes directly would
need to become aware of it. That is why I originally punted
and just put these CPUs into nearby nodes.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 21:12       ` H. Peter Anvin
  2009-05-11 21:26         ` Alan Cox
  2009-05-11 22:25         ` David Rientjes
@ 2009-05-12  7:15         ` Andi Kleen
  2 siblings, 0 replies; 102+ messages in thread
From: Andi Kleen @ 2009-05-12  7:15 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: David Rientjes, Jack Steiner, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, Andrew Morton, Andi Kleen, linux-kernel

> Should it?  It seems to me that CPUs 0-1 should be antipreferentially 
> scheduled, 

You could do that, but the question is if it matters. It would
only make a difference on systems which are not fully loaded, 
and it's unclear how much.

And at some point you need to use these cores anyways; usually it's much
worse to not use a CPU and overload others than to use it with slower memory.

Is it worth adding a lot of fixes all over the kernel? Not clear
to me.

Also it's an obscure situation and it affects a lot of code, so you
would likely have to continuously fight with regressions as Jack
has discovered.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code
  2009-05-09  6:45 [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Yinghai Lu
                   ` (2 preceding siblings ...)
  2009-05-12  1:02 ` [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Christoph Lameter
@ 2009-05-12 11:16 ` Mel Gorman
  2009-05-13  5:29   ` Yinghai Lu
  2009-05-13  6:13   ` [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2 Yinghai Lu
       [not found] ` <20090511095022.GA23121@elte.hu>
  4 siblings, 2 replies; 102+ messages in thread
From: Mel Gorman @ 2009-05-12 11:16 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, Christoph Lameter, linux-kernel, Al Viro,
	Rusty Russell

On Fri, May 08, 2009 at 11:45:49PM -0700, Yinghai Lu wrote:
> 
> after
> | commit b263295dbffd33b0fbff670720fa178c30e3392a
> | Author: Christoph Lameter <clameter@sgi.com>
> | Date:   Wed Jan 30 13:30:47 2008 +0100
> |
> |    x86: 64-bit, make sparsemem vmemmap the only memory model
> 
> we don't have MEMORY_HOTPLUG_RESERVE anymore.
> 
> remove related dead code.
> 

Good spot, this removes a nice amount of code. The changelog could say
more though, how about?

=====
Historically, x86-64 had an architecture-specific method for memory hotplug
whereby it scanned the SRAT for physical memory ranges that could be
potentially used for memory hot-add later. By reserving those ranges
without physical memory, the memmap would be allocated and left dormant
until needed. This depended on the DISCONTIG memory model which has been
removed so the code implementing HOTPLUG_RESERVE is now dead.

This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE
=====

> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  arch/x86/include/asm/numa_64.h |    3 -
>  arch/x86/mm/numa_64.c          |    5 --
>  arch/x86/mm/srat_64.c          |   63 +++++++------------------------------
>  include/linux/mm.h             |    2 -
>  mm/page_alloc.c                |   69 -----------------------------------------
>  5 files changed, 12 insertions(+), 130 deletions(-)
> 
> Index: linux-2.6/arch/x86/include/asm/numa_64.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/numa_64.h
> +++ linux-2.6/arch/x86/include/asm/numa_64.h
> @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
>  extern void numa_init_array(void);
>  extern int numa_off;
>  
> -extern void srat_reserve_add_area(int nodeid);
> -extern int hotadd_percent;
> -
>  extern s16 apicid_to_node[MAX_LOCAL_APIC];
>  
>  extern unsigned long numa_free_all_bootmem(void);
> Index: linux-2.6/arch/x86/mm/numa_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/numa_64.c
> +++ linux-2.6/arch/x86/mm/numa_64.c
> @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
>  		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
>  				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
>  
> -#ifdef CONFIG_ACPI_NUMA
> -	srat_reserve_add_area(nodeid);
> -#endif
>  	node_set_online(nodeid);
>  }
>  
> @@ -608,8 +605,6 @@ static __init int numa_setup(char *opt)
>  #ifdef CONFIG_ACPI_NUMA
>  	if (!strncmp(opt, "noacpi", 6))
>  		acpi_numa = -1;
> -	if (!strncmp(opt, "hotadd=", 7))
> -		hotadd_percent = simple_strtoul(opt+7, NULL, 10);

Documentation/x86/x86_64/boot-options.txt now needs to be updated to
remove the documentation on hotadd=.

Instead of ignoring the option, should a warning now be printed saying the
option is deprecated?

>  #endif
>  	return 0;
>  }
> Index: linux-2.6/arch/x86/mm/srat_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/srat_64.c
> +++ linux-2.6/arch/x86/mm/srat_64.c
> @@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
>  static nodemask_t cpu_nodes_parsed __initdata;
>  static struct bootnode nodes[MAX_NUMNODES] __initdata;
>  static struct bootnode nodes_add[MAX_NUMNODES];
> -static int found_add_area __initdata;
> -int hotadd_percent __initdata = 0;
>  
>  static int num_node_memblks __initdata;
>  static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
> @@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
>  {
>  	struct bootnode *nd = &nodes[i];
>  
> -	if (found_add_area)
> -		return;
> -
>  	if (nd->start < start) {
>  		nd->start = start;
>  		if (nd->end < nd->start)
> @@ -86,7 +81,6 @@ static __init void bad_srat(void)
>  	int i;
>  	printk(KERN_ERR "SRAT: SRAT not used.\n");
>  	acpi_numa = -1;
> -	found_add_area = 0;
>  	for (i = 0; i < MAX_LOCAL_APIC; i++)
>  		apicid_to_node[i] = NUMA_NO_NODE;
>  	for (i = 0; i < MAX_NUMNODES; i++)
> @@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
>  	       pxm, apic_id, node);
>  }
>  
> -static int update_end_of_memory(unsigned long end) {return -1;}
> -static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
>  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
>  static inline int save_add_info(void) {return 1;}
>  #else
>  static inline int save_add_info(void) {return 0;}
>  #endif
>  /*
> - * Update nodes_add and decide if to include add are in the zone.
> - * Both SPARSE and RESERVE need nodes_add information.
> - * This code supports one contiguous hot add area per node.
> + * Update nodes_add[]
> + * This code supports one contiguous hot add area per node
>   */
> -static int __init
> -reserve_hotadd(int node, unsigned long start, unsigned long end)
> +static void __init
> +update_nodes_add(int node, unsigned long start, unsigned long end)
>  {

It's now very unclear what the purpose of this function is. I'm guessing it
should be something like

validate_hotadd_region()
This validates that the region of memory described by SRAT as suitable
for use with memory hot-add is sane

What it was for was to validate that the SRAT looked sane and then push out the
end of the node boundaries so that the memmap would get allocated. However,
because we are no longer pushing out the node boundaries, is this doing
anything useful at all any more? For sparsemem, memory-hotadd allocates
the memmap as required.

>  	unsigned long s_pfn = start >> PAGE_SHIFT;
>  	unsigned long e_pfn = end >> PAGE_SHIFT;
> -	int ret = 0, changed = 0;
> +	int changed = 0;
>  	struct bootnode *nd = &nodes_add[node];
>  
>  	/* I had some trouble with strange memory hotadd regions breaking
> @@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long s
>  	   mistakes */
>  	if ((signed long)(end - start) < NODE_MIN_SIZE) {
>  		printk(KERN_ERR "SRAT: Hotplug area too small\n");
> -		return -1;
> +		return;
>  	}
>  
>  	/* This check might be a bit too strict, but I'm keeping it for now. */
> @@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long s
>  		printk(KERN_ERR
>  			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
>  			s_pfn, e_pfn);
> -		return -1;
> -	}
> -
> -	if (!hotadd_enough_memory(&nodes_add[node]))  {
> -		printk(KERN_ERR "SRAT: Hotplug area too large\n");
> -		return -1;
> +		return;
>  	}
>  
>  	/* Looks good */
> @@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long s
>  			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
>  	}
>  
> -	ret = update_end_of_memory(nd->end);
> -
>  	if (changed)
> -	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
> -	return ret;
> +		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
> +				 nd->start, nd->end);
>  }
>  
>  /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
> @@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct ac
>  	       start, end);
>  	e820_register_active_regions(node, start >> PAGE_SHIFT,
>  				     end >> PAGE_SHIFT);
> -	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
> -						nd->end >> PAGE_SHIFT);
>  
> -	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
> -	    (reserve_hotadd(node, start, end) < 0)) {
> -		/* Ignore hotadd region. Undo damage */
> -		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
> +	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> +		update_nodes_add(node, start, end);
> +		/* restore nodes[node] */
>  		*nd = oldnode;
>  		if ((nd->start | nd->end) == 0)
>  			node_clear(node, nodes_parsed);
> @@ -510,26 +491,6 @@ static int null_slit_node_compare(int a,
>  }
>  #endif /* CONFIG_NUMA_EMU */
>  
> -void __init srat_reserve_add_area(int nodeid)
> -{
> -	if (found_add_area && nodes_add[nodeid].end) {
> -		u64 total_mb;
> -
> -		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
> -				"for node %d at %Lx-%Lx\n",
> -			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
> -		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
> -					>> PAGE_SHIFT;
> -		total_mb *= sizeof(struct page);
> -		total_mb >>= 20;
> -		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
> -				"pre-allocated memory.\n", (unsigned long long)total_mb);
> -		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
> -			       nodes_add[nodeid].end - nodes_add[nodeid].start,
> -			       BOOTMEM_DEFAULT);
> -	}
> -}
> -
>  int __node_distance(int a, int b)
>  {
>  	int index;
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned in
>  					unsigned long end_pfn);
>  extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
>  					unsigned long end_pfn);
> -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
> -					unsigned long end_pfn);
>  extern void remove_all_active_ranges(void);
>  extern unsigned long absent_pages_in_range(unsigned long start_pfn,
>  						unsigned long end_pfn);
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -150,10 +150,6 @@ static unsigned long __meminitdata dma_r
>    static int __meminitdata nr_nodemap_entries;
>    static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
>    static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
> -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> -  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
> -  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
> -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
>    static unsigned long __initdata required_kernelcore;
>    static unsigned long __initdata required_movablecore;
>    static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
> @@ -3121,64 +3117,6 @@ void __init sparse_memory_present_with_a
>  }
>  
>  /**
> - * push_node_boundaries - Push node boundaries to at least the requested boundary
> - * @nid: The nid of the node to push the boundary for
> - * @start_pfn: The start pfn of the node
> - * @end_pfn: The end pfn of the node
> - *
> - * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
> - * time. Specifically, on x86_64, SRAT will report ranges that can potentially
> - * be hotplugged even though no physical memory exists. This function allows
> - * an arch to push out the node boundaries so mem_map is allocated that can
> - * be used later.
> - */
> -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> -void __init push_node_boundaries(unsigned int nid,
> -		unsigned long start_pfn, unsigned long end_pfn)
> -{
> -	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
> -			"Entering push_node_boundaries(%u, %lu, %lu)\n",
> -			nid, start_pfn, end_pfn);
> -
> -	/* Initialise the boundary for this node if necessary */
> -	if (node_boundary_end_pfn[nid] == 0)
> -		node_boundary_start_pfn[nid] = -1UL;
> -
> -	/* Update the boundaries */
> -	if (node_boundary_start_pfn[nid] > start_pfn)
> -		node_boundary_start_pfn[nid] = start_pfn;
> -	if (node_boundary_end_pfn[nid] < end_pfn)
> -		node_boundary_end_pfn[nid] = end_pfn;
> -}
> -
> -/* If necessary, push the node boundary out for reserve hotadd */
> -static void __meminit account_node_boundary(unsigned int nid,
> -		unsigned long *start_pfn, unsigned long *end_pfn)
> -{
> -	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
> -			"Entering account_node_boundary(%u, %lu, %lu)\n",
> -			nid, *start_pfn, *end_pfn);
> -
> -	/* Return if boundary information has not been provided */
> -	if (node_boundary_end_pfn[nid] == 0)
> -		return;
> -
> -	/* Check the boundaries and update if necessary */
> -	if (node_boundary_start_pfn[nid] < *start_pfn)
> -		*start_pfn = node_boundary_start_pfn[nid];
> -	if (node_boundary_end_pfn[nid] > *end_pfn)
> -		*end_pfn = node_boundary_end_pfn[nid];
> -}
> -#else
> -void __init push_node_boundaries(unsigned int nid,
> -		unsigned long start_pfn, unsigned long end_pfn) {}
> -
> -static void __meminit account_node_boundary(unsigned int nid,
> -		unsigned long *start_pfn, unsigned long *end_pfn) {}
> -#endif
> -
> -
> -/**
>   * get_pfn_range_for_nid - Return the start and end page frames for a node
>   * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
>   * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
> @@ -3203,9 +3141,6 @@ void __meminit get_pfn_range_for_nid(uns
>  
>  	if (*start_pfn == -1UL)
>  		*start_pfn = 0;
> -
> -	/* Push the node boundaries out if requested */
> -	account_node_boundary(nid, start_pfn, end_pfn);
>  }
>  
>  /*
> @@ -3810,10 +3745,6 @@ void __init remove_all_active_ranges(voi
>  {
>  	memset(early_node_map, 0, sizeof(early_node_map));
>  	nr_nodemap_entries = 0;
> -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> -	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
> -	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
> -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
>  }
>  
>  /* Compare two active node_active_regions */

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-11 22:25         ` David Rientjes
@ 2009-05-12 15:06           ` Jack Steiner
  2009-05-12 15:10             ` Yinghai Lu
                               ` (2 more replies)
  0 siblings, 3 replies; 102+ messages in thread
From: Jack Steiner @ 2009-05-12 15:06 UTC (permalink / raw)
  To: David Rientjes
  Cc: H. Peter Anvin, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Mon, May 11, 2009 at 03:25:39PM -0700, David Rientjes wrote:
> On Mon, 11 May 2009, H. Peter Anvin wrote:
> 
> > > In your example of two cpus (0-1) that are remote to the system's only
> > > memory and two cpus (2-3) that have affinity to that memory, it appears as
> > > though the kernel is considering cpus 2-3 and the memory to be a node and
> > > cpus 0-1 to be a memoryless node.
> > > 
> > > That's a pretty useless scenario for memoryless node support, actually,
> > > unless there's a third node with memory that cpus 0-1 have a different
> > > distance to.  cpus 0-1 have no memory that is local, so the "remote" memory
> > > should be considered local to them.
> > > 
> > 
> > Should it?  It seems to me that CPUs 0-1 should be antipreferentially
> > scheduled, since they will have slower access to the memory than CPUs 2-3.
> > Since in this case all the memory is in the same place you could argue that
> > SMP distances could do the same job, which is of course true.
> > 
> > However, consider now:
> > 
> > CPU [0-1]	- no memory
> > CPU [2-3]	- memory
> > CPU [4-5]	- memory
> > 
> > Each node is equidistant, but for the memory nodes there are differences
> > between their own local memory and the remote memory.
> > 
> > CPU [0-1] cannot be considered local in either node, since they are further
> > away from the memory than either, and furthermore, unlike either of the memory
> > nodes, they have no preference for memory from either of the other two nodes
> > (quite on the contrary; they would probably benefit from drawing from both.)
> > 
> 
> Right, there's no difference from Jack's scenario if the three nodes are 
> equidistant.  I was thinking of a topology where cpu 0-1 was closer to, 
> for example, cpu 2-3's memory than cpu 4-5's.

Agree.

We actually have configurations that match both scenarios above. The
system is a blade-based system with 2 processor sockets per blade.
Memory is socket attached and each socket is in a unique PXM.

For the case where 1 socket on a blade has memory & the other does not,
the memoryless socket is very close to its neighbor and much further from
memory on any other blade.

For the case where neither socket has memory, the blade is equidistant
from 14 nodes located on adjacent blades.

One final point. In case you think this configuration makes no sense, the
sockets actually have memory. However, none of the memory is directly
accessible to the OS nor can it be referenced by cores located on the
processor sockets. The memory is reserved for high speed access to special
blade-attached IO devices. The IO devices need large 2**2n sized chunks of
memory. If the memory is fragmented so that a portion can be used by the
OS, then the max chunk size is reduced by a factor of 4.

> 
> The particular topology you're referring to should have a slit that 
> describes the relative distances in each direction differently.  The pxms 
> that these cpus belong to will always be local to itself, but ACPI 3.0 
> allows distances for different directions between the same pxms to be 
> different.
> 
> That means it's possible that cpus 0-1 above have local distance to all 
> memory and cpus 2-3 (and cpus 4-5) have remote distance to all nodes other 
> than itself.
> 
> numactl --hardware would show something like this:
> 
> 		0	1	2
> 	0	10	10	10
> 	1	20	10	20
> 	2	20	20	10
> 
> which is valid according to the ACPI specification.  This is based on the 
> pxms to which the cpus belong so this topology would describe all members 
> of those pxms and not just memory.

The BIOS currently defines unique PXMs for all nodes as implied above. The
SLIT currently looks like:
 		0	1	2
 	0	10	20	20
 	1	20	10	20
 	2	20	20	10

but I understand your point. This is an easy fix.


--- jack

	

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 15:06           ` Jack Steiner
@ 2009-05-12 15:10             ` Yinghai Lu
  2009-05-12 16:16               ` Jack Steiner
  2009-05-12 15:43             ` Andi Kleen
  2009-05-13  1:34             ` [PATCH] x86: fix system without memory on node0 Yinghai Lu
  2 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-12 15:10 UTC (permalink / raw)
  To: Jack Steiner
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

jack,

can you confirm:
1. without this patch, your system still have problem?
2. does current tip/master work for you?

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 15:06           ` Jack Steiner
  2009-05-12 15:10             ` Yinghai Lu
@ 2009-05-12 15:43             ` Andi Kleen
  2009-05-13  1:34             ` [PATCH] x86: fix system without memory on node0 Yinghai Lu
  2 siblings, 0 replies; 102+ messages in thread
From: Andi Kleen @ 2009-05-12 15:43 UTC (permalink / raw)
  To: Jack Steiner
  Cc: David Rientjes, H. Peter Anvin, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, Andrew Morton, Andi Kleen, linux-kernel

> We actually have configurations that match both scenarios above. The
> system is a blade-based system with 2 processor sockets per blade.
> Memory is socket attached and each socket is in a unique PXM.

Jack, I think the interesting part would be: If you assign the CPUs
to the next nearby node with memory. Do you see any unacceptable performance
problems from that?

I'm sure the problem could be solved, but it would be quite some work,
and that would be only worth spending if it's actually a significant
benefit.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 15:10             ` Yinghai Lu
@ 2009-05-12 16:16               ` Jack Steiner
  2009-05-12 16:40                 ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Jack Steiner @ 2009-05-12 16:16 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Tue, May 12, 2009 at 08:10:18AM -0700, Yinghai Lu wrote:
> jack,
> 
> can you confirm:
> 1. without this patch, your system still have problem?
> 2. does current tip/master work for you?

The current tip/master fails with or without the patch. AFAICT, the
failure is identical in both cases.

--- jack

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 16:16               ` Jack Steiner
@ 2009-05-12 16:40                 ` Yinghai Lu
  2009-05-12 18:03                   ` Jack Steiner
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-12 16:40 UTC (permalink / raw)
  To: Jack Steiner
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 441 bytes --]

On Tue, May 12, 2009 at 9:16 AM, Jack Steiner <steiner@sgi.com> wrote:
> On Tue, May 12, 2009 at 08:10:18AM -0700, Yinghai Lu wrote:
>> jack,
>>
>> can you confirm:
>> 1. without this patch, your system still have problem?
>> 2. does current tip/master work for you?
>
> The current tip/master fails with or without the patch. AFAICT, the
> failure is identical in both cases.

ok, can you try the attached patch to check the cpu_to_node mapping?

YH

[-- Attachment #2: debug_extra_numa_init_node.patch --]
[-- Type: text/x-patch, Size: 836 bytes --]

Subject: [PATCH] x86: extra debug for cpu_to_node mapping

print more info about cpu node mapping finally ?

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/mm/numa_64.c |   11 +++++++++++
 1 file changed, 11 insertions(+)

Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -633,6 +633,17 @@ void __init init_cpu_to_node(void)
 			continue;
 		numa_set_node(cpu, node);
 	}
+	printk(KERN_INFO "init_cpu_to_node:\n");
+	for_each_possible_cpu(cpu) {
+		int node;
+		u16 apicid;
+		node = early_cpu_to_node(cpu);
+		if (node == NUMA_NO_NODE)
+			continue;
+		apicid = cpu_to_apicid[cpu];
+		printk(KERN_INFO "cpu %d -> apicid %#x -> node %d\n",
+				 cpu, apicid, node);
+	}
 }
 #endif
 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-12 20:59       ` Christoph Lameter
@ 2009-05-12 17:16         ` Yinghai Lu
  2009-05-12 21:21           ` Christoph Lameter
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-12 17:16 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

Christoph Lameter wrote:
> On Mon, 11 May 2009, Yinghai Lu wrote:
> 
>>> This going to be fun for the allocators that have put their queues on the
>>> respective nodes. How are the various OS structures that were allocated on
>>> the node that is downed / upped relocated?
>> NODE_DATA is new allocated on that node is backed on line for the first time.
>>
>> per_cpu will still stay with old near node. because per_cpu is somehow preallocated for all possible cpu.
> 
> Slab allocators and page allocator use neither NODE_DATA nor percpu
> 
> It may be best to bring the processors down and up again if the node
> assignment changes. That will cause the allocators to reallocate the per
> cpu resources on the right node.

or let user util do that? aka before hotadd mem to that node, use /sys to bring
the cpu offline and put the cpus online after mem is added?

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 16:40                 ` Yinghai Lu
@ 2009-05-12 18:03                   ` Jack Steiner
  2009-05-12 21:31                     ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Jack Steiner @ 2009-05-12 18:03 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Tue, May 12, 2009 at 09:40:52AM -0700, Yinghai Lu wrote:
> On Tue, May 12, 2009 at 9:16 AM, Jack Steiner <steiner@sgi.com> wrote:
> > On Tue, May 12, 2009 at 08:10:18AM -0700, Yinghai Lu wrote:
> >> jack,
> >>
> >> can you confirm:
> >> 1. without this patch, your system still have problem?
> >> 2. does current tip/master work for you?
> >
> > The current tip/master fails with or without the patch. AFAICT, the
> > failure is identical in both cases.
> 
> ok, can you try attach patch to check cpu_to_node mapping?

The boot messages are somewhat inconsistent:

Earlier in boot:

	<6>Setting APIC routing to cluster x2apic.
	<6>SRAT: PXM 0 -> APIC 0 -> Node 0
	<6>SRAT: PXM 1 -> APIC 128 -> Node 1
	<6>SRAT: Node 1 PXM 1 0-fff6c000


but at the point of your debug messages:

	<6>cpu 0 -> apicid 0x0 -> node 1
	<6>cpu 1 -> apicid 0x80 -> node 1





BIOS log:

	PROM>> Build ACPI tables
	PROM>>   RSDP at 0x00000000000e0200
	PROM>>   XSDT at 0x00000000000e0240
	PROM>>   DSDT at 0x00000000000e02a0
	PROM>>   MADT at 0x00000000000e02e0 (0xa0)
	PROM>>     sapic: cpu 0, socket 0, lcpu 0, proc_id 0x0, id 0x00, eid 0x00, apicid 0x0000,
	PROM>>     sapic: cpu 1, socket 1, lcpu 0, proc_id 0x1, id 0x00, eid 0x80, apicid 0x0080,
	PROM>>     io_apic: id 8, base 0, entries 24, prq 0, arb 0
	PROM>>     io_apic: id 9, base 24, entries 24, prq 1, arb 9
	PROM>>     lapic_nmi: acpi_id 0, flags 0x5, lint 1
	PROM>>     lapic_nmi: acpi_id 1, flags 0x5, lint 1
	PROM>>     int_src_ovr: bus 0, bus_irq 0, global_irq 2, flags 5
	PROM>>     int_src_ovr: bus 0, bus_irq 9, global_irq 9, flags 13
	PROM>>   SRAT at 0x00000000000e0380
	PROM>>     Memory:
	PROM>>       blade 1, soc 1: paddr 0x0 - 0xfff6c000 (3GB + 1023MB + 442368), pxm 1
	PROM>>     Processor at 00000000000e03d8:
	PROM>>       soc 0, lcpu 0: sapicid 0x0000, pxm 0
	PROM>>       soc 1, lcpu 0: sapicid 0x0080, pxm 1
	PROM>>   SLIT at 0x00000000000e05e0, dim 2
	PROM>>       10  21
	PROM>>       21  10
	PROM>>   FADT at 0x00000000000e06a0
	PROM>>   FACS at 0x00000000000e07a0
	PROM>>   DMAR at 0x00000000000e0860

Kernel log:
	<6>Initializing cgroup subsys cpuset
	<6>Initializing cgroup subsys cpu
	<5>Linux version 2.6.30-rc5-next-20090512-medusa (steiner@alcatraz.americas.sgi.com) (gcc version 4.2.4) #4 SMP Tue May 12 12:55:26 CDT 2009
	<6>Command line: root=/dev/hda2 init=/bin/bash console=ttyS0,38400n8 fprom lpj=10000 nohpet loglevel=8 iommu=off dma32_size=4096
	<6>KERNEL supported cpus:
	<6>  Intel GenuineIntel
	<6>  AMD AuthenticAMD
	<6>  Centaur CentaurHauls
	<6>BIOS-provided physical RAM map:
	<6> BIOS-e820: 0000000000000000 - 0000000000006000 (usable)
	<6> BIOS-e820: 0000000000006000 - 0000000000200000 (reserved)
	<6> BIOS-e820: 0000000000200000 - 0000000010000000 (usable)
	<6> BIOS-e820: 0000000080000000 - 0000000090000000 (reserved)
	<6> BIOS-e820: 00000000f0000000 - 00000000fc000000 (reserved)
	<6> BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
	<6> BIOS-e820: 00000000fff60000 - 00000000fff6c000 (reserved)
	<6> BIOS-e820: 00000fe000000000 - 00000fe018000000 (reserved)
	<6>EFI v1.00 by SGI
	<6> ACPI 2.0=0xe0200  UVsystab=0xe08c0
	<6>EFI: mem00: type=7, attr=0x8, range=[0x0000000000000000-0x0000000000006000) (0MB)
	<6>EFI: mem01: type=5, attr=0x8000000000001000, range=[0x0000000000006000-0x00000000000b0000) (0MB)
	<6>EFI: mem02: type=6, attr=0x8000000000000008, range=[0x00000000000b0000-0x0000000000200000) (1MB)
	<6>EFI: mem03: type=7, attr=0x8, range=[0x0000000000200000-0x0000000010000000) (254MB)
	<6>EFI: mem04: type=6, attr=0x8000000000000001, range=[0x0000000080000000-0x0000000090000000) (256MB)
	<6>EFI: mem05: type=6, attr=0x8000000000000001, range=[0x00000000f0000000-0x00000000fc000000) (192MB)
	<6>EFI: mem06: type=6, attr=0x8000000000000001, range=[0x00000000fed1c000-0x00000000fed20000) (0MB)
	<6>EFI: mem07: type=6, attr=0x8000000000000001, range=[0x00000000fff60000-0x00000000fff6c000) (0MB)
	<6>EFI: mem08: type=11, attr=0x8000000000000001, range=[0x00000fe000000000-0x00000fe018000000) (384MB)
	<6>DMI not present or invalid.
	<6>last_pfn = 0x10000 max_arch_pfn = 0x100000000
	<7>MTRR default type: write-back
	<7>MTRR fixed ranges enabled:
	<7>  00000-FFFFF write-back
	<7>MTRR variable ranges enabled:
	<7>  0 base 0   F0000000 mask FFF F0000000 uncachable
	<7>  1 base E0  00000000 mask FF0 00000000 uncachable
	<7>  2 base F0  00000000 mask FF0 00000000 uncachable
	<7>  3 base F00 00000000 mask FF0000000000 uncachable
	<7>  4 disabled
	<7>  5 disabled
	<7>  6 disabled
	<7>  7 disabled
	<6>x86 PAT enabled: cpu 0, old 0x606060606060606, new 0x7010600070106
	<6>x2apic enabled by BIOS, switching to x2apic ops
	<6>init_memory_mapping: 0000000000000000-0000000010000000
	<7> 0000000000 - 0010000000 page 2M
	<7>kernel direct mapping tables up to 10000000 @ 93a000-93c000
	<4>ACPI: RSDP 00000000000e0200 00024 (v02       )
	<4>ACPI: XSDT 00000000000e0240 00054 (v01    SGI      UVX 00010001 FPRM 00000001)
	<4>ACPI: APIC 00000000000e02e0 00086 (v01    SGI      UVX 00010001 FPRM 00000001)
	<4>ACPI: SRAT 00000000000e0380 00078 (v01    SGI      UVX 00010001 FPRM 00000001)
	<4>ACPI: SLIT 00000000000e05e0 00030 (v01    SGI      UVX 00010001 FPRM 00000001)
	<4>ACPI: MCFG 00000000000e0640 0004C (v01    SGI      UVX 00010001 FPRM 00000001)
	<4>ACPI: FACP 00000000000e06a0 000F4 (v03    SGI      UVX 00030001 FPRM 00000001)
	<4>ACPI: DSDT 00000000000e02a0 00030 (v01    SGI      UVX 00010001 FPRM 00000001)
	<4>ACPI: FACS 00000000000e07a0 00040
	<4>ACPI: DMAR 00000000000e0860 0004C (v01    SGI      UVX 00010001 FPRM 00000001)
	<7>ACPI: Local APIC address 0xfee00000
	<6>Setting APIC routing to cluster x2apic.
	<6>SRAT: PXM 0 -> APIC 0 -> Node 0
	<6>SRAT: PXM 1 -> APIC 128 -> Node 1
	<6>SRAT: Node 1 PXM 1 0-fff6c000
	<7>NUMA: Using 63 for the hash shift.
	<6>Bootmem setup node 1 0000000000000000-0000000010000000
	<6>  NODE_DATA [0000000000939a80 - 000000000096da7f]
	<6>  bootmap [000000000096e000 -  000000000096ffff] pages 2
	<6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
	<6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
	<6>  #2 [0000200000 - 0000939a5c]    TEXT DATA BSS ==> [0000200000 - 0000939a5c]
	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
	<7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001200000-ffff8800015fffff] on node 1
	<4>Zone PFN ranges:
	<4>  DMA      0x00000000 -> 0x00001000
	<4>  DMA32    0x00001000 -> 0x00100000
	<4>  Normal   0x00100000 -> 0x00100000
	<4>Movable zone start PFN for each node
	<4>early_node_map[2] active PFN ranges
	<4>    1: 0x00000000 -> 0x00000006
	<4>    1: 0x00000200 -> 0x00010000
	<7>On node 1 totalpages: 65030
	<7>  DMA zone: 56 pages used for memmap
	<7>  DMA zone: 1948 pages reserved
	<7>  DMA zone: 1586 pages, LIFO batch:0
	<7>  DMA32 zone: 840 pages used for memmap
	<7>  DMA32 zone: 60600 pages, LIFO batch:15
	<6>ACPI: PM-Timer IO Port: 0x1008
	<7>ACPI: Local APIC address 0xfee00000
	<6>Setting APIC routing to cluster x2apic.
	<6>ACPI: LSAPIC (acpi_id[0x00] lsapic_id[0x00] lsapic_eid[0x00] enabled)
	<6>ACPI: LSAPIC (acpi_id[0x01] lsapic_id[0x00] lsapic_eid[0x80] enabled)
	<6>ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
	<6>ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
	<6>ACPI: IOAPIC (id[0x08] address[0xfec00000] gsi_base[0])
	<6>IOAPIC[0]: apic_id 8, version 0, address 0xfec00000, GSI 0-23
	<6>ACPI: IOAPIC (id[0x09] address[0xfec80000] gsi_base[24])
	<6>IOAPIC[1]: apic_id 9, version 0, address 0xfec80000, GSI 24-24
	<6>ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge)
	<6>ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
	<7>ACPI: IRQ0 used by override.
	<7>ACPI: IRQ2 used by override.
	<7>ACPI: IRQ9 used by override.
	<6>Using ACPI (MADT) for SMP configuration information
	<6>SMP: Allowing 2 CPUs, 0 hotplug CPUs
	<6>init_cpu_to_node:
	<6>cpu 0 -> apicid 0x0 -> node 1
	<6>cpu 1 -> apicid 0x80 -> node 1
	<7>nr_irqs_gsi: 25
	<6>PM: Registered nosave memory: 0000000000006000 - 0000000000200000
	<6>Allocating PCI resources starting at 18000000 (gap: 10000000:70000000)
	<6>NR_CPUS:4096 nr_cpumask_bits:2 nr_cpu_ids:2 nr_node_ids:2
	<6>PERCPU: Embedded 26 pages at ffff880001005000, static data 76384 bytes


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-11 21:53     ` Yinghai Lu
@ 2009-05-12 20:59       ` Christoph Lameter
  2009-05-12 17:16         ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Christoph Lameter @ 2009-05-12 20:59 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

On Mon, 11 May 2009, Yinghai Lu wrote:

> > This going to be fun for the allocators that have put their queues on the
> > respective nodes. How are the various OS structures that were allocated on
> > the node that is downed / upped relocated?
>
> NODE_DATA is newly allocated on that node when it is brought online for the first time.
>
> per_cpu will still stay with the old nearby node, because per_cpu is somehow preallocated for all possible cpus.

Slab allocators and page allocator use neither NODE_DATA nor percpu

It may be best to bring the processors down and up again if the node
assignment changes. That will cause the allocators to reallocate the per
cpu resources on the right node.


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-12 17:16         ` Yinghai Lu
@ 2009-05-12 21:21           ` Christoph Lameter
  2009-05-13  5:39             ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Christoph Lameter @ 2009-05-12 21:21 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

On Tue, 12 May 2009, Yinghai Lu wrote:

> or let a user utility do that? i.e. before hot-adding mem to that node, use /sys to bring
> the cpus offline and put the cpus online after mem is added?

Ok, then we won't need the kernel.


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 18:03                   ` Jack Steiner
@ 2009-05-12 21:31                     ` Yinghai Lu
  2009-05-12 21:58                       ` Jack Steiner
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-12 21:31 UTC (permalink / raw)
  To: Jack Steiner
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Tue, May 12, 2009 at 11:03 AM, Jack Steiner <steiner@sgi.com> wrote:
> On Tue, May 12, 2009 at 09:40:52AM -0700, Yinghai Lu wrote:
>> On Tue, May 12, 2009 at 9:16 AM, Jack Steiner <steiner@sgi.com> wrote:
>> > On Tue, May 12, 2009 at 08:10:18AM -0700, Yinghai Lu wrote:
>> >> jack,
>> >>
>> >> can you confirm:
>> >> 1. without this patch, your system still have problem?
>> >> 2. does current tip/master work for you?
>> >
>> > The current tip/master fails with or without the patch. AFAICT, the
>> > failure is identical in both cases.
>>
>> ok, can you try attach patch to check cpu_to_node mapping?
>
> The boot messages are somewhat inconsistent:
>
> Earlier in boot:
>
>        <6>Setting APIC routing to cluster x2apic.
>        <6>SRAT: PXM 0 -> APIC 0 -> Node 0
>        <6>SRAT: PXM 1 -> APIC 128 -> Node 1
>        <6>SRAT: Node 1 PXM 1 0-fff6c000
>
>
> but at the point of your debug messages:
>
>        <6>cpu 0 -> apicid 0x0 -> node 1
>        <6>cpu 1 -> apicid 0x80 -> node 1

that means cpu0 is mapped to node1, because node0 doesn't have memory installed.

>
>
>
>
>
> BIOS log:
>
>        PROM>> Build ACPI tables
>        PROM>>   RSDP at 0x00000000000e0200
>        PROM>>   XSDT at 0x00000000000e0240
>        PROM>>   DSDT at 0x00000000000e02a0
>        PROM>>   MADT at 0x00000000000e02e0 (0xa0)
>        PROM>>     sapic: cpu 0, socket 0, lcpu 0, proc_id 0x0, id 0x00, eid 0x00, apicid 0x0000,
>        PROM>>     sapic: cpu 1, socket 1, lcpu 0, proc_id 0x1, id 0x00, eid 0x80, apicid 0x0080,
>        PROM>>     io_apic: id 8, base 0, entries 24, prq 0, arb 0
>        PROM>>     io_apic: id 9, base 24, entries 24, prq 1, arb 9
>        PROM>>     lapic_nmi: acpi_id 0, flags 0x5, lint 1
>        PROM>>     lapic_nmi: acpi_id 1, flags 0x5, lint 1
>        PROM>>     int_src_ovr: bus 0, bus_irq 0, global_irq 2, flags 5
>        PROM>>     int_src_ovr: bus 0, bus_irq 9, global_irq 9, flags 13
>        PROM>>   SRAT at 0x00000000000e0380
>        PROM>>     Memory:
>        PROM>>       blade 1, soc 1: paddr 0x0 - 0xfff6c000 (3GB + 1023MB + 442368), pxm 1
>        PROM>>     Processor at 00000000000e03d8:
>        PROM>>       soc 0, lcpu 0: sapicid 0x0000, pxm 0
>        PROM>>       soc 1, lcpu 0: sapicid 0x0080, pxm 1
>        PROM>>   SLIT at 0x00000000000e05e0, dim 2
>        PROM>>       10  21
>        PROM>>       21  10
>        PROM>>   FADT at 0x00000000000e06a0
>        PROM>>   FACS at 0x00000000000e07a0
>        PROM>>   DMAR at 0x00000000000e0860
>
> Kernel log:
>        <6>Initializing cgroup subsys cpuset
>        <6>Initializing cgroup subsys cpu
>        <5>Linux version 2.6.30-rc5-next-20090512-medusa (steiner@alcatraz.americas.sgi.com) (gcc version 4.2.4) #4 SMP Tue May 12 12:55:26 CDT 2009
>        <6>Command line: root=/dev/hda2 init=/bin/bash console=ttyS0,38400n8 fprom lpj=10000 nohpet loglevel=8 iommu=off dma32_size=4096
>        <6>KERNEL supported cpus:
>        <6>  Intel GenuineIntel
>        <6>  AMD AuthenticAMD
>        <6>  Centaur CentaurHauls
>        <6>BIOS-provided physical RAM map:
>        <6> BIOS-e820: 0000000000000000 - 0000000000006000 (usable)
>        <6> BIOS-e820: 0000000000006000 - 0000000000200000 (reserved)
>        <6> BIOS-e820: 0000000000200000 - 0000000010000000 (usable)
>        <6> BIOS-e820: 0000000080000000 - 0000000090000000 (reserved)
>        <6> BIOS-e820: 00000000f0000000 - 00000000fc000000 (reserved)
>        <6> BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
>        <6> BIOS-e820: 00000000fff60000 - 00000000fff6c000 (reserved)
>        <6> BIOS-e820: 00000fe000000000 - 00000fe018000000 (reserved)
>        <6>EFI v1.00 by SGI
>        <6> ACPI 2.0=0xe0200  UVsystab=0xe08c0
>        <6>EFI: mem00: type=7, attr=0x8, range=[0x0000000000000000-0x0000000000006000) (0MB)
>        <6>EFI: mem01: type=5, attr=0x8000000000001000, range=[0x0000000000006000-0x00000000000b0000) (0MB)
>        <6>EFI: mem02: type=6, attr=0x8000000000000008, range=[0x00000000000b0000-0x0000000000200000) (1MB)
>        <6>EFI: mem03: type=7, attr=0x8, range=[0x0000000000200000-0x0000000010000000) (254MB)
>        <6>EFI: mem04: type=6, attr=0x8000000000000001, range=[0x0000000080000000-0x0000000090000000) (256MB)
>        <6>EFI: mem05: type=6, attr=0x8000000000000001, range=[0x00000000f0000000-0x00000000fc000000) (192MB)
>        <6>EFI: mem06: type=6, attr=0x8000000000000001, range=[0x00000000fed1c000-0x00000000fed20000) (0MB)
>        <6>EFI: mem07: type=6, attr=0x8000000000000001, range=[0x00000000fff60000-0x00000000fff6c000) (0MB)
>        <6>EFI: mem08: type=11, attr=0x8000000000000001, range=[0x00000fe000000000-0x00000fe018000000) (384MB)
>        <6>DMI not present or invalid.
>        <6>last_pfn = 0x10000 max_arch_pfn = 0x100000000
>        <7>MTRR default type: write-back
>        <7>MTRR fixed ranges enabled:
>        <7>  00000-FFFFF write-back
>        <7>MTRR variable ranges enabled:
>        <7>  0 base 0   F0000000 mask FFF F0000000 uncachable
>        <7>  1 base E0  00000000 mask FF0 00000000 uncachable
>        <7>  2 base F0  00000000 mask FF0 00000000 uncachable
>        <7>  3 base F00 00000000 mask FF0000000000 uncachable
>        <7>  4 disabled
>        <7>  5 disabled
>        <7>  6 disabled
>        <7>  7 disabled
>        <6>x86 PAT enabled: cpu 0, old 0x606060606060606, new 0x7010600070106
>        <6>x2apic enabled by BIOS, switching to x2apic ops
>        <6>init_memory_mapping: 0000000000000000-0000000010000000
>        <7> 0000000000 - 0010000000 page 2M
>        <7>kernel direct mapping tables up to 10000000 @ 93a000-93c000
>        <4>ACPI: RSDP 00000000000e0200 00024 (v02       )
>        <4>ACPI: XSDT 00000000000e0240 00054 (v01    SGI      UVX 00010001 FPRM 00000001)
>        <4>ACPI: APIC 00000000000e02e0 00086 (v01    SGI      UVX 00010001 FPRM 00000001)
>        <4>ACPI: SRAT 00000000000e0380 00078 (v01    SGI      UVX 00010001 FPRM 00000001)
>        <4>ACPI: SLIT 00000000000e05e0 00030 (v01    SGI      UVX 00010001 FPRM 00000001)
>        <4>ACPI: MCFG 00000000000e0640 0004C (v01    SGI      UVX 00010001 FPRM 00000001)
>        <4>ACPI: FACP 00000000000e06a0 000F4 (v03    SGI      UVX 00030001 FPRM 00000001)
>        <4>ACPI: DSDT 00000000000e02a0 00030 (v01    SGI      UVX 00010001 FPRM 00000001)
>        <4>ACPI: FACS 00000000000e07a0 00040
>        <4>ACPI: DMAR 00000000000e0860 0004C (v01    SGI      UVX 00010001 FPRM 00000001)
>        <7>ACPI: Local APIC address 0xfee00000
>        <6>Setting APIC routing to cluster x2apic.
>        <6>SRAT: PXM 0 -> APIC 0 -> Node 0
>        <6>SRAT: PXM 1 -> APIC 128 -> Node 1
>        <6>SRAT: Node 1 PXM 1 0-fff6c000
>        <7>NUMA: Using 63 for the hash shift.
>        <6>Bootmem setup node 1 0000000000000000-0000000010000000
>        <6>  NODE_DATA [0000000000939a80 - 000000000096da7f]
>        <6>  bootmap [000000000096e000 -  000000000096ffff] pages 2
>        <6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
>        <6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
>        <6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
>        <6>  #2 [0000200000 - 0000939a5c]    TEXT DATA BSS ==> [0000200000 - 0000939a5c]
>        <6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
>        <6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
>        <6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
>        <6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
>        <7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001200000-ffff8800015fffff] on node 1
>        <4>Zone PFN ranges:
>        <4>  DMA      0x00000000 -> 0x00001000
>        <4>  DMA32    0x00001000 -> 0x00100000
>        <4>  Normal   0x00100000 -> 0x00100000
>        <4>Movable zone start PFN for each node
>        <4>early_node_map[2] active PFN ranges
>        <4>    1: 0x00000000 -> 0x00000006
>        <4>    1: 0x00000200 -> 0x00010000
>        <7>On node 1 totalpages: 65030
>        <7>  DMA zone: 56 pages used for memmap
>        <7>  DMA zone: 1948 pages reserved
>        <7>  DMA zone: 1586 pages, LIFO batch:0
>        <7>  DMA32 zone: 840 pages used for memmap
>        <7>  DMA32 zone: 60600 pages, LIFO batch:15
>        <6>ACPI: PM-Timer IO Port: 0x1008
>        <7>ACPI: Local APIC address 0xfee00000
>        <6>Setting APIC routing to cluster x2apic.

x2apic related?

can you disable x2apic in kernel?

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 21:31                     ` Yinghai Lu
@ 2009-05-12 21:58                       ` Jack Steiner
  2009-05-12 23:13                         ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Jack Steiner @ 2009-05-12 21:58 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

> 
> x2apic related?

No. 



> 
> can you disable x2apic in kernel?

Yes - We have a BIOS option to allow us to boot either in legacy apic mode OR
in x2apic mode.

Both behave the same...


	<6>Initializing cgroup subsys cpuset
	<6>Initializing cgroup subsys cpu
	<5>Linux version 2.6.30-rc5-next-20090512-medusa (steiner@alcatraz.americas.sgi.com) (gcc version 4.2.4) #7 SMP Tue May 12 16:54:27 CDT 2009
	<6>Command line: root=/dev/hda2 init=/bin/bash console=ttyS0,38400n8 fprom lpj=10000 nohpet loglevel=8 iommu=off dma32_size=4096
	<6>KERNEL supported cpus:
	<6>  Intel GenuineIntel
	<6>  AMD AuthenticAMD
	<6>  Centaur CentaurHauls
	<6>BIOS-provided physical RAM map:
	<6> BIOS-e820: 0000000000000000 - 0000000000006000 (usable)
	<6> BIOS-e820: 0000000000006000 - 0000000000200000 (reserved)
	<6> BIOS-e820: 0000000000200000 - 0000000010000000 (usable)
	<6> BIOS-e820: 0000000080000000 - 0000000090000000 (reserved)
	<6> BIOS-e820: 00000000f0000000 - 00000000fc000000 (reserved)
	<6> BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
	<6> BIOS-e820: 00000000fff60000 - 00000000fff6c000 (reserved)
	<6> BIOS-e820: 00000fe000000000 - 00000fe018000000 (reserved)
	<6>EFI v1.00 by SGI
	<6> ACPI 2.0=0xe0200  UVsystab=0xe08c0
	<6>EFI: mem00: type=7, attr=0x8, range=[0x0000000000000000-0x0000000000006000) (0MB)
	<6>EFI: mem01: type=5, attr=0x8000000000001000, range=[0x0000000000006000-0x00000000000b0000) (0MB)
	<6>EFI: mem02: type=6, attr=0x8000000000000008, range=[0x00000000000b0000-0x0000000000200000) (1MB)
	<6>EFI: mem03: type=7, attr=0x8, range=[0x0000000000200000-0x0000000010000000) (254MB)
	<6>EFI: mem04: type=6, attr=0x8000000000000001, range=[0x0000000080000000-0x0000000090000000) (256MB)
	<6>EFI: mem05: type=6, attr=0x8000000000000001, range=[0x00000000f0000000-0x00000000fc000000) (192MB)
	<6>EFI: mem06: type=6, attr=0x8000000000000001, range=[0x00000000fed1c000-0x00000000fed20000) (0MB)
	<6>EFI: mem07: type=6, attr=0x8000000000000001, range=[0x00000000fff60000-0x00000000fff6c000) (0MB)
	<6>EFI: mem08: type=11, attr=0x8000000000000001, range=[0x00000fe000000000-0x00000fe018000000) (384MB)
	<6>DMI not present or invalid.
	<6>last_pfn = 0x10000 max_arch_pfn = 0x100000000
	<7>MTRR default type: write-back
	<7>MTRR fixed ranges enabled:
	<7>  00000-FFFFF write-back
	<7>MTRR variable ranges enabled:
	<7>  0 base 0   F0000000 mask FFF F0000000 uncachable
	<7>  1 base E0  00000000 mask FF0 00000000 uncachable
	<7>  2 base F0  00000000 mask FF0 00000000 uncachable
	<7>  3 base F00 00000000 mask FF0000000000 uncachable
	<7>  4 disabled
	<7>  5 disabled
	<7>  6 disabled
	<7>  7 disabled
	<6>x86 PAT enabled: cpu 0, old 0x606060606060606, new 0x7010600070106
	<6>init_memory_mapping: 0000000000000000-0000000010000000
	<7> 0000000000 - 0010000000 page 2M
	<7>kernel direct mapping tables up to 10000000 @ 93a000-93c000
	<4>ACPI: RSDP 00000000000e0200 00024 (v02       )
	<4>ACPI: XSDT 00000000000e0240 00054 (v01    SGI      UVL 00010001 FPRM 00000001)
	<4>ACPI: APIC 00000000000e02e0 00074 (v01    SGI      UVL 00010001 FPRM 00000001)
	<4>ACPI: SRAT 00000000000e0380 00078 (v01    SGI      UVL 00010001 FPRM 00000001)
	<4>ACPI: SLIT 00000000000e05e0 00030 (v01    SGI      UVL 00010001 FPRM 00000001)
	<4>ACPI: MCFG 00000000000e0640 0004C (v01    SGI      UVL 00010001 FPRM 00000001)
	<4>ACPI: FACP 00000000000e06a0 000F4 (v03    SGI      UVL 00030001 FPRM 00000001)
	<4>ACPI: DSDT 00000000000e02a0 00030 (v01    SGI      UVL 00010001 FPRM 00000001)
	<4>ACPI: FACS 00000000000e07a0 00040
	<4>ACPI: DMAR 00000000000e0860 0004C (v01    SGI      UVL 00010001 FPRM 00000001)
	<7>ACPI: Local APIC address 0xfee00000
	<6>SRAT: PXM 0 -> APIC 0 -> Node 0
	<6>SRAT: PXM 1 -> APIC 128 -> Node 1
	<6>SRAT: Node 1 PXM 1 0-fff6c000
	<7>NUMA: Using 63 for the hash shift.
	<6>Bootmem setup node 1 0000000000000000-0000000010000000
	<6>  NODE_DATA [0000000000939a80 - 000000000096da7f]
	<6>  bootmap [000000000096e000 -  000000000096ffff] pages 2
	<6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
	<6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
	<6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
	<6>  #2 [0000200000 - 0000939a5c]    TEXT DATA BSS ==> [0000200000 - 0000939a5c]
	<6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
	<6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
	<6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
	<6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
	<7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001200000-ffff8800015fffff] on node 1
	<4>Zone PFN ranges:
	<4>  DMA      0x00000000 -> 0x00001000
	<4>  DMA32    0x00001000 -> 0x00100000
	<4>  Normal   0x00100000 -> 0x00100000
	<4>Movable zone start PFN for each node
	<4>early_node_map[2] active PFN ranges
	<4>    1: 0x00000000 -> 0x00000006
	<4>    1: 0x00000200 -> 0x00010000
	<7>On node 1 totalpages: 65030
	<7>  DMA zone: 56 pages used for memmap
	<7>  DMA zone: 1948 pages reserved
	<7>  DMA zone: 1586 pages, LIFO batch:0
	<7>  DMA32 zone: 840 pages used for memmap
	<7>  DMA32 zone: 60600 pages, LIFO batch:15
	<6>ACPI: PM-Timer IO Port: 0x1008
	<7>ACPI: Local APIC address 0xfee00000
	<6>ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
	<6>ACPI: LAPIC (acpi_id[0x01] lapic_id[0x80] enabled)
	<6>ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
	<6>ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
	<6>ACPI: IOAPIC (id[0x08] address[0xfec00000] gsi_base[0])
	<6>IOAPIC[0]: apic_id 8, version 0, address 0xfec00000, GSI 0-23
	<6>ACPI: IOAPIC (id[0x09] address[0xfec80000] gsi_base[24])
	<6>IOAPIC[1]: apic_id 9, version 0, address 0xfec80000, GSI 24-24
	<6>ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge)
	<6>ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
	<7>ACPI: IRQ0 used by override.
	<7>ACPI: IRQ2 used by override.
	<7>ACPI: IRQ9 used by override.
	<6>Using ACPI (MADT) for SMP configuration information
	<6>SMP: Allowing 2 CPUs, 0 hotplug CPUs
	<6>init_cpu_to_node:
	<6>cpu 0 -> apicid 0x0 -> node 1
	<6>cpu 1 -> apicid 0x80 -> node 1
	<7>nr_irqs_gsi: 25
	<6>PM: Registered nosave memory: 0000000000006000 - 0000000000200000
	<6>Allocating PCI resources starting at 18000000 (gap: 10000000:70000000)
	<6>NR_CPUS:4096 nr_cpumask_bits:2 nr_cpu_ids:2 nr_node_ids:2
	<6>PERCPU: Embedded 26 pages at ffff880001005000, static data 76384 bytes
	<4> [<ffffffff806bd19e>] early_idt_handler+0x5e/0x71
	<4> [<ffffffff802942ad>] ? build_zonelists_node+0x2f/0x70
	<4> [<ffffffff8023417d>] ? __node_distance+0x59/0x70
	<4> [<ffffffff80295550>] __build_all_zonelists+0x1ae/0x55a
	<4> [<ffffffff80295b43>] build_all_zonelists+0x1b5/0x264
	<4> [<ffffffff806bdb73>] start_kernel+0x17a/0x3c5
	<4> [<ffffffff806bd140>] ? early_idt_handler+0x0/0x71
	<4> [<ffffffff806bd2a7>] x86_64_start_reservations+0xae/0xb2
	<4> [<ffffffff806bd3fd>] x86_64_start_kernel+0x152/0x161
	<4>RIP build_zonelists_node+0x2f/0x70




^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 21:58                       ` Jack Steiner
@ 2009-05-12 23:13                         ` Yinghai Lu
  2009-05-12 23:26                           ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-12 23:13 UTC (permalink / raw)
  To: Jack Steiner
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Tue, May 12, 2009 at 2:58 PM, Jack Steiner <steiner@sgi.com> wrote:
>>
>> x2apic related?
>
> No.
>
>
>
>>
>> can you disable x2apic in kernel?
>
> Yes - We have a BIOS option to allow us to boot either in legacy apic mode OR
> in x2apic mode.
>
> Both behave the same...
>
>
>        <6>Initializing cgroup subsys cpuset
>        <6>Initializing cgroup subsys cpu
>        <5>Linux version 2.6.30-rc5-next-20090512-medusa (steiner@alcatraz.americas.sgi.com) (gcc version 4.2.4) #7 SMP Tue May 12 16:54:27 CDT 2009
>        <6>Command line: root=/dev/hda2 init=/bin/bash console=ttyS0,38400n8 fprom lpj=10000 nohpet loglevel=8 iommu=off dma32_size=4096
>        <6>KERNEL supported cpus:
>        <6>  Intel GenuineIntel
>        <6>  AMD AuthenticAMD
>        <6>  Centaur CentaurHauls
>        <6>BIOS-provided physical RAM map:
>        <6> BIOS-e820: 0000000000000000 - 0000000000006000 (usable)
>        <6> BIOS-e820: 0000000000006000 - 0000000000200000 (reserved)
>        <6> BIOS-e820: 0000000000200000 - 0000000010000000 (usable)
>        <6> BIOS-e820: 0000000080000000 - 0000000090000000 (reserved)
>        <6> BIOS-e820: 00000000f0000000 - 00000000fc000000 (reserved)
>        <6> BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
>        <6> BIOS-e820: 00000000fff60000 - 00000000fff6c000 (reserved)
>        <6> BIOS-e820: 00000fe000000000 - 00000fe018000000 (reserved)
>        <6>EFI v1.00 by SGI
>        <6> ACPI 2.0=0xe0200  UVsystab=0xe08c0
>        <6>EFI: mem00: type=7, attr=0x8, range=[0x0000000000000000-0x0000000000006000) (0MB)
>        <6>EFI: mem01: type=5, attr=0x8000000000001000, range=[0x0000000000006000-0x00000000000b0000) (0MB)
>        <6>EFI: mem02: type=6, attr=0x8000000000000008, range=[0x00000000000b0000-0x0000000000200000) (1MB)
>        <6>EFI: mem03: type=7, attr=0x8, range=[0x0000000000200000-0x0000000010000000) (254MB)
>        <6>EFI: mem04: type=6, attr=0x8000000000000001, range=[0x0000000080000000-0x0000000090000000) (256MB)
>        <6>EFI: mem05: type=6, attr=0x8000000000000001, range=[0x00000000f0000000-0x00000000fc000000) (192MB)
>        <6>EFI: mem06: type=6, attr=0x8000000000000001, range=[0x00000000fed1c000-0x00000000fed20000) (0MB)
>        <6>EFI: mem07: type=6, attr=0x8000000000000001, range=[0x00000000fff60000-0x00000000fff6c000) (0MB)
>        <6>EFI: mem08: type=11, attr=0x8000000000000001, range=[0x00000fe000000000-0x00000fe018000000) (384MB)
>        <6>DMI not present or invalid.
>        <6>last_pfn = 0x10000 max_arch_pfn = 0x100000000
>        <7>MTRR default type: write-back
>        <7>MTRR fixed ranges enabled:
>        <7>  00000-FFFFF write-back
>        <7>MTRR variable ranges enabled:
>        <7>  0 base 0   F0000000 mask FFF F0000000 uncachable
>        <7>  1 base E0  00000000 mask FF0 00000000 uncachable
>        <7>  2 base F0  00000000 mask FF0 00000000 uncachable
>        <7>  3 base F00 00000000 mask FF0000000000 uncachable
>        <7>  4 disabled
>        <7>  5 disabled
>        <7>  6 disabled
>        <7>  7 disabled
>        <6>x86 PAT enabled: cpu 0, old 0x606060606060606, new 0x7010600070106
>        <6>init_memory_mapping: 0000000000000000-0000000010000000
>        <7> 0000000000 - 0010000000 page 2M
>        <7>kernel direct mapping tables up to 10000000 @ 93a000-93c000
>        <4>ACPI: RSDP 00000000000e0200 00024 (v02       )
>        <4>ACPI: XSDT 00000000000e0240 00054 (v01    SGI      UVL 00010001 FPRM 00000001)
>        <4>ACPI: APIC 00000000000e02e0 00074 (v01    SGI      UVL 00010001 FPRM 00000001)
>        <4>ACPI: SRAT 00000000000e0380 00078 (v01    SGI      UVL 00010001 FPRM 00000001)
>        <4>ACPI: SLIT 00000000000e05e0 00030 (v01    SGI      UVL 00010001 FPRM 00000001)
>        <4>ACPI: MCFG 00000000000e0640 0004C (v01    SGI      UVL 00010001 FPRM 00000001)
>        <4>ACPI: FACP 00000000000e06a0 000F4 (v03    SGI      UVL 00030001 FPRM 00000001)
>        <4>ACPI: DSDT 00000000000e02a0 00030 (v01    SGI      UVL 00010001 FPRM 00000001)
>        <4>ACPI: FACS 00000000000e07a0 00040
>        <4>ACPI: DMAR 00000000000e0860 0004C (v01    SGI      UVL 00010001 FPRM 00000001)
>        <7>ACPI: Local APIC address 0xfee00000
>        <6>SRAT: PXM 0 -> APIC 0 -> Node 0
>        <6>SRAT: PXM 1 -> APIC 128 -> Node 1
>        <6>SRAT: Node 1 PXM 1 0-fff6c000
>        <7>NUMA: Using 63 for the hash shift.
>        <6>Bootmem setup node 1 0000000000000000-0000000010000000
>        <6>  NODE_DATA [0000000000939a80 - 000000000096da7f]
>        <6>  bootmap [000000000096e000 -  000000000096ffff] pages 2
>        <6>(7 early reservations) ==> bootmem [0000000000 - 0010000000]
>        <6>  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
>        <6>  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
>        <6>  #2 [0000200000 - 0000939a5c]    TEXT DATA BSS ==> [0000200000 - 0000939a5c]
>        <6>  #3 [000009f000 - 00000e0900]    BIOS reserved ==> [000009f000 - 00000e0900]
>        <6>  #4 [00000e0a68 - 0000100000]    BIOS reserved ==> [00000e0a68 - 0000100000]
>        <6>  #5 [00000e0900 - 00000e0a68]       EFI memmap ==> [00000e0900 - 00000e0a68]
>        <6>  #6 [0000001000 - 0000001030]        ACPI SLIT ==> [0000001000 - 0000001030]
>        <7> [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001200000-ffff8800015fffff] on node 1
>        <4>Zone PFN ranges:
>        <4>  DMA      0x00000000 -> 0x00001000
>        <4>  DMA32    0x00001000 -> 0x00100000
>        <4>  Normal   0x00100000 -> 0x00100000
>        <4>Movable zone start PFN for each node
>        <4>early_node_map[2] active PFN ranges
>        <4>    1: 0x00000000 -> 0x00000006
>        <4>    1: 0x00000200 -> 0x00010000
>        <7>On node 1 totalpages: 65030
>        <7>  DMA zone: 56 pages used for memmap
>        <7>  DMA zone: 1948 pages reserved
>        <7>  DMA zone: 1586 pages, LIFO batch:0
>        <7>  DMA32 zone: 840 pages used for memmap
>        <7>  DMA32 zone: 60600 pages, LIFO batch:15
>        <6>ACPI: PM-Timer IO Port: 0x1008
>        <7>ACPI: Local APIC address 0xfee00000
>        <6>ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
>        <6>ACPI: LAPIC (acpi_id[0x01] lapic_id[0x80] enabled)
>        <6>ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
>        <6>ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
>        <6>ACPI: IOAPIC (id[0x08] address[0xfec00000] gsi_base[0])
>        <6>IOAPIC[0]: apic_id 8, version 0, address 0xfec00000, GSI 0-23
>        <6>ACPI: IOAPIC (id[0x09] address[0xfec80000] gsi_base[24])
>        <6>IOAPIC[1]: apic_id 9, version 0, address 0xfec80000, GSI 24-24
>        <6>ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge)
>        <6>ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
>        <7>ACPI: IRQ0 used by override.
>        <7>ACPI: IRQ2 used by override.
>        <7>ACPI: IRQ9 used by override.
>        <6>Using ACPI (MADT) for SMP configuration information
>        <6>SMP: Allowing 2 CPUs, 0 hotplug CPUs
>        <6>init_cpu_to_node:
>        <6>cpu 0 -> apicid 0x0 -> node 1
>        <6>cpu 1 -> apicid 0x80 -> node 1
>        <7>nr_irqs_gsi: 25
>        <6>PM: Registered nosave memory: 0000000000006000 - 0000000000200000
>        <6>Allocating PCI resources starting at 18000000 (gap: 10000000:70000000)
>        <6>NR_CPUS:4096 nr_cpumask_bits:2 nr_cpu_ids:2 nr_node_ids:2

How about changing NR_CPUS to 128?

I will find a system where only node1 has memory.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 3/3] x86: fix node_possible_map logic -v2
  2009-05-12 23:13                         ` Yinghai Lu
@ 2009-05-12 23:26                           ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-12 23:26 UTC (permalink / raw)
  To: Jack Steiner
  Cc: David Rientjes, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, Andi Kleen, linux-kernel

On Tue, May 12, 2009 at 4:13 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
> will find one system that only have node1 has memory.

reproduced that ...

[    0.000000] Linux version 2.6.30-rc5-tip-01542-g60b6b0b-dirty
(yhlu@linux-mstp) (gcc version 4.3.2 [gcc-4_3-branch revision 141291]
(SUSE Linux) ) #222 SMP Tue May 12 13:45:54 PDT 2009ing
kernel.org/mydisk13_x86_64.gz
[    0.000000] Command line: console=uart8250,io,0x3f8,115200n8
initrd=kernel.org/mydisk13_x86_64.gz rw root=/dev/ram0 debug
unknown_nmi_panic initcall_debug apic=debug pci=routeirq,lastbus=255
ip=dhcp load_ramdisk=1 ramdisk_size=131072
BOOT_IMAGE=kernel.org/bzImage_2.6.30_k8.2
[    0.000000] KERNEL supported cpus:
[    0.000000]   Intel GenuineIntel
[    0.000000]   AMD AuthenticAMD
[    0.000000]   Centaur CentaurHauls
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 0000000000099800 (usable)
[    0.000000]  BIOS-e820: 0000000000099800 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 000000007ffa0000 (usable)
[    0.000000]  BIOS-e820: 000000007ffae000 - 000000007ffb0000 (usable)
[    0.000000]  BIOS-e820: 000000007ffb0000 - 000000007ffbe000 (ACPI data)
[    0.000000]  BIOS-e820: 000000007ffbe000 - 000000007fff0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 000000007fff0000 - 0000000080000000 (reserved)
[    0.000000]  BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved)
[    0.000000]  BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fef00000 (reserved)
[    0.000000]  BIOS-e820: 00000000ff700000 - 0000000100000000 (reserved)
[    0.000000] Early serial console at I/O port 0x3f8 (options '115200n8')
[    0.000000] console [uart0] enabled
[    0.000000] DMI present.
[    0.000000] last_pfn = 0x7ffb0 max_arch_pfn = 0x400000000
[    0.000000] MTRR default type: uncachable
[    0.000000] MTRR fixed ranges enabled:
[    0.000000]   00000-9FFFF write-back
[    0.000000]   A0000-EFFFF uncachable
[    0.000000]   F0000-FFFFF write-protect
[    0.000000] MTRR variable ranges enabled:
[    0.000000]   0 base 000000000000 mask FFFF80000000 write-back
[    0.000000]   1 disabled
[    0.000000]   2 disabled
[    0.000000]   3 disabled
[    0.000000]   4 disabled
[    0.000000]   5 disabled
[    0.000000]   6 disabled
[    0.000000]   7 disabled
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] initial memory mapped : 0 - 20000000
[    0.000000] init_memory_mapping: 0000000000000000-000000007ffb0000
[    0.000000] Using GB pages for direct mapping
[    0.000000]  0000000000 - 0040000000 page 1G
[    0.000000]  0040000000 - 007fe00000 page 2M
[    0.000000]  007fe00000 - 007ffb0000 page 4k
[    0.000000] kernel direct mapping tables up to 7ffb0000 @ 8000-b000
[    0.000000] RAMDISK: 7d901000 - 7ff7f439
[    0.000000] ACPI: RSDP 00000000000fa570 00024 (v02 ACPIAM)
[    0.000000] ACPI: XSDT 000000007ffb0100 00094 (v01 SUN    X4x40
00000052 MSFT 00000097)
[    0.000000] ACPI: FACP 000000007ffb0290 000F4 (v03 SUN    X4x40
00000052 MSFT 00000097)
[    0.000000] ACPI: DSDT 000000007ffb05d0 07EB4 (v01 SUN    X4x40
00000052 INTL 20051117)
[    0.000000] ACPI: FACS 000000007ffbe000 00040
[    0.000000] ACPI: APIC 000000007ffb0390 000EC (v01 SUN    X4x40
00000052 MSFT 00000097)
[    0.000000] ACPI: SPCR 000000007ffb0480 00050 (v01 SUN    X4x40
00000052 MSFT 00000097)
[    0.000000] ACPI: MCFG 000000007ffb04d0 0003C (v01 SUN    OEMMCFG
00000052 MSFT 00000097)
[    0.000000] ACPI: SLIT 000000007ffb0510 00030 (v01 SUN    OEMSLIT
00000052 MSFT 00000097)
[    0.000000] ACPI: SPMI 000000007ffb0580 00041 (v01 SUN    OEMSPMI
00000052 MSFT 00000097)
[    0.000000] ACPI: OEMB 000000007ffbe040 000AE (v01 SUN    X4x40
00000052 MSFT 00000097)
[    0.000000] ACPI: SRAT 000000007ffb8490 00100 (v01 AMD    FAM_F_10
00000002 AMD  00000001)
[    0.000000] ACPI: HPET 000000007ffb8590 00038 (v01 SUN    OEMHPET0
00000052 MSFT 00000097)
[    0.000000] ACPI: EINJ 000000007ffb85d0 00130 (v01  AMIER AMI_EINJ
10000815 MSFT 00000097)
[    0.000000] ACPI: BERT 000000007ffb8760 00030 (v01  AMIER AMI_BERT
10000815 MSFT 00000097)
[    0.000000] ACPI: ERST 000000007ffb8790 001B0 (v01  AMIER AMI_ERST
10000815 MSFT 00000097)
[    0.000000] ACPI: HEST 000000007ffb8940 000A8 (v01  AMIER AMI_HEST
10000815 MSFT 00000097)
[    0.000000] ACPI: SSDT 000000007ffb89f0 010F4 (v01 A M I  POWERNOW
00000001 AMD  00000001)
[    0.000000] ACPI: Local APIC address 0xfee00000
[    0.000000] SRAT: PXM 0 -> APIC 0 -> Node 0
[    0.000000] SRAT: PXM 0 -> APIC 1 -> Node 0
[    0.000000] SRAT: PXM 0 -> APIC 2 -> Node 0
[    0.000000] SRAT: PXM 0 -> APIC 3 -> Node 0
[    0.000000] SRAT: PXM 1 -> APIC 4 -> Node 1
[    0.000000] SRAT: PXM 1 -> APIC 5 -> Node 1
[    0.000000] SRAT: PXM 1 -> APIC 6 -> Node 1
[    0.000000] SRAT: PXM 1 -> APIC 7 -> Node 1
[    0.000000] SRAT: Node 1 PXM 1 0-a0000
[    0.000000] Adding active range (1, 0x0, 0x99) 0 entries of 12800 used
[    0.000000] SRAT: Node 1 PXM 1 100000-80000000
[    0.000000] Adding active range (1, 0x100, 0x7ffa0) 1 entries of 12800 used
[    0.000000] Adding active range (1, 0x7ffae, 0x7ffb0) 2 entries of 12800 used
[    0.000000] ACPI: SLIT: nodes = 2
[    0.000000]    10 13
[    0.000000]    13 10
[    0.000000] NUMA: Allocated memnodemap from a000 - b040
[    0.000000] NUMA: Using 20 for the hash shift.
[    0.000000] Bootmem setup node 1 0000000000000000-000000007ffb0000
[    0.000000]   NODE_DATA [000000000000b040 - 000000000001703f]
[    0.000000]   bootmap [0000000000018000 -  0000000000027ff7] pages 10
[    0.000000] (9 early reservations) ==> bootmem [0000000000 - 007ffb0000]
[    0.000000]   #0 [0000000000 - 0000001000]   BIOS data page ==>
[0000000000 - 0000001000]
[    0.000000]   #1 [0000006000 - 0000008000]       TRAMPOLINE ==>
[0000006000 - 0000008000]
[    0.000000]   #2 [0000200000 - 00020ae6d8]    TEXT DATA BSS ==>
[0000200000 - 00020ae6d8]
[    0.000000]   #3 [007d901000 - 007ff7f439]          RAMDISK ==>
[007d901000 - 007ff7f439]
[    0.000000]   #4 [0000099400 - 0000100000]    BIOS reserved ==>
[0000099400 - 0000100000]
[    0.000000]   #5 [00020af000 - 00020af308]              BRK ==>
[00020af000 - 00020af308]
[    0.000000]   #6 [0000008000 - 000000a000]          PGTABLE ==>
[0000008000 - 000000a000]
[    0.000000]   #7 [0000001000 - 0000001030]        ACPI SLIT ==>
[0000001000 - 0000001030]
[    0.000000]   #8 [000000a000 - 000000b040]       MEMNODEMAP ==>
[000000a000 - 000000b040]
[    0.000000] Scan SMP from ffff880000000000 for 1024 bytes.
[    0.000000] Scan SMP from ffff88000009fc00 for 1024 bytes.
[    0.000000] Scan SMP from ffff8800000f0000 for 65536 bytes.
[    0.000000] found SMP MP-table at [ffff8800000ff780] ff780
[    0.000000]   mpc: fc4e0-fc6e4
[    0.000000]  [ffffea0000000000-ffffea7fffffffff] PGD
->ffff8800024b2000 on node 1
[    0.000000]  [ffffea0000000000-ffffea003fffffff] PUD
->ffff8800024b3000 on node 1
[    0.000000]  [ffffea0000000000-ffffea00033fffff] PMD ->
[ffff880002600000-ffff8800059fffff] on node 1
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA      0x00000000 -> 0x00001000
[    0.000000]   DMA32    0x00001000 -> 0x00100000
[    0.000000]   Normal   0x00100000 -> 0x00100000
[    0.000000] Movable zone start PFN for each node
[    0.000000] early_node_map[3] active PFN ranges
[    0.000000]     1: 0x00000000 -> 0x00000099
[    0.000000]     1: 0x00000100 -> 0x0007ffa0
[    0.000000]     1: 0x0007ffae -> 0x0007ffb0
[    0.000000] On node 1 totalpages: 524091
[    0.000000]   DMA zone: 104 pages used for memmap
[    0.000000]   DMA zone: 108 pages reserved
[    0.000000]   DMA zone: 3781 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 13206 pages used for memmap
[    0.000000]   DMA32 zone: 506892 pages, LIFO batch:31
[    0.000000] ACPI: PM-Timer IO Port: 0xe008
[    0.000000] ACPI: Local APIC address 0xfee00000
[    0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x05] lapic_id[0x04] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x06] lapic_id[0x05] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x07] lapic_id[0x06] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x08] lapic_id[0x07] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x09] lapic_id[0x88] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x89] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x8a] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x8b] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x8c] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x8d] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x8e] disabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x10] lapic_id[0x8f] disabled)
[    0.000000] ACPI: IOAPIC (id[0x08] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 8, version 0, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: IOAPIC (id[0x09] address[0xddfff000] gsi_base[24])
[    0.000000] IOAPIC[1]: apic_id 9, version 0, address 0xddfff000, GSI 24-47
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 14 high edge)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 15 high edge)
[    0.000000] ACPI: IRQ0 used by override.
[    0.000000] ACPI: IRQ2 used by override.
[    0.000000] ACPI: IRQ9 used by override.
[    0.000000] ACPI: IRQ14 used by override.
[    0.000000] ACPI: IRQ15 used by override.
[    0.000000] Using ACPI (MADT) for SMP configuration information
[    0.000000] ACPI: HPET id: 0x10de8201 base: 0xfed00000
[    0.000000] SMP: Allowing 16 CPUs, 8 hotplug CPUs
[    0.000000] init_cpu_to_node:
[    0.000000] cpu 0 -> apicid 0x0 -> node 1
[    0.000000] cpu 1 -> apicid 0x1 -> node 1
[    0.000000] cpu 2 -> apicid 0x2 -> node 1
[    0.000000] cpu 3 -> apicid 0x3 -> node 1
[    0.000000] cpu 4 -> apicid 0x4 -> node 1
[    0.000000] cpu 5 -> apicid 0x5 -> node 1
[    0.000000] cpu 6 -> apicid 0x6 -> node 1
[    0.000000] cpu 7 -> apicid 0x7 -> node 1
[    0.000000] cpu 8 -> apicid 0xffff -> node 1
[    0.000000] cpu 9 -> apicid 0xffff -> node 1
[    0.000000] cpu 10 -> apicid 0xffff -> node 1
[    0.000000] cpu 11 -> apicid 0xffff -> node 1
[    0.000000] cpu 12 -> apicid 0xffff -> node 1
[    0.000000] cpu 13 -> apicid 0xffff -> node 1
[    0.000000] cpu 14 -> apicid 0xffff -> node 1
[    0.000000] cpu 15 -> apicid 0xffff -> node 1
[    0.000000] mapped APIC to ffffffffff5fc000 (fee00000)
[    0.000000] mapped IOAPIC to ffffffffff5fb000 (fec00000)
[    0.000000] mapped IOAPIC to ffffffffff5fa000 (ddfff000)
[    0.000000] nr_irqs_gsi: 48
[    0.000000] Allocating PCI resources starting at 80000000 (gap:
80000000:60000000)
[    0.000000] NR_CPUS:128 nr_cpumask_bits:128 nr_cpu_ids:16 nr_node_ids:2
[    0.000000] PERCPU: Embedded 478 pages at ffff880005a00000, static
data 1925536 bytes
[    0.000000] Pid: 0, comm: swapper Not tainted
2.6.30-rc5-tip-01542-g60b6b0b-dirty #222
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff813a9195>] early_idt_handler+0x55/0x68
[    0.000000]  [<ffffffff8030d607>] ? next_zones_zonelist+0x27/0x6a
[    0.000000]  [<ffffffff8030d5ed>] ? next_zones_zonelist+0xd/0x6a
[    0.000000]  [<ffffffff802fdc99>] nr_free_zone_pages+0x51/0xaa
[    0.000000]  [<ffffffff8030f7b2>] ? mminit_verify_zonelist+0x16/0x170
[    0.000000]  [<ffffffff802fdd18>] nr_free_pagecache_pages+0x26/0x3c
[    0.000000]  [<ffffffff802fdf1c>] build_all_zonelists+0x1ee/0x292
[    0.000000]  [<ffffffff813a9d0a>] start_kernel+0x1a3/0x3fd
[    0.000000]  [<ffffffff813a92a9>] x86_64_start_reservations+0xb9/0xd4
[    0.000000]  [<ffffffff813a93b2>] x86_64_start_kernel+0xee/0x109
[    0.000000] RIP next_zones_zonelist+0x27/0x6a

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH] x86: fix system without memory on node0
  2009-05-12 15:06           ` Jack Steiner
  2009-05-12 15:10             ` Yinghai Lu
  2009-05-12 15:43             ` Andi Kleen
@ 2009-05-13  1:34             ` Yinghai Lu
  2009-05-13  8:00               ` Andi Kleen
                                 ` (2 more replies)
  2 siblings, 3 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13  1:34 UTC (permalink / raw)
  To: Jack Steiner, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton
  Cc: David Rientjes, Andi Kleen, linux-kernel, Rusty Russell, Mike Travis


Jack found a crash on a system that doesn't have memory on node0.

It turns out that with the per_cpu changeset, node_number for the BSP will
always be 0, which is not consistent with cpu_to_node(), which already points
to the nearest node. This happens when numa_set_node() for node0 is called
early, before the per_cpu area is set up.

So set node_number for the boot cpu after the per_cpu area has been set up.

[ Impact: fix crashing on memoryless node 0]

Reported-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/kernel/setup_percpu.c |    8 ++++++++
 1 file changed, 8 insertions(+)

Index: linux-2.6/arch/x86/kernel/setup_percpu.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_percpu.c
+++ linux-2.6/arch/x86/kernel/setup_percpu.c
@@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+	/*
+	 * make sure boot cpu node_number is right, when boot cpu is on the
+	 * node that doesn't have mem installed
+	 */
+	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+#endif
+
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code
  2009-05-12 11:16 ` Mel Gorman
@ 2009-05-13  5:29   ` Yinghai Lu
  2009-05-13  9:55     ` Mel Gorman
  2009-05-13  6:13   ` [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2 Yinghai Lu
  1 sibling, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13  5:29 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, Christoph Lameter, linux-kernel, Al Viro,
	Rusty Russell

Mel Gorman wrote:
> On Fri, May 08, 2009 at 11:45:49PM -0700, Yinghai Lu wrote:
>> after
>> | commit b263295dbffd33b0fbff670720fa178c30e3392a
>> | Author: Christoph Lameter <clameter@sgi.com>
>> | Date:   Wed Jan 30 13:30:47 2008 +0100
>> |
>> |    x86: 64-bit, make sparsemem vmemmap the only memory model
>>
>> we don't have MEMORY_HOTPLUG_RESERVE anymore.
>>
>> remove related dead code.
>>
> 
> Good spot, this removes a nice amount of code. The changelog could say
> more though, how about?
> 
> =====
> Historically, x86-64 had an architecture-specific method for memory hotplug
> whereby it scanned the SRAT for physical memory ranges that could be
> potentially used for memory hot-add later. By reserving those ranges
> without physical memory, the memmap would be allocated and left dormant
> until needed. This depended on the DISCONTIG memory model which has been
> removed so the code implementing HOTPLUG_RESERVE is now dead.
> 
> This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE
thanks will use that.
> =====
> 
>> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>>
>> ---
>>  arch/x86/include/asm/numa_64.h |    3 -
>>  arch/x86/mm/numa_64.c          |    5 --
>>  arch/x86/mm/srat_64.c          |   63 +++++++------------------------------
>>  include/linux/mm.h             |    2 -
>>  mm/page_alloc.c                |   69 -----------------------------------------
>>  5 files changed, 12 insertions(+), 130 deletions(-)
>>
>> Index: linux-2.6/arch/x86/include/asm/numa_64.h
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/include/asm/numa_64.h
>> +++ linux-2.6/arch/x86/include/asm/numa_64.h
>> @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
>>  extern void numa_init_array(void);
>>  extern int numa_off;
>>  
>> -extern void srat_reserve_add_area(int nodeid);
>> -extern int hotadd_percent;
>> -
>>  extern s16 apicid_to_node[MAX_LOCAL_APIC];
>>  
>>  extern unsigned long numa_free_all_bootmem(void);
>> Index: linux-2.6/arch/x86/mm/numa_64.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/mm/numa_64.c
>> +++ linux-2.6/arch/x86/mm/numa_64.c
>> @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
>>  		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
>>  				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
>>  
>> -#ifdef CONFIG_ACPI_NUMA
>> -	srat_reserve_add_area(nodeid);
>> -#endif
>>  	node_set_online(nodeid);
>>  }
>>  
>> @@ -608,8 +605,6 @@ static __init int numa_setup(char *opt)
>>  #ifdef CONFIG_ACPI_NUMA
>>  	if (!strncmp(opt, "noacpi", 6))
>>  		acpi_numa = -1;
>> -	if (!strncmp(opt, "hotadd=", 7))
>> -		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
> 
> Documentation/x86/x86_64/boot-options.txt now needs to be updated to
> remove the documentation on hotadd=.
> 
> Instead of ignoring the option, should a warning now be printed saying the
> option is deprecated?

That option has already been dead in 2.6.27 (?), 2.6.28, 2.6.29, ...
so I guess we could just remove it directly...

> 
>>  #endif
>>  	return 0;
>>  }
>> Index: linux-2.6/arch/x86/mm/srat_64.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/mm/srat_64.c
>> +++ linux-2.6/arch/x86/mm/srat_64.c
>> @@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
>>  static nodemask_t cpu_nodes_parsed __initdata;
>>  static struct bootnode nodes[MAX_NUMNODES] __initdata;
>>  static struct bootnode nodes_add[MAX_NUMNODES];
>> -static int found_add_area __initdata;
>> -int hotadd_percent __initdata = 0;
>>  
>>  static int num_node_memblks __initdata;
>>  static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
>> @@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
>>  {
>>  	struct bootnode *nd = &nodes[i];
>>  
>> -	if (found_add_area)
>> -		return;
>> -
>>  	if (nd->start < start) {
>>  		nd->start = start;
>>  		if (nd->end < nd->start)
>> @@ -86,7 +81,6 @@ static __init void bad_srat(void)
>>  	int i;
>>  	printk(KERN_ERR "SRAT: SRAT not used.\n");
>>  	acpi_numa = -1;
>> -	found_add_area = 0;
>>  	for (i = 0; i < MAX_LOCAL_APIC; i++)
>>  		apicid_to_node[i] = NUMA_NO_NODE;
>>  	for (i = 0; i < MAX_NUMNODES; i++)
>> @@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
>>  	       pxm, apic_id, node);
>>  }
>>  
>> -static int update_end_of_memory(unsigned long end) {return -1;}
>> -static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
>>  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
>>  static inline int save_add_info(void) {return 1;}
>>  #else
>>  static inline int save_add_info(void) {return 0;}
>>  #endif
>>  /*
>> - * Update nodes_add and decide if to include add are in the zone.
>> - * Both SPARSE and RESERVE need nodes_add information.
>> - * This code supports one contiguous hot add area per node.
>> + * Update nodes_add[]
>> + * This code supports one contiguous hot add area per node
>>   */
>> -static int __init
>> -reserve_hotadd(int node, unsigned long start, unsigned long end)
>> +static void __init
>> +update_nodes_add(int node, unsigned long start, unsigned long end)
>>  {
> 
> It's now very unclear what the purpose of this function is. I'm guessing it
> should be something like
> 
> validate_hotadd_region()
> This validates that the region of memory described by SRAT as suitable
> for use with memory hot-add is sane
> 
> What it was for was to validate that the SRAT looked sane and then push out the
> end of the node boundaries so that the memmap would get allocated. However,
> because we are no longer pushing out the node boundaries, is this doing
> anything useful at all any more? For sparsemem, memory-hotadd allocates
> the memmap as it required.

but it does update nodes_add range.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-12 21:21           ` Christoph Lameter
@ 2009-05-13  5:39             ` Yinghai Lu
  2009-05-14 19:34               ` Christoph Lameter
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13  5:39 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

On Tue, May 12, 2009 at 2:21 PM, Christoph Lameter
<cl@linux-foundation.org> wrote:
> On Tue, 12 May 2009, Yinghai Lu wrote:
>
>> or let user util do that? aka before hotadd mem to that node, use /sys to bring
>> the cpu offine and put the cpus online after mem is added ?
>
> Ok then we wont need the kernel.
>
So can I record this OK as a Reviewed-by or Acked-by?

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
  2009-05-12 11:16 ` Mel Gorman
  2009-05-13  5:29   ` Yinghai Lu
@ 2009-05-13  6:13   ` Yinghai Lu
  2009-05-13 14:59     ` Mel Gorman
  1 sibling, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13  6:13 UTC (permalink / raw)
  To: Mel Gorman, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Christoph Lameter
  Cc: Suresh Siddha, linux-kernel, Al Viro, Rusty Russell


after
| commit b263295dbffd33b0fbff670720fa178c30e3392a
| Author: Christoph Lameter <clameter@sgi.com>
| Date:   Wed Jan 30 13:30:47 2008 +0100
|
|    x86: 64-bit, make sparsemem vmemmap the only memory model

we don't have MEMORY_HOTPLUG_RESERVE anymore.

Historically, x86-64 had an architecture-specific method for memory hotplug
whereby it scanned the SRAT for physical memory ranges that could be
potentially used for memory hot-add later. By reserving those ranges
without physical memory, the memmap would be allocated and left dormant
until needed. This depended on the DISCONTIG memory model which has been
removed so the code implementing HOTPLUG_RESERVE is now dead.

This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE

Changelog updated by Mel.

v2: updated changelog, and remove hotadd= in doc

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>

---
 Documentation/x86/x86_64/boot-options.txt |    5 --
 arch/x86/include/asm/numa_64.h            |    3 -
 arch/x86/mm/numa_64.c                     |    5 --
 arch/x86/mm/srat_64.c                     |   63 +++++----------------------
 include/linux/mm.h                        |    2 
 mm/page_alloc.c                           |   69 ------------------------------
 6 files changed, 12 insertions(+), 135 deletions(-)

Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -593,8 +590,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
Index: linux-2.6/arch/x86/mm/srat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/srat_64.c
+++ linux-2.6/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long s
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long s
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long s
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct ac
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a,
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned in
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -150,10 +150,6 @@ static unsigned long __meminitdata dma_r
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3121,64 +3117,6 @@ void __init sparse_memory_present_with_a
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3203,9 +3141,6 @@ void __meminit get_pfn_range_for_nid(uns
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3810,10 +3745,6 @@ void __init remove_all_active_ranges(voi
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
Index: linux-2.6/Documentation/x86/x86_64/boot-options.txt
===================================================================
--- linux-2.6.orig/Documentation/x86/x86_64/boot-options.txt
+++ linux-2.6/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: fix system without memory on node0
  2009-05-13  1:34             ` [PATCH] x86: fix system without memory on node0 Yinghai Lu
@ 2009-05-13  8:00               ` Andi Kleen
  2009-05-13 15:58                 ` Yinghai Lu
  2009-05-13 13:35               ` Ingo Molnar
  2009-05-13 16:52               ` Jack Steiner
  2 siblings, 1 reply; 102+ messages in thread
From: Andi Kleen @ 2009-05-13  8:00 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Jack Steiner, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, David Rientjes, Andi Kleen, linux-kernel,
	Rusty Russell, Mike Travis

> +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
> +	/*
> +	 * make sure boot cpu node_number is right, when boot cpu is on the
> +	 * node that doesn't have mem installed
> +	 */
> +	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
> +#endif

Seems like a quite crappy hac^wpatch. Why is it ever set to the wrong
value? And why is that only the case on NUMA and 64bit? 

-Andi (who is also doubtful that the rest of the system can handle
memoryless nodes in all cases)

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code
  2009-05-13  5:29   ` Yinghai Lu
@ 2009-05-13  9:55     ` Mel Gorman
  0 siblings, 0 replies; 102+ messages in thread
From: Mel Gorman @ 2009-05-13  9:55 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, Christoph Lameter, linux-kernel, Al Viro,
	Rusty Russell

On Tue, May 12, 2009 at 10:29:13PM -0700, Yinghai Lu wrote:
> Mel Gorman wrote:
> > On Fri, May 08, 2009 at 11:45:49PM -0700, Yinghai Lu wrote:
> >> after
> >> | commit b263295dbffd33b0fbff670720fa178c30e3392a
> >> | Author: Christoph Lameter <clameter@sgi.com>
> >> | Date:   Wed Jan 30 13:30:47 2008 +0100
> >> |
> >> |    x86: 64-bit, make sparsemem vmemmap the only memory model
> >>
> >> we don't have MEMORY_HOTPLUG_RESERVE anymore.
> >>
> >> remove related dead code.
> >>
> > 
> > Good spot, this removes a nice amount of code. The changelog could say
> > more though, how about?
> > 
> > =====
> > Historically, x86-64 had an architecture-specific method for memory hotplug
> > whereby it scanned the SRAT for physical memory ranges that could be
> > potentially used for memory hot-add later. By reserving those ranges
> > without physical memory, the memmap would be allocated and left dormant
> > until needed. This depended on the DISCONTIG memory model which has been
> > removed so the code implementing HOTPLUG_RESERVE is now dead.
> > 
> > This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE
> thanks will use that.
> > =====
> > 
> >> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> >>
> >> ---
> >>  arch/x86/include/asm/numa_64.h |    3 -
> >>  arch/x86/mm/numa_64.c          |    5 --
> >>  arch/x86/mm/srat_64.c          |   63 +++++++------------------------------
> >>  include/linux/mm.h             |    2 -
> >>  mm/page_alloc.c                |   69 -----------------------------------------
> >>  5 files changed, 12 insertions(+), 130 deletions(-)
> >>
> >> Index: linux-2.6/arch/x86/include/asm/numa_64.h
> >> ===================================================================
> >> --- linux-2.6.orig/arch/x86/include/asm/numa_64.h
> >> +++ linux-2.6/arch/x86/include/asm/numa_64.h
> >> @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
> >>  extern void numa_init_array(void);
> >>  extern int numa_off;
> >>  
> >> -extern void srat_reserve_add_area(int nodeid);
> >> -extern int hotadd_percent;
> >> -
> >>  extern s16 apicid_to_node[MAX_LOCAL_APIC];
> >>  
> >>  extern unsigned long numa_free_all_bootmem(void);
> >> Index: linux-2.6/arch/x86/mm/numa_64.c
> >> ===================================================================
> >> --- linux-2.6.orig/arch/x86/mm/numa_64.c
> >> +++ linux-2.6/arch/x86/mm/numa_64.c
> >> @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
> >>  		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
> >>  				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
> >>  
> >> -#ifdef CONFIG_ACPI_NUMA
> >> -	srat_reserve_add_area(nodeid);
> >> -#endif
> >>  	node_set_online(nodeid);
> >>  }
> >>  
> >> @@ -608,8 +605,6 @@ static __init int numa_setup(char *opt)
> >>  #ifdef CONFIG_ACPI_NUMA
> >>  	if (!strncmp(opt, "noacpi", 6))
> >>  		acpi_numa = -1;
> >> -	if (!strncmp(opt, "hotadd=", 7))
> >> -		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
> > 
> > Documentation/x86/x86_64/boot-options.txt now needs to be updated to
> > remove the documentation on hotadd=.
> > 
> > Instead of ignoring the option, should a warning now be printed saying the
> > option is deprecated?
> 
> that is dead for 2.6.27 (?), 2.6.28, 2.6.29, ...
> guess we could remove that directly...
> 

If someone did a distro upgrade, the kernel version would jump from before
2.6.27 to 2.6.29 or 2.6.30. If someone is depending on the behaviour, they
might like to hear about it rather than having it mysteriously "fail".

> > 
> >>  #endif
> >>  	return 0;
> >>  }
> >> Index: linux-2.6/arch/x86/mm/srat_64.c
> >> ===================================================================
> >> --- linux-2.6.orig/arch/x86/mm/srat_64.c
> >> +++ linux-2.6/arch/x86/mm/srat_64.c
> >> @@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
> >>  static nodemask_t cpu_nodes_parsed __initdata;
> >>  static struct bootnode nodes[MAX_NUMNODES] __initdata;
> >>  static struct bootnode nodes_add[MAX_NUMNODES];
> >> -static int found_add_area __initdata;
> >> -int hotadd_percent __initdata = 0;
> >>  
> >>  static int num_node_memblks __initdata;
> >>  static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
> >> @@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
> >>  {
> >>  	struct bootnode *nd = &nodes[i];
> >>  
> >> -	if (found_add_area)
> >> -		return;
> >> -
> >>  	if (nd->start < start) {
> >>  		nd->start = start;
> >>  		if (nd->end < nd->start)
> >> @@ -86,7 +81,6 @@ static __init void bad_srat(void)
> >>  	int i;
> >>  	printk(KERN_ERR "SRAT: SRAT not used.\n");
> >>  	acpi_numa = -1;
> >> -	found_add_area = 0;
> >>  	for (i = 0; i < MAX_LOCAL_APIC; i++)
> >>  		apicid_to_node[i] = NUMA_NO_NODE;
> >>  	for (i = 0; i < MAX_NUMNODES; i++)
> >> @@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
> >>  	       pxm, apic_id, node);
> >>  }
> >>  
> >> -static int update_end_of_memory(unsigned long end) {return -1;}
> >> -static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
> >>  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> >>  static inline int save_add_info(void) {return 1;}
> >>  #else
> >>  static inline int save_add_info(void) {return 0;}
> >>  #endif
> >>  /*
> >> - * Update nodes_add and decide if to include add are in the zone.
> >> - * Both SPARSE and RESERVE need nodes_add information.
> >> - * This code supports one contiguous hot add area per node.
> >> + * Update nodes_add[]
> >> + * This code supports one contiguous hot add area per node
> >>   */
> >> -static int __init
> >> -reserve_hotadd(int node, unsigned long start, unsigned long end)
> >> +static void __init
> >> +update_nodes_add(int node, unsigned long start, unsigned long end)
> >>  {
> > 
> > It's now very unclear what the purpose of this function is. I'm guessing it
> > should be something like
> > 
> > validate_hotadd_region()
> > This validates that the region of memory described by SRAT as suitable
> > for use with memory hot-add is sane
> > 
> > What it was for was to validate that the SRAT looked sane and then push out the
> > end of the node boundaries so that the memmap would get allocated. However,
> > because we are no longer pushing out the node boundaries, is this doing
> > anything useful at all any more? For sparsemem, memory-hotadd allocates
> > the memmap as it required.
> 
> but it does update nodes_add range.
> 

D'oh, of course. Thanks for correcting me.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: fix system without memory on node0
  2009-05-13  1:34             ` [PATCH] x86: fix system without memory on node0 Yinghai Lu
  2009-05-13  8:00               ` Andi Kleen
@ 2009-05-13 13:35               ` Ingo Molnar
  2009-05-13 16:52               ` Jack Steiner
  2 siblings, 0 replies; 102+ messages in thread
From: Ingo Molnar @ 2009-05-13 13:35 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Jack Steiner, H. Peter Anvin, Thomas Gleixner, Andrew Morton,
	David Rientjes, Andi Kleen, linux-kernel, Rusty Russell,
	Mike Travis


* Yinghai Lu <yinghai@kernel.org> wrote:

> Jack found that crash with doesn't have memory on node0.
> 
> it turns out with per_cpu changeset, node_number for BSP will be 
> alway 0, and it is consistent to cpu_to_node() that is to near 
> node already. aka when numa_set_node() for node0 is called early 
> before per_cpu area is setup
> 
> try to set the node_number for boot cpu, after we get per_cpu area 
> setup.
> 
> [ Impact: fix crashing on memoryless node 0]
> 
> Reported-by: Jack Steiner <steiner@sgi.com>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  arch/x86/kernel/setup_percpu.c |    8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> Index: linux-2.6/arch/x86/kernel/setup_percpu.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/setup_percpu.c
> +++ linux-2.6/arch/x86/kernel/setup_percpu.c
> @@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
>  	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
>  #endif
>  
> +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
> +	/*
> +	 * make sure boot cpu node_number is right, when boot cpu is on the
> +	 * node that doesn't have mem installed
> +	 */
> +	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
> +#endif

ok, that's a nice fix i guess.

Wouldn't it also be necessary/good to unify this code between 32-bit 
and 64-bit? 32-bit has cpu_to_node_map[], while 64-bit has percpu 
variables for that.

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
  2009-05-13  6:13   ` [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2 Yinghai Lu
@ 2009-05-13 14:59     ` Mel Gorman
  2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Mel Gorman @ 2009-05-13 14:59 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Christoph Lameter, Suresh Siddha, linux-kernel, Al Viro,
	Rusty Russell

On Tue, May 12, 2009 at 11:13:15PM -0700, Yinghai Lu wrote:
> 
> after
> | commit b263295dbffd33b0fbff670720fa178c30e3392a
> | Author: Christoph Lameter <clameter@sgi.com>
> | Date:   Wed Jan 30 13:30:47 2008 +0100
> |
> |    x86: 64-bit, make sparsemem vmemmap the only memory model
> 
> we don't have MEMORY_HOTPLUG_RESERVE anymore.
> 
> Historically, x86-64 had an architecture-specific method for memory hotplug
> whereby it scanned the SRAT for physical memory ranges that could be
> potentially used for memory hot-add later. By reserving those ranges
> without physical memory, the memmap would be allocated and left dormant
> until needed. This depended on the DISCONTIG memory model which has been
> removed so the code implementing HOTPLUG_RESERVE is now dead.
> 
> This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE
> 
> Changelog updated by Mel.
> 
> v2: updated changelog, and remove hotadd= in doc
> 
> [ Impact: remove dead code ]
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
> Cc: Mel Gorman <mel@csn.ul.ie>

Patch looks good and successfully boot-tested on a small number of
machines. Nice work.

Reviewed-by: Mel Gorman <mel@csn.ul.ie>

> 
> ---
>  Documentation/x86/x86_64/boot-options.txt |    5 --
>  arch/x86/include/asm/numa_64.h            |    3 -
>  arch/x86/mm/numa_64.c                     |    5 --
>  arch/x86/mm/srat_64.c                     |   63 +++++----------------------
>  include/linux/mm.h                        |    2 
>  mm/page_alloc.c                           |   69 ------------------------------
>  6 files changed, 12 insertions(+), 135 deletions(-)
> 
> Index: linux-2.6/arch/x86/include/asm/numa_64.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/numa_64.h
> +++ linux-2.6/arch/x86/include/asm/numa_64.h
> @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
>  extern void numa_init_array(void);
>  extern int numa_off;
>  
> -extern void srat_reserve_add_area(int nodeid);
> -extern int hotadd_percent;
> -
>  extern s16 apicid_to_node[MAX_LOCAL_APIC];
>  
>  extern unsigned long numa_free_all_bootmem(void);
> Index: linux-2.6/arch/x86/mm/numa_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/numa_64.c
> +++ linux-2.6/arch/x86/mm/numa_64.c
> @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
>  		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
>  				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
>  
> -#ifdef CONFIG_ACPI_NUMA
> -	srat_reserve_add_area(nodeid);
> -#endif
>  	node_set_online(nodeid);
>  }
>  
> @@ -593,8 +590,6 @@ static __init int numa_setup(char *opt)
>  #ifdef CONFIG_ACPI_NUMA
>  	if (!strncmp(opt, "noacpi", 6))
>  		acpi_numa = -1;
> -	if (!strncmp(opt, "hotadd=", 7))
> -		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
>  #endif
>  	return 0;
>  }
> Index: linux-2.6/arch/x86/mm/srat_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/mm/srat_64.c
> +++ linux-2.6/arch/x86/mm/srat_64.c
> @@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
>  static nodemask_t cpu_nodes_parsed __initdata;
>  static struct bootnode nodes[MAX_NUMNODES] __initdata;
>  static struct bootnode nodes_add[MAX_NUMNODES];
> -static int found_add_area __initdata;
> -int hotadd_percent __initdata = 0;
>  
>  static int num_node_memblks __initdata;
>  static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
> @@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
>  {
>  	struct bootnode *nd = &nodes[i];
>  
> -	if (found_add_area)
> -		return;
> -
>  	if (nd->start < start) {
>  		nd->start = start;
>  		if (nd->end < nd->start)
> @@ -86,7 +81,6 @@ static __init void bad_srat(void)
>  	int i;
>  	printk(KERN_ERR "SRAT: SRAT not used.\n");
>  	acpi_numa = -1;
> -	found_add_area = 0;
>  	for (i = 0; i < MAX_LOCAL_APIC; i++)
>  		apicid_to_node[i] = NUMA_NO_NODE;
>  	for (i = 0; i < MAX_NUMNODES; i++)
> @@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
>  	       pxm, apic_id, node);
>  }
>  
> -static int update_end_of_memory(unsigned long end) {return -1;}
> -static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
>  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
>  static inline int save_add_info(void) {return 1;}
>  #else
>  static inline int save_add_info(void) {return 0;}
>  #endif
>  /*
> - * Update nodes_add and decide if to include add are in the zone.
> - * Both SPARSE and RESERVE need nodes_add information.
> - * This code supports one contiguous hot add area per node.
> + * Update nodes_add[]
> + * This code supports one contiguous hot add area per node
>   */
> -static int __init
> -reserve_hotadd(int node, unsigned long start, unsigned long end)
> +static void __init
> +update_nodes_add(int node, unsigned long start, unsigned long end)
>  {
>  	unsigned long s_pfn = start >> PAGE_SHIFT;
>  	unsigned long e_pfn = end >> PAGE_SHIFT;
> -	int ret = 0, changed = 0;
> +	int changed = 0;
>  	struct bootnode *nd = &nodes_add[node];
>  
>  	/* I had some trouble with strange memory hotadd regions breaking
> @@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long s
>  	   mistakes */
>  	if ((signed long)(end - start) < NODE_MIN_SIZE) {
>  		printk(KERN_ERR "SRAT: Hotplug area too small\n");
> -		return -1;
> +		return;
>  	}
>  
>  	/* This check might be a bit too strict, but I'm keeping it for now. */
> @@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long s
>  		printk(KERN_ERR
>  			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
>  			s_pfn, e_pfn);
> -		return -1;
> -	}
> -
> -	if (!hotadd_enough_memory(&nodes_add[node]))  {
> -		printk(KERN_ERR "SRAT: Hotplug area too large\n");
> -		return -1;
> +		return;
>  	}
>  
>  	/* Looks good */
> @@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long s
>  			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
>  	}
>  
> -	ret = update_end_of_memory(nd->end);
> -
>  	if (changed)
> -	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
> -	return ret;
> +		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
> +				 nd->start, nd->end);
>  }
>  
>  /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
> @@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct ac
>  	       start, end);
>  	e820_register_active_regions(node, start >> PAGE_SHIFT,
>  				     end >> PAGE_SHIFT);
> -	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
> -						nd->end >> PAGE_SHIFT);
>  
> -	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
> -	    (reserve_hotadd(node, start, end) < 0)) {
> -		/* Ignore hotadd region. Undo damage */
> -		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
> +	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> +		update_nodes_add(node, start, end);
> +		/* restore nodes[node] */
>  		*nd = oldnode;
>  		if ((nd->start | nd->end) == 0)
>  			node_clear(node, nodes_parsed);
> @@ -510,26 +491,6 @@ static int null_slit_node_compare(int a,
>  }
>  #endif /* CONFIG_NUMA_EMU */
>  
> -void __init srat_reserve_add_area(int nodeid)
> -{
> -	if (found_add_area && nodes_add[nodeid].end) {
> -		u64 total_mb;
> -
> -		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
> -				"for node %d at %Lx-%Lx\n",
> -			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
> -		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
> -					>> PAGE_SHIFT;
> -		total_mb *= sizeof(struct page);
> -		total_mb >>= 20;
> -		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
> -				"pre-allocated memory.\n", (unsigned long long)total_mb);
> -		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
> -			       nodes_add[nodeid].end - nodes_add[nodeid].start,
> -			       BOOTMEM_DEFAULT);
> -	}
> -}
> -
>  int __node_distance(int a, int b)
>  {
>  	int index;
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned in
>  					unsigned long end_pfn);
>  extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
>  					unsigned long end_pfn);
> -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
> -					unsigned long end_pfn);
>  extern void remove_all_active_ranges(void);
>  extern unsigned long absent_pages_in_range(unsigned long start_pfn,
>  						unsigned long end_pfn);
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -150,10 +150,6 @@ static unsigned long __meminitdata dma_r
>    static int __meminitdata nr_nodemap_entries;
>    static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
>    static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
> -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> -  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
> -  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
> -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
>    static unsigned long __initdata required_kernelcore;
>    static unsigned long __initdata required_movablecore;
>    static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
> @@ -3121,64 +3117,6 @@ void __init sparse_memory_present_with_a
>  }
>  
>  /**
> - * push_node_boundaries - Push node boundaries to at least the requested boundary
> - * @nid: The nid of the node to push the boundary for
> - * @start_pfn: The start pfn of the node
> - * @end_pfn: The end pfn of the node
> - *
> - * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
> - * time. Specifically, on x86_64, SRAT will report ranges that can potentially
> - * be hotplugged even though no physical memory exists. This function allows
> - * an arch to push out the node boundaries so mem_map is allocated that can
> - * be used later.
> - */
> -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> -void __init push_node_boundaries(unsigned int nid,
> -		unsigned long start_pfn, unsigned long end_pfn)
> -{
> -	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
> -			"Entering push_node_boundaries(%u, %lu, %lu)\n",
> -			nid, start_pfn, end_pfn);
> -
> -	/* Initialise the boundary for this node if necessary */
> -	if (node_boundary_end_pfn[nid] == 0)
> -		node_boundary_start_pfn[nid] = -1UL;
> -
> -	/* Update the boundaries */
> -	if (node_boundary_start_pfn[nid] > start_pfn)
> -		node_boundary_start_pfn[nid] = start_pfn;
> -	if (node_boundary_end_pfn[nid] < end_pfn)
> -		node_boundary_end_pfn[nid] = end_pfn;
> -}
> -
> -/* If necessary, push the node boundary out for reserve hotadd */
> -static void __meminit account_node_boundary(unsigned int nid,
> -		unsigned long *start_pfn, unsigned long *end_pfn)
> -{
> -	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
> -			"Entering account_node_boundary(%u, %lu, %lu)\n",
> -			nid, *start_pfn, *end_pfn);
> -
> -	/* Return if boundary information has not been provided */
> -	if (node_boundary_end_pfn[nid] == 0)
> -		return;
> -
> -	/* Check the boundaries and update if necessary */
> -	if (node_boundary_start_pfn[nid] < *start_pfn)
> -		*start_pfn = node_boundary_start_pfn[nid];
> -	if (node_boundary_end_pfn[nid] > *end_pfn)
> -		*end_pfn = node_boundary_end_pfn[nid];
> -}
> -#else
> -void __init push_node_boundaries(unsigned int nid,
> -		unsigned long start_pfn, unsigned long end_pfn) {}
> -
> -static void __meminit account_node_boundary(unsigned int nid,
> -		unsigned long *start_pfn, unsigned long *end_pfn) {}
> -#endif
> -
> -
> -/**
>   * get_pfn_range_for_nid - Return the start and end page frames for a node
>   * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
>   * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
> @@ -3203,9 +3141,6 @@ void __meminit get_pfn_range_for_nid(uns
>  
>  	if (*start_pfn == -1UL)
>  		*start_pfn = 0;
> -
> -	/* Push the node boundaries out if requested */
> -	account_node_boundary(nid, start_pfn, end_pfn);
>  }
>  
>  /*
> @@ -3810,10 +3745,6 @@ void __init remove_all_active_ranges(voi
>  {
>  	memset(early_node_map, 0, sizeof(early_node_map));
>  	nr_nodemap_entries = 0;
> -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> -	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
> -	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
> -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
>  }
>  
>  /* Compare two active node_active_regions */
> Index: linux-2.6/Documentation/x86/x86_64/boot-options.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/x86/x86_64/boot-options.txt
> +++ linux-2.6/Documentation/x86/x86_64/boot-options.txt
> @@ -150,11 +150,6 @@ NUMA
>  		Otherwise, the remaining system RAM is allocated to an
>  		additional node.
>  
> -  numa=hotadd=percent
> -		Only allow hotadd memory to preallocate page structures upto
> -		percent of already available memory.
> -		numa=hotadd=0 will disable hotadd memory.
> -
>  ACPI
>  
>    acpi=off	Don't enable ACPI
> 

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: fix system without memory on node0
  2009-05-13  8:00               ` Andi Kleen
@ 2009-05-13 15:58                 ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13 15:58 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jack Steiner, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	Andrew Morton, David Rientjes, linux-kernel, Rusty Russell,
	Mike Travis

Andi Kleen wrote:
>> +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
>> +	/*
>> +	 * make sure boot cpu node_number is right, when boot cpu is on the
>> +	 * node that doesn't have mem installed
>> +	 */
>> +	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
>> +#endif
> 
> Seems like a quite crappy hac^wpatch. Why is it ever set to the wrong
> value? And why is that only the case on NUMA and 64bit? 

two places touched that per_cpu(node_number,), 
1. in cpu/common.c::cpu_init() and it is not for BP
#ifdef CONFIG_NUMA
        if (cpu != 0 && percpu_read(node_number) == 0 &&
            cpu_to_node(cpu) != NUMA_NO_NODE)
                percpu_write(node_number, cpu_to_node(cpu));
#endif
for BP is traps_init ==> cpu_init
for AP is start_secondary ==> cpu_init

2. cpu/intel.c or amd.c::srat_detect_node via numa_set_node() and they are called via identify_cpu
for BP:
check_bugs ==> identify_boot_cpu ==> identify_cpu() that is rather later before numa_node_id() is used for BP...
for AP:
start_secondary=>smp_callin=>smp_store_cpu_info()=>identify_secondary_cpu ==> identify_cpu()

so only try to set that for the BP earlier, in setup_per_cpu_areas(), and don't bother setting it for the APs there.
(and we don't want to disturb the 0 before the BP per_cpu area is copied to the APs)

or you check set the per_cpu(node_number) is early enough with setup_per_cpu_areas();


setup_percpu.c in arch/x86/kernel/ is used by a lot of configurations.

and in arch/x86/include/asm/topology.h, we have

#ifdef CONFIG_NUMA
#include <linux/cpumask.h>
#include <asm/mpspec.h>

#ifdef CONFIG_X86_32

/* Mappings between logical cpu number and node number */
extern int cpu_to_node_map[];

/* Returns the number of the node containing CPU 'cpu' */
static inline int cpu_to_node(int cpu)
{
        return cpu_to_node_map[cpu];
}
#define early_cpu_to_node(cpu)  cpu_to_node(cpu)

#else /* CONFIG_X86_64 */

/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);

/* Returns the number of the current Node. */
DECLARE_PER_CPU(int, node_number);
#define numa_node_id()          percpu_read(node_number)
...

so we only need to do this for NUMA and 64-bit.


YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: fix system without memory on node0
  2009-05-13  1:34             ` [PATCH] x86: fix system without memory on node0 Yinghai Lu
  2009-05-13  8:00               ` Andi Kleen
  2009-05-13 13:35               ` Ingo Molnar
@ 2009-05-13 16:52               ` Jack Steiner
  2009-05-13 17:43                 ` Yinghai Lu
  2009-05-13 18:08                 ` Yinghai Lu
  2 siblings, 2 replies; 102+ messages in thread
From: Jack Steiner @ 2009-05-13 16:52 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: H. Peter Anvin, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	David Rientjes, Andi Kleen, linux-kernel, Rusty Russell,
	Mike Travis

On Tue, May 12, 2009 at 06:34:31PM -0700, Yinghai Lu wrote:
> 
> Jack found that crash with doesn't have memory on node0.
> 
> it turns out with per_cpu changeset, node_number for BSP will be alway 0,
> and it is consistent to cpu_to_node() that is to near node already.
> aka when numa_set_node() for node0 is called early before per_cpu area is
> setup
> 
> try to set the node_number for boot cpu, after we get per_cpu area setup.
> 
> [ Impact: fix crashing on memoryless node 0]
> 
> Reported-by: Jack Steiner <steiner@sgi.com>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  arch/x86/kernel/setup_percpu.c |    8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> Index: linux-2.6/arch/x86/kernel/setup_percpu.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/setup_percpu.c
> +++ linux-2.6/arch/x86/kernel/setup_percpu.c
> @@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
>  	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
>  #endif
>  
> +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
> +	/*
> +	 * make sure boot cpu node_number is right, when boot cpu is on the
> +	 * node that doesn't have mem installed
> +	 */
> +	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
> +#endif
> +
>  	/* Setup node to cpumask map */
>  	setup_node_to_cpumask_map();
>  

With the patch above PLUS the patch below, I verified that all of our strange
configurations boot to shell prompt & run simple commands. There are certainly
some corner cases that have not been tested.

Note that both patches are required. The system panics in early boot if either
patch is omitted.

---


Ignore offline nodes when building the zone lists. This
fix is needed to support configurations that have PXMs with
cpus but no memory.


Signed-off-by: Jack Steiner <steiner@sgi.com>


---
 mm/page_alloc.c |    2 ++
 1 file changed, 2 insertions(+)

Index: linux/mm/page_alloc.c
===================================================================
--- linux.orig/mm/page_alloc.c	2009-05-12 17:06:59.000000000 -0500
+++ linux/mm/page_alloc.c	2009-05-13 09:54:09.000000000 -0500
@@ -2370,6 +2370,8 @@ static void build_zonelists(pg_data_t *p
 		 * If another node is sufficiently far away then it is better
 		 * to reclaim pages in a zone before going off node.
 		 */
+		if (!node_online(node))
+			continue;
 		if (distance > RECLAIM_DISTANCE)
 			zone_reclaim_mode = 1;
 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: fix system without memory on node0
  2009-05-13 16:52               ` Jack Steiner
@ 2009-05-13 17:43                 ` Yinghai Lu
  2009-05-13 18:08                 ` Yinghai Lu
  1 sibling, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13 17:43 UTC (permalink / raw)
  To: Jack Steiner
  Cc: H. Peter Anvin, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	David Rientjes, Andi Kleen, linux-kernel, Rusty Russell,
	Mike Travis

Jack Steiner wrote:
> On Tue, May 12, 2009 at 06:34:31PM -0700, Yinghai Lu wrote:
>> Jack found that crash with doesn't have memory on node0.
>>
>> it turns out with per_cpu changeset, node_number for BSP will be alway 0,
>> and it is consistent to cpu_to_node() that is to near node already.
>> aka when numa_set_node() for node0 is called early before per_cpu area is
>> setup
>>
>> try to set the node_number for boot cpu, after we get per_cpu area setup.
>>
>> [ Impact: fix crashing on memoryless node 0]
>>
>> Reported-by: Jack Steiner <steiner@sgi.com>
>> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>>
>> ---
>>  arch/x86/kernel/setup_percpu.c |    8 ++++++++
>>  1 file changed, 8 insertions(+)
>>
>> Index: linux-2.6/arch/x86/kernel/setup_percpu.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/setup_percpu.c
>> +++ linux-2.6/arch/x86/kernel/setup_percpu.c
>> @@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
>>  	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
>>  #endif
>>  
>> +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
>> +	/*
>> +	 * make sure boot cpu node_number is right, when boot cpu is on the
>> +	 * node that doesn't have mem installed
>> +	 */
>> +	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
>> +#endif
>> +
>>  	/* Setup node to cpumask map */
>>  	setup_node_to_cpumask_map();
>>  
> 
> With the patch above PLUS the patch below, I verified that all of our strange
> configurations boot to shell prompt & run simple commands. There are certainly
> some corner cases that have not been tested.
> 
> Note that both patches are required. The system panics in early boot if either
> patch is omitted.
> 
> ---
> 
> 
> Ignore offline nodes when building the zone lists. This
> fix is needed to support configurations that hax PXMs with
> cpus but no memory.
> 
> 
> Signed-off-by: Jack Steiner <steiner@sgi.com>
> 
> 
> ---
>  mm/page_alloc.c |    2 ++
>  1 file changed, 2 insertions(+)
> 
> Index: linux/mm/page_alloc.c
> ===================================================================
> --- linux.orig/mm/page_alloc.c	2009-05-12 17:06:59.000000000 -0500
> +++ linux/mm/page_alloc.c	2009-05-13 09:54:09.000000000 -0500
> @@ -2370,6 +2370,8 @@ static void build_zonelists(pg_data_t *p
>  		 * If another node is sufficiently far away then it is better
>  		 * to reclaim pages in a zone before going off node.
>  		 */
> +		if (!node_online(node))
> +			continue;
>  		if (distance > RECLAIM_DISTANCE)
>  			zone_reclaim_mode = 1;
>  

that means that node_states[N_HIGH_MEMORY] is still not right.

and it should be done by
/*
 * early_calculate_totalpages()
 * Sum pages in active regions for movable zone.
 * Populate N_HIGH_MEMORY for calculating usable_nodes.
 */
static unsigned long __init early_calculate_totalpages(void)
{
        int i;
        unsigned long totalpages = 0;

        for (i = 0; i < nr_nodemap_entries; i++) {
                unsigned long pages = early_node_map[i].end_pfn -
                                                early_node_map[i].start_pfn;
                totalpages += pages;
                if (pages)
                        node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
        }
        return totalpages;
}
     

also

void __init free_area_init_nodes(unsigned long *max_zone_pfn)
...



somehow that is broken?

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: fix system without memory on node0
  2009-05-13 16:52               ` Jack Steiner
  2009-05-13 17:43                 ` Yinghai Lu
@ 2009-05-13 18:08                 ` Yinghai Lu
  1 sibling, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-13 18:08 UTC (permalink / raw)
  To: Jack Steiner, Ingo Molnar, Andrew Morton, Mel Gorman
  Cc: H. Peter Anvin, Thomas Gleixner, David Rientjes, Andi Kleen,
	linux-kernel, Rusty Russell, Mike Travis

Jack Steiner wrote:
> On Tue, May 12, 2009 at 06:34:31PM -0700, Yinghai Lu wrote:
>> Jack found that crash with doesn't have memory on node0.
>>
>> it turns out with per_cpu changeset, node_number for BSP will be alway 0,
>> and it is consistent to cpu_to_node() that is to near node already.
>> aka when numa_set_node() for node0 is called early before per_cpu area is
>> setup
>>
>> try to set the node_number for boot cpu, after we get per_cpu area setup.
>>
>> [ Impact: fix crashing on memoryless node 0]
>>
>> Reported-by: Jack Steiner <steiner@sgi.com>
>> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>>
>> ---
>>  arch/x86/kernel/setup_percpu.c |    8 ++++++++
>>  1 file changed, 8 insertions(+)
>>
>> Index: linux-2.6/arch/x86/kernel/setup_percpu.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/setup_percpu.c
>> +++ linux-2.6/arch/x86/kernel/setup_percpu.c
>> @@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
>>  	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
>>  #endif
>>  
>> +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
>> +	/*
>> +	 * make sure boot cpu node_number is right, when boot cpu is on the
>> +	 * node that doesn't have mem installed
>> +	 */
>> +	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
>> +#endif
>> +
>>  	/* Setup node to cpumask map */
>>  	setup_node_to_cpumask_map();
>>  
> 
> With the patch above PLUS the patch below, I verified that all of our strange
> configurations boot to shell prompt & run simple commands. There are certainly
> some corner cases that have not been tested.
> 
> Note that both patches are required. The system panics in early boot if either
> patch is omitted.
> 
> ---
> 
> 
> Ignore offline nodes when building the zone lists. This
> fix is needed to support configurations that hax PXMs with
> cpus but no memory.
> 
> 
> Signed-off-by: Jack Steiner <steiner@sgi.com>
> 
> 
> ---
>  mm/page_alloc.c |    2 ++
>  1 file changed, 2 insertions(+)
> 
> Index: linux/mm/page_alloc.c
> ===================================================================
> --- linux.orig/mm/page_alloc.c	2009-05-12 17:06:59.000000000 -0500
> +++ linux/mm/page_alloc.c	2009-05-13 09:54:09.000000000 -0500
> @@ -2370,6 +2370,8 @@ static void build_zonelists(pg_data_t *p
>  		 * If another node is sufficiently far away then it is better
>  		 * to reclaim pages in a zone before going off node.
>  		 */
> +		if (!node_online(node))
> +			continue;
>  		if (distance > RECLAIM_DISTANCE)
>  			zone_reclaim_mode = 1;
>  

can you try this instead of your patch ?

[PATCH] mm: clear N_HIGH_MEMORY map before we set it again

in case some system has a strange SRAT table, e.g. with some kind of small range.

Signed-off-by: Yinghai Lu <Yinghai@kernel.org>

---
 mm/page_alloc.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
+	/*
+	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
+	 * that node_mask, clear it at first
+	 */
+	nodes_clear(nodes_state[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();



^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 1/5] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
  2009-05-13 14:59     ` Mel Gorman
@ 2009-05-14 16:38       ` Yinghai Lu
  2009-05-14 16:40         ` [PATCH 2/5] x86: add numa_move_cpus_to_node Yinghai Lu
                           ` (4 more replies)
  0 siblings, 5 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 16:38 UTC (permalink / raw)
  To: Mel Gorman, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Christoph Lameter
  Cc: Andrew Morton, Suresh Siddha, linux-kernel, Al Viro,
	Rusty Russell, Jack Steiner, David Rientjes


after
| commit b263295dbffd33b0fbff670720fa178c30e3392a
| Author: Christoph Lameter <clameter@sgi.com>
| Date:   Wed Jan 30 13:30:47 2008 +0100
|
|    x86: 64-bit, make sparsemem vmemmap the only memory model

we don't have MEMORY_HOTPLUG_RESERVE anymore.

Historically, x86-64 had an architecture-specific method for memory hotplug
whereby it scanned the SRAT for physical memory ranges that could be
potentially used for memory hot-add later. By reserving those ranges
without physical memory, the memmap would be allocated and left dormant
until needed. This depended on the DISCONTIG memory model which has been
removed so the code implementing HOTPLUG_RESERVE is now dead.

This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE

Changelog updated by Mel.

v2: updated changelog, and remove hotadd= in doc

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>

---
 Documentation/x86/x86_64/boot-options.txt |    5 --
 arch/x86/include/asm/numa_64.h            |    3 -
 arch/x86/mm/numa_64.c                     |    5 --
 arch/x86/mm/srat_64.c                     |   63 +++++----------------------
 include/linux/mm.h                        |    2 
 mm/page_alloc.c                           |   69 ------------------------------
 6 files changed, 12 insertions(+), 135 deletions(-)

Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -593,8 +590,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
Index: linux-2.6/arch/x86/mm/srat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/srat_64.c
+++ linux-2.6/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long s
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long s
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long s
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct ac
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a,
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned in
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -150,10 +150,6 @@ static unsigned long __meminitdata dma_r
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3121,64 +3117,6 @@ void __init sparse_memory_present_with_a
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3203,9 +3141,6 @@ void __meminit get_pfn_range_for_nid(uns
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3810,10 +3745,6 @@ void __init remove_all_active_ranges(voi
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
Index: linux-2.6/Documentation/x86/x86_64/boot-options.txt
===================================================================
--- linux-2.6.orig/Documentation/x86/x86_64/boot-options.txt
+++ linux-2.6/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 2/5] x86: add numa_move_cpus_to_node
  2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
@ 2009-05-14 16:40         ` Yinghai Lu
  2009-05-14 16:41         ` [PATCH 3/5] x86: fix node_possible_map logic -v2 Yinghai Lu
                           ` (3 subsequent siblings)
  4 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 16:40 UTC (permalink / raw)
  To: Mel Gorman, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Christoph Lameter
  Cc: Andrew Morton, Suresh Siddha, linux-kernel, Al Viro,
	Rusty Russell, Jack Steiner, David Rientjes


when a node only has a hot-add range and doesn't have any other static range,
that node will not be onlined, and the cpus on it will be linked to a nearby
node with memory. when that hot-add range is added later, we need to
link those cpus back.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/include/asm/numa_64.h |   10 ++++---
 arch/x86/mm/init_64.c          |    3 ++
 arch/x86/mm/numa_64.c          |   52 +++++++++++++++++++++++++++++++++++------
 3 files changed, 54 insertions(+), 11 deletions(-)

Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -25,16 +25,18 @@ extern void setup_node_bootmem(int nodei
 
 #ifdef CONFIG_NUMA
 extern void __init init_cpu_to_node(void);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
+extern void numa_set_node(int cpu, int node);
+extern void numa_clear_node(int cpu);
+extern void numa_add_cpu(int cpu);
+extern void numa_remove_cpu(int cpu);
+extern void numa_move_cpus_to_node(int nid);
 #else
 static inline void init_cpu_to_node(void)		{ }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
 static inline void numa_remove_cpu(int cpu)		{ }
+static inline void numa_move_cpus_to_node(int nid)	{ }
 #endif
 
 #endif /* _ASM_X86_NUMA_64_H */
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/sched.h>
+#include <linux/node.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -645,7 +646,7 @@ void __init init_cpu_to_node(void)
 #endif
 
 
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
 
@@ -668,19 +669,56 @@ void __cpuinit numa_set_node(int cpu, in
 		per_cpu(node_number, cpu) = node;
 }
 
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
 {
 	numa_set_node(cpu, NUMA_NO_NODE);
 }
 
+static int real_cpu_to_node(int cpu)
+{
+	int apicid, nodeid = -1;
+
+	/*
+	 * when the node doesn't have memory before, cpu_to_node(cpu) is
+	 * point to other node, but apicid_to_node still hold the real nodeid
+	 */
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	if (apicid == BAD_APICID)
+		return nodeid;
+
+	nodeid = apicid_to_node[apicid];
+	return nodeid;
+}
+
+void numa_move_cpus_to_node(int nid)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		int nodeid;
+
+		nodeid = real_cpu_to_node(cpu);
+		if (nodeid != nid)
+			continue;
+
+		nodeid = cpu_to_node(cpu);
+		if (nodeid != nid) {
+			unregister_cpu_under_node(cpu, nodeid);
+			numa_remove_cpu(cpu);
+			numa_set_node(cpu, nid);
+			numa_add_cpu(cpu);
+		}
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
-void __cpuinit numa_add_cpu(int cpu)
+void numa_add_cpu(int cpu)
 {
 	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
-void __cpuinit numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
 {
 	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
@@ -690,7 +728,7 @@ void __cpuinit numa_remove_cpu(int cpu)
 /*
  * --------- debug versions of the numa functions ---------
  */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void numa_set_cpumask(int cpu, int enable)
 {
 	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
@@ -713,12 +751,12 @@ static void __cpuinit numa_set_cpumask(i
 		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
 }
 
-void __cpuinit numa_add_cpu(int cpu)
+void numa_add_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 1);
 }
 
-void __cpuinit numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 0);
 }
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -631,6 +631,9 @@ int arch_add_memory(int nid, u64 start,
 	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 	WARN_ON_ONCE(ret);
 
+	if (!ret)
+		numa_move_cpus_to_node(nid);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 3/5] x86: fix node_possible_map logic -v2
  2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
  2009-05-14 16:40         ` [PATCH 2/5] x86: add numa_move_cpus_to_node Yinghai Lu
@ 2009-05-14 16:41         ` Yinghai Lu
  2009-05-18  7:40           ` [tip:x86/mm] x86, mm: Fix node_possible_map logic tip-bot for Yinghai Lu
  2009-05-14 16:42         ` [PATCH 4/5] x86: fix system without memory on node0 -v2 Yinghai Lu
                           ` (2 subsequent siblings)
  4 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 16:41 UTC (permalink / raw)
  To: Mel Gorman, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Christoph Lameter
  Cc: Andrew Morton, Suresh Siddha, linux-kernel, Al Viro,
	Rusty Russell, Jack Steiner, David Rientjes


recently there were some changes to the meaning of node_possible_map

and the result is somewhat strange:
a node without memory would be set in node_possible_map
but a node with less than NODE_MIN_SIZE of memory will be kicked out of node_possible_map.

try to fix it by adding strict_setup_node_bootmem.
also remove unparse_node.

so result will be:
1. cpu_to_node will return an online node only (the nearest one)
2. apicid_to_node still returns the node, which may not be online but is set
   in node_possible_map.
3. node_possible_map will include nodes whose memory is less than NODE_MIN_SIZE

v2: after move_cpus_to_node change.

[ Impact: get node_possible_map right ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>

---
 arch/x86/include/asm/numa_64.h |    4 ++++
 arch/x86/mm/numa_64.c          |    7 +++++++
 arch/x86/mm/srat_64.c          |   29 ++---------------------------
 3 files changed, 13 insertions(+), 27 deletions(-)

Index: linux-2.6/arch/x86/mm/srat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/srat_64.c
+++ linux-2.6/arch/x86/mm/srat_64.c
@@ -36,10 +36,6 @@ static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -338,17 +334,6 @@ static int __init nodes_cover_memory(con
 	return 1;
 }
 
-static void __init unparse_node(int node)
-{
-	int i;
-	node_clear(node, nodes_parsed);
-	node_clear(node, cpu_nodes_parsed);
-	for (i = 0; i < MAX_LOCAL_APIC; i++) {
-		if (apicid_to_node[i] == node)
-			apicid_to_node[i] = NUMA_NO_NODE;
-	}
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long
 		return -1;
 
 	/* First clean up the node list */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
-		/*
-		 * don't confuse VM with a node that doesn't have the
-		 * minimum memory.
-		 */
-		if (nodes[i].end &&
-			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-			unparse_node(i);
-			node_set_offline(i);
-		}
-	}
 
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
@@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long
 
 		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(node, node_possible_map))
+		if (!node_online(node))
 			numa_clear_node(i);
 	}
 	numa_init_array();
Index: linux-2.6/arch/x86/mm/numa_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/numa_64.c
+++ linux-2.6/arch/x86/mm/numa_64.c
@@ -192,6 +192,13 @@ void __init setup_node_bootmem(int nodei
 	if (!end)
 		return;
 
+	/*
+	 * don't confuse VM with a node that doesn't have the
+	 * minimum memory.
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
Index: linux-2.6/arch/x86/include/asm/numa_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/numa_64.h
+++ linux-2.6/arch/x86/include/asm/numa_64.h
@@ -24,6 +24,10 @@ extern void setup_node_bootmem(int nodei
 			       unsigned long end);
 
 #ifdef CONFIG_NUMA
+/* Too small nodes confuse the VM badly. Usually they result
+   from BIOS bugs. */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void numa_set_node(int cpu, int node);
 extern void numa_clear_node(int cpu);

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 4/5] x86: fix system without memory on node0 -v2
  2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
  2009-05-14 16:40         ` [PATCH 2/5] x86: add numa_move_cpus_to_node Yinghai Lu
  2009-05-14 16:41         ` [PATCH 3/5] x86: fix node_possible_map logic -v2 Yinghai Lu
@ 2009-05-14 16:42         ` Yinghai Lu
  2009-05-18  7:40           ` [tip:x86/mm] x86: fix system without memory on node0 tip-bot for Yinghai Lu
  2009-05-14 16:43         ` [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2 Yinghai Lu
  2009-05-18  7:39         ` [tip:x86/mm] mm, x86: remove MEMORY_HOTPLUG_RESERVE related code tip-bot for Yinghai Lu
  4 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 16:42 UTC (permalink / raw)
  To: Mel Gorman, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Christoph Lameter
  Cc: Andrew Morton, Suresh Siddha, linux-kernel, Al Viro,
	Rusty Russell, Jack Steiner, David Rientjes


Jack found a crash on a system that doesn't have memory on node0.

it turns out that with the per_cpu changeset, node_number for the BSP will always be 0,
and that is not consistent with cpu_to_node(), which already points to the nearest node.
aka when numa_set_node() for node0 is called early, before the per_cpu area is
set up

two places touched that per_cpu(node_number,):
1. in cpu/common.c::cpu_init() and it is not for BP
| #ifdef CONFIG_NUMA
|        if (cpu != 0 && percpu_read(node_number) == 0 &&
|            cpu_to_node(cpu) != NUMA_NO_NODE)
|                percpu_write(node_number, cpu_to_node(cpu));
| #endif
for BP: traps_init ==> cpu_init
for AP: start_secondary ==> cpu_init

2. cpu/intel.c or amd.c::srat_detect_node via numa_set_node()
for BP: check_bugs ==> identify_boot_cpu ==> identify_cpu()
	 that is rather later before numa_node_id() is used for BP...
for AP: start_secondary=>smp_callin=>smp_store_cpu_info()=>identify_secondary_cpu ==> identify_cpu()

so only try to set that for the BP earlier, in setup_per_cpu_areas, and
don't bother setting it for APs there (it will be updated later and used later)
(and don't mess with the 0 before copying the BP per_cpu data to the APs)

v2: updated changelog with detailed reason

[ Impact: fix crashing on memoryless node 0]

Reported-and-tested-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/kernel/setup_percpu.c |    8 ++++++++
 1 file changed, 8 insertions(+)

Index: linux-2.6/arch/x86/kernel/setup_percpu.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_percpu.c
+++ linux-2.6/arch/x86/kernel/setup_percpu.c
@@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+	/*
+	 * make sure boot cpu node_number is right, when boot cpu is on the
+	 * node that doesn't have mem installed
+	 */
+	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+#endif
+
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
                           ` (2 preceding siblings ...)
  2009-05-14 16:42         ` [PATCH 4/5] x86: fix system without memory on node0 -v2 Yinghai Lu
@ 2009-05-14 16:43         ` Yinghai Lu
  2009-05-14 16:54           ` Andrew Morton
  2009-05-18  7:39         ` [tip:x86/mm] mm, x86: remove MEMORY_HOTPLUG_RESERVE related code tip-bot for Yinghai Lu
  4 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 16:43 UTC (permalink / raw)
  To: Mel Gorman, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Christoph Lameter
  Cc: Andrew Morton, Suresh Siddha, linux-kernel, Al Viro,
	Rusty Russell, Jack Steiner, David Rientjes


in case some system has a strange SRAT table, e.g. with some kind of small range,
or is booted with mem= etc

v2: fix typo

Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>

---
 mm/page_alloc.c |   13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4041,6 +4047,11 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
+	/*
+	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
+	 * that node_mask, clear it at first
+	 */
+	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 16:43         ` [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2 Yinghai Lu
@ 2009-05-14 16:54           ` Andrew Morton
  2009-05-14 17:05             ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Andrew Morton @ 2009-05-14 16:54 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: mel, mingo, tglx, hpa, cl, suresh.b.siddha, linux-kernel, viro,
	rusty, steiner, rientjes

On Thu, 14 May 2009 09:43:22 -0700
Yinghai Lu <yinghai@kernel.org> wrote:

> 
> incase some system strange SRAT table. some kind of small range.
> or with mem= etc
> 

That description is very hard to understand.  Please provide more details.

> 
> Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
> Tested-by: Jack Steiner <steiner@sgi.com>

What reason did Jack have to test this?  Perhaps he hit some bug? 
If so, please fully describe that bug in the changelog.


> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -4041,6 +4047,11 @@ void __init free_area_init_nodes(unsigne
>  						early_node_map[i].start_pfn,
>  						early_node_map[i].end_pfn);
>  
> +	/*
> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
> +	 * that node_mask, clear it at first
> +	 */
> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>  	/* Initialise every node */
>  	mminit_verify_pageflags_layout();
>  	setup_nr_node_ids();

If CONFIG_HIGHMEM=n, this will clear the N_NORMAL_MEMORY entry in
node_states[].  Why is this correct and desirable?


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 16:54           ` Andrew Morton
@ 2009-05-14 17:05             ` Yinghai Lu
  2009-05-14 17:25               ` Andrew Morton
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 17:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: mel, mingo, tglx, hpa, cl, suresh.b.siddha, linux-kernel, viro,
	rusty, steiner, rientjes

Andrew Morton wrote:
> On Thu, 14 May 2009 09:43:22 -0700
> Yinghai Lu <yinghai@kernel.org> wrote:
> 
>> incase some system strange SRAT table. some kind of small range.
>> or with mem= etc
>>
> 
> That description is very hard to understand.  Please provide more details.

if a wrong SRAT table has a small range for some node, that node will not be onlined.
In the early checking, the bit in node_states[N_HIGH_MEMORY] for the node is set even
though that node has little RAM, like 1M, and it is not cleared before the bits are set again in
the following loop according to the online nodes.

> 
>> Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
>> Tested-by: Jack Steiner <steiner@sgi.com>
> 
> What reason did Jack have to test this?  Perhaps he hit some bug? 
> If so, please fully describe that bug in the changelog.

for some memoryless node and strange memmap.

> 
> 
>> Index: linux-2.6/mm/page_alloc.c
>> ===================================================================
>> --- linux-2.6.orig/mm/page_alloc.c
>> +++ linux-2.6/mm/page_alloc.c
>> @@ -4041,6 +4047,11 @@ void __init free_area_init_nodes(unsigne
>>  						early_node_map[i].start_pfn,
>>  						early_node_map[i].end_pfn);
>>  
>> +	/*
>> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
>> +	 * that node_mask, clear it at first
>> +	 */
>> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>>  	/* Initialise every node */
>>  	mminit_verify_pageflags_layout();
>>  	setup_nr_node_ids();
> 
> If CONFIG_HIGHMEM=n, this will clear the N_NORMAL_MEMORY entry in
> node_states[].  Why is this correct and desirable?

then N_NORMAL_MEMORY == N_HIGH_MEMORY

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 17:05             ` Yinghai Lu
@ 2009-05-14 17:25               ` Andrew Morton
  2009-05-14 17:34                 ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Andrew Morton @ 2009-05-14 17:25 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: mel, mingo, tglx, hpa, cl, suresh.b.siddha, linux-kernel, viro,
	rusty, steiner, rientjes

On Thu, 14 May 2009 10:05:43 -0700
Yinghai Lu <yinghai@kernel.org> wrote:

> Andrew Morton wrote:
> > On Thu, 14 May 2009 09:43:22 -0700
> > Yinghai Lu <yinghai@kernel.org> wrote:
> > 
> >> incase some system strange SRAT table. some kind of small range.
> >> or with mem= etc
> >>
> > 
> > That description is very hard to understand.  Please provide more details.
> 
> if the wrong SRAT table, have small range for some node. that node will not be onlined.
> In the early checking, the bit in node_states[N_HIGH_MEMORY] for the node is set even 
> that node has less RAM like 1M, and it is not cleared before the bit is set again in 
> the following loop according online nodes.

Where in the kernel does this setting of the bit in node_states[]
occur?  early_calculate_totalpages()?

Where in the kernel is it later decided to _not_ use these pages in
that node?  Perhaps that's the place where the problem should be fixed.


> > 
> >> Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
> >> Tested-by: Jack Steiner <steiner@sgi.com>
> > 
> > What reason did Jack have to test this?  Perhaps he hit some bug? 
> > If so, please fully describe that bug in the changelog.
> 
> for some memmoryless node and strange memmap.

That's not a very good problem description.

Put yourself in the position of a distro engineer whose customer
reports a 2.6.26 problem.  He's going to look at your patch wondering
whether it might fix his customer's problem.  We should provide him
with sufficient information to be able to determine this.

> > 
> > 
> >> Index: linux-2.6/mm/page_alloc.c
> >> ===================================================================
> >> --- linux-2.6.orig/mm/page_alloc.c
> >> +++ linux-2.6/mm/page_alloc.c
> >> @@ -4041,6 +4047,11 @@ void __init free_area_init_nodes(unsigne
> >>  						early_node_map[i].start_pfn,
> >>  						early_node_map[i].end_pfn);
> >>  
> >> +	/*
> >> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
> >> +	 * that node_mask, clear it at first
> >> +	 */
> >> +	nodes_clear(node_states[N_HIGH_MEMORY]);
> >>  	/* Initialise every node */
> >>  	mminit_verify_pageflags_layout();
> >>  	setup_nr_node_ids();
> > 
> > If CONFIG_HIGHMEM=n, this will clear the N_NORMAL_MEMORY entry in
> > node_states[].  Why is this correct and desirable?
> 
> then N_NORMAL_MEMORY == N_HIGH_MEMORY

I know.

But it's unobvious that this change is correct and desirable with both
CONFIG_HIGHMEM=n and CONFIG_HIGHMEM=y.


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 17:25               ` Andrew Morton
@ 2009-05-14 17:34                 ` Yinghai Lu
  2009-05-14 19:44                   ` Christoph Lameter
  2009-06-04  5:16                   ` [RESEND PATCH] " Yinghai Lu
  0 siblings, 2 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 17:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: mel, mingo, tglx, hpa, cl, suresh.b.siddha, linux-kernel, viro,
	rusty, steiner, rientjes

Andrew Morton wrote:
> On Thu, 14 May 2009 10:05:43 -0700
> Yinghai Lu <yinghai@kernel.org> wrote:
> 
>> Andrew Morton wrote:
>>> On Thu, 14 May 2009 09:43:22 -0700
>>> Yinghai Lu <yinghai@kernel.org> wrote:
>>>
>>>> incase some system strange SRAT table. some kind of small range.
>>>> or with mem= etc
>>>>
>>> That description is very hard to understand.  Please provide more details.
>> if the wrong SRAT table, have small range for some node. that node will not be onlined.
>> In the early checking, the bit in node_states[N_HIGH_MEMORY] for the node is set even 
>> that node has less RAM like 1M, and it is not cleared before the bit is set again in 
>> the following loop according online nodes.
> 
> Where in the kernel does this setting of the bit in node_states[]
> occur?  early_calculate_totalpages()?

yes.

> 
> Where in the kernel is it later decided to _not_ use these pages in
> that node?  Perhaps that's the place where the problem should be fixed.

in free_area_init_nodes()

        /* Initialise every node */
        mminit_verify_pageflags_layout();
        setup_nr_node_ids();
        for_each_online_node(nid) {
                pg_data_t *pgdat = NODE_DATA(nid);
                free_area_init_node(nid, NULL,
                                find_min_pfn_for_node(nid), NULL);

                /* Any memory on that node */
                if (pgdat->node_present_pages)
                        node_set_state(nid, N_HIGH_MEMORY);
                check_for_regular_memory(pgdat);
        }

so the patch clears that node_mask before setting those bits according to whether the node is online
and has node_present_pages.

> 
> 
>>>> Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
>>>> Tested-by: Jack Steiner <steiner@sgi.com>
>>> What reason did Jack have to test this?  Perhaps he hit some bug? 
>>> If so, please fully describe that bug in the changelog.
>> for some memmoryless node and strange memmap.
> 
> That's not a very good problem description.
> 
> Put yourself in the position of a distro engineer whose customer
> reports a 2.6.26 problem.  He's going to look at your patch wondering
> whether it might fix his customer's problem.  We should provide him
> with sufficient information to be able to determine this.
> 
>>>
>>>> Index: linux-2.6/mm/page_alloc.c
>>>> ===================================================================
>>>> --- linux-2.6.orig/mm/page_alloc.c
>>>> +++ linux-2.6/mm/page_alloc.c
>>>> @@ -4041,6 +4047,11 @@ void __init free_area_init_nodes(unsigne
>>>>  						early_node_map[i].start_pfn,
>>>>  						early_node_map[i].end_pfn);
>>>>  
>>>> +	/*
>>>> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
>>>> +	 * that node_mask, clear it at first
>>>> +	 */
>>>> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>>>>  	/* Initialise every node */
>>>>  	mminit_verify_pageflags_layout();
>>>>  	setup_nr_node_ids();
>>> If CONFIG_HIGHMEM=n, this will clear the N_NORMAL_MEMORY entry in
>>> node_states[].  Why is this correct and desirable?
>> then N_NORMAL_MEMORY == N_HIGH_MEMORY
> 
> I know.
> 
> But it's unobvious that this change is correct and desirable with both
> CONFIG_HIGHMEM=n and CONFIG_HIGHMEM=y.

use ifdef ?

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-13  5:39             ` Yinghai Lu
@ 2009-05-14 19:34               ` Christoph Lameter
  2009-05-14 20:58                 ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Christoph Lameter @ 2009-05-14 19:34 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

On Tue, 12 May 2009, Yinghai Lu wrote:

> so i recored this OK as one Reviewed-by or Acked-by ?

Well why do we need this functionality in the kernel if offlining and
onlining the cpu again will properly allocate the kernel metadata?



^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 17:34                 ` Yinghai Lu
@ 2009-05-14 19:44                   ` Christoph Lameter
  2009-06-04  5:16                   ` [RESEND PATCH] " Yinghai Lu
  1 sibling, 0 replies; 102+ messages in thread
From: Christoph Lameter @ 2009-05-14 19:44 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, mel, mingo, tglx, hpa, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes

On Thu, 14 May 2009, Yinghai Lu wrote:

> > Put yourself in the position of a distro engineer whose customer
> > reports a 2.6.26 problem.  He's going to look at your patch wondering
> > whether it might fix his customer's problem.  We should provide him
> > with sufficient information to be able to determine this.

PowerPC has similar node 0 issues that have thrown us for a curve once in
awhile. Now they are coming to x86.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH 2/3] x86: add numa_move_cpus_to_node
  2009-05-14 19:34               ` Christoph Lameter
@ 2009-05-14 20:58                 ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-14 20:58 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Suresh Siddha, linux-kernel, Al Viro, Rusty Russell,
	Pekka Enberg

On Thu, May 14, 2009 at 12:34 PM, Christoph Lameter
<cl@linux-foundation.org> wrote:
> On Tue, 12 May 2009, Yinghai Lu wrote:
>
>> so i recored this OK as one Reviewed-by or Acked-by ?
>
> Well why do we need this functionality in the kernel if offlining and
> onlining the cpu again will properly allocate the kernel metadata?

ok, you are right, for x86 64 bit, srat_detect_node will call
numa_set_node when onlining the cpu again.

we don't need this patch.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* tip: patches in git for irq and numa
       [not found]             ` <20090515173521.GA29647@elte.hu>
@ 2009-05-15 21:38               ` Yinghai Lu
  2009-05-18  7:29                 ` Ingo Molnar
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-05-15 21:38 UTC (permalink / raw)
  To: Ingo Molnar, Jack Steiner; +Cc: linux-kernel

irq related:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git irq
needs to be on top of tip/irq/numa

for memoryless node support: 
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git numa
and it is on top of tip/master

YH


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-15 21:38               ` tip: patches in git for irq and numa Yinghai Lu
@ 2009-05-18  7:29                 ` Ingo Molnar
  2009-05-18 13:50                   ` Peter Zijlstra
  0 siblings, 1 reply; 102+ messages in thread
From: Ingo Molnar @ 2009-05-18  7:29 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin


* Yinghai Lu <yinghai@kernel.org> wrote:

> irq related:
> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git irq
> need to on top of tip/irq/numa

ok, these were nicely structured. the pci_routeirq patch had a build 
bug for !CONFIG_PCI.

( I added an #ifdef for now, it might make sense to send a clean-up 
  patch in the next merge window (not now) to factor out a 
  pci_routeirq_enable() method that does all this cleanly. )

Also, please add appropriate Cc: lines to the commit logs in the 
future, beyond the LKML-Reference tags.

> for memoryless node support: 
> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git numa
> and it is on top of tip/master

small note: you could have based these on x86/mm btw. - that's where 
these patches go, typically.

regarding subject lines:

 d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
 02ce039: x86: fix system without memory on node0 -v2
 8c1aec8: x86: fix node_possible_map logic -v2
 44a633c: x86: remove MEMORY_HOTPLUG_RESERVE related code -v2

please never put '-v2' type of tags into the title of commits. In 
the title of patches they can be put here:

  [PATCH, v2] x86: fix system without memory on node0

that saves maintainers a bit of typing work.

Also, you included:

 d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2

with no Acks from MM folks yet. So i skipped that one and will 
follow up about it.

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [tip:x86/mm] mm, x86: remove MEMORY_HOTPLUG_RESERVE related code
  2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
                           ` (3 preceding siblings ...)
  2009-05-14 16:43         ` [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2 Yinghai Lu
@ 2009-05-18  7:39         ` tip-bot for Yinghai Lu
  4 siblings, 0 replies; 102+ messages in thread
From: tip-bot for Yinghai Lu @ 2009-05-18  7:39 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, yinghai, mel, akpm, tglx, mingo, cl

Commit-ID:  888a589f6be07d624e21e2174d98375e9f95911b
Gitweb:     http://git.kernel.org/tip/888a589f6be07d624e21e2174d98375e9f95911b
Author:     Yinghai Lu <yinghai@kernel.org>
AuthorDate: Fri, 15 May 2009 13:59:37 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Mon, 18 May 2009 09:13:31 +0200

mm, x86: remove MEMORY_HOTPLUG_RESERVE related code

after:

 | commit b263295dbffd33b0fbff670720fa178c30e3392a
 | Author: Christoph Lameter <clameter@sgi.com>
 | Date:   Wed Jan 30 13:30:47 2008 +0100
 |
 |    x86: 64-bit, make sparsemem vmemmap the only memory model

we don't have MEMORY_HOTPLUG_RESERVE anymore.

Historically, x86-64 had an architecture-specific method for memory hotplug
whereby it scanned the SRAT for physical memory ranges that could be
potentially used for memory hot-add later. By reserving those ranges
without physical memory, the memmap would be allocated and left dormant
until needed. This depended on the DISCONTIG memory model which has been
removed so the code implementing HOTPLUG_RESERVE is now dead.

This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE.

(Changelog authored by Mel.)

v2: updated changelog, and remove hotadd= in doc

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Workflow-found-OK-by: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A0C4910.7090508@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 Documentation/x86/x86_64/boot-options.txt |    5 --
 arch/x86/include/asm/numa_64.h            |    3 -
 arch/x86/mm/numa_64.c                     |    5 --
 arch/x86/mm/srat_64.c                     |   63 +++++---------------------
 include/linux/mm.h                        |    2 -
 mm/page_alloc.c                           |   69 -----------------------------
 6 files changed, 12 insertions(+), 135 deletions(-)

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c1304..2db5893 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6d..7feff06 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index fb61d81..a6a93c3 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -591,8 +588,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 87b45bf..b0dbbd4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdata;
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, int b)
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d..511b098 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ec..474c7e9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3103,64 +3099,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* [tip:x86/mm] x86, mm: Fix node_possible_map logic
  2009-05-14 16:41         ` [PATCH 3/5] x86: fix node_possible_map logic -v2 Yinghai Lu
@ 2009-05-18  7:40           ` tip-bot for Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: tip-bot for Yinghai Lu @ 2009-05-18  7:40 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, yinghai, steiner, tglx, mingo

Commit-ID:  7c43769a9776141ec23ca81a1bdd5a9c0512f165
Gitweb:     http://git.kernel.org/tip/7c43769a9776141ec23ca81a1bdd5a9c0512f165
Author:     Yinghai Lu <yinghai@kernel.org>
AuthorDate: Fri, 15 May 2009 13:59:37 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Mon, 18 May 2009 09:21:04 +0200

x86, mm: Fix node_possible_map logic

Recently there were some changes to the meaning of node_possible_map,
and it is quite strange:

- a node without memory would be set in node_possible_map
- but a node with less than NODE_MIN_SIZE of memory would be kicked out of
  node_possible_map.

fix it by adding strict_setup_node_bootmem().

Also, remove unparse_node().

so result will be:

1. cpu_to_node() will return only an online node (the nearest one)
2. apicid_to_node() still returns a node that may not be online but is set
   in node_possible_map.
3. node_possible_map will include nodes whose memory is less than
   NODE_MIN_SIZE

v2: after move_cpus_to_node change.

[ Impact: get node_possible_map right ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A0C49BE.6080800@kernel.org>
[ v3: various small cleanups and comment clarifications ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 arch/x86/include/asm/numa_64.h |    7 +++++++
 arch/x86/mm/numa_64.c          |   13 ++++++++++---
 arch/x86/mm/srat_64.c          |   29 ++---------------------------
 3 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 7feff06..c4ae822 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -24,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
 
 #ifdef CONFIG_NUMA
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a6a93c3..459913b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end)
+void __init
+setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
 	if (!end)
 		return;
 
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index b0dbbd4..2dfcbf9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -36,10 +36,6 @@ static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -338,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 	return 1;
 }
 
-static void __init unparse_node(int node)
-{
-	int i;
-	node_clear(node, nodes_parsed);
-	node_clear(node, cpu_nodes_parsed);
-	for (i = 0; i < MAX_LOCAL_APIC; i++) {
-		if (apicid_to_node[i] == node)
-			apicid_to_node[i] = NUMA_NO_NODE;
-	}
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		return -1;
 
 	/* First clean up the node list */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
-		/*
-		 * don't confuse VM with a node that doesn't have the
-		 * minimum memory.
-		 */
-		if (nodes[i].end &&
-			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-			unparse_node(i);
-			node_set_offline(i);
-		}
-	}
 
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
@@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 
 		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(node, node_possible_map))
+		if (!node_online(node))
 			numa_clear_node(i);
 	}
 	numa_init_array();

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* [tip:x86/mm] x86: fix system without memory on node0
  2009-05-14 16:42         ` [PATCH 4/5] x86: fix system without memory on node0 -v2 Yinghai Lu
@ 2009-05-18  7:40           ` tip-bot for Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: tip-bot for Yinghai Lu @ 2009-05-18  7:40 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, htejun, hpa, mingo, yinghai, steiner, tglx, mingo

Commit-ID:  35d5a9a61490bf39d2e48d7f499c8c801a39ebe9
Gitweb:     http://git.kernel.org/tip/35d5a9a61490bf39d2e48d7f499c8c801a39ebe9
Author:     Yinghai Lu <yinghai@kernel.org>
AuthorDate: Fri, 15 May 2009 13:59:37 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Mon, 18 May 2009 09:27:09 +0200

x86: fix system without memory on node0

Jack found a boot crash on a system which doesn't have memory on node0.

It turns out that with the recent per_cpu changes, node_number for the BSP
will always be 0, which is inconsistent with cpu_to_node(), which might
already be set to a different (nearer) node.

That is, this happens when numa_set_node() for node0 is called early,
before the per_cpu area is set up:

two places touched that per_cpu(node_number,):

1. in cpu/common.c::cpu_init() and it is not for BP
| #ifdef CONFIG_NUMA
|        if (cpu != 0 && percpu_read(node_number) == 0 &&
|            cpu_to_node(cpu) != NUMA_NO_NODE)
|                percpu_write(node_number, cpu_to_node(cpu));
| #endif
for BP: traps_init ==> cpu_init
for AP: start_secondary ==> cpu_init

2. cpu/intel.c or amd.c::srat_detect_node via numa_set_node()
for BP: check_bugs ==> identify_boot_cpu ==> identify_cpu()
	 that is rather later before numa_node_id() is used for BP...
for AP: start_secondary => smp_callin => smp_store_cpu_info() =>
	=> identify_secondary_cpu => identify_cpu()

So set it for the BP earlier, in setup_per_cpu_areas(), and don't bother
to set it for the APs there (it will be updated later, before it is
used).

(Also, don't disturb the 0 before the BP per_cpu data is copied to the APs.)

[ Impact: fix boot crash on memoryless node-0 ]

Reported-and-tested-by: Jack Steiner <steiner@sgi.com>
Cc: Tejun Heo <htejun@gmail.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4A0C4A02.7050401@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 arch/x86/kernel/setup_percpu.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 3a97a4c..3b5f327 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+	/*
+	 * make sure boot cpu node_number is right, when boot cpu is on the
+	 * node that doesn't have mem installed
+	 */
+	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+#endif
+
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
 

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18  7:29                 ` Ingo Molnar
@ 2009-05-18 13:50                   ` Peter Zijlstra
  2009-05-18 13:56                     ` Ingo Molnar
  2009-05-18 15:03                     ` Yinghai Lu
  0 siblings, 2 replies; 102+ messages in thread
From: Peter Zijlstra @ 2009-05-18 13:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Yinghai Lu, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin

On Mon, 2009-05-18 at 09:29 +0200, Ingo Molnar wrote:
> * Yinghai Lu <yinghai@kernel.org> wrote:
> 
> > irq related:
> > git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git irq
> > need to on top of tip/irq/numa
> 
> ok, these were nicely structured. the pci_routeirq patch had a build 
> bug for !CONFIG_PCI.
> 
> ( I added an #ifdef for now, it might make sense to send a clean-up 
>   patch in the next merge window (not now) to factor out a 
>   pci_routeirq_enable() method that does all this cleanly. )
> 
> Also, please add appropriate Cc: lines to the commit logs in the 
> future, beyond the LKML-Reference tags.
> 
> > for memoryless node support: 
> > git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git numa
> > and it is on top of tip/master
> 
> small note: you could have based these on x86/mm btw. - that's where 
> these patches go, typically.
> 
> regarding subject lines:
> 
>  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
>  02ce039: x86: fix system without memory on node0 -v2
>  8c1aec8: x86: fix node_possible_map logic -v2
>  44a633c: x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
> 
> please never put '-v2' type of tags into the title of commits. In 
> the title of patches they can be put here:
> 
>   [PATCH, v2] x86: fix system without memory on node0
> 
> that saves maintainers a bit of typing work.
> 
> Also, you included:
> 
>  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
> 
> with no Acks from MM folks yet. So i skipped that one and will 
> follow up about it.

The below seems to wreck my opteron, ata1 interrupts fail to get
through.


[    6.951257] ata1.00: qc timeout (cmd 0x27)
[    6.955354] ata1.00: failed to read native max address (err_mask=0x4)
[    6.961781] ata1.00: HPA support seems broken, skipping HPA handling
[    7.273044] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[    7.285159] ata1.00: configured for UDMA/133
[    7.290052] scsi 0:0:0:0: Direct-Access     ATA      WDC WD1200JS-00N 10.0 PQ: 0 ANSI: 5
[    7.299294] sd 0:0:0:0: [sda] 234441648 512-byte hardware sectors: (120 GB/111 GiB)
[    7.306968] sd 0:0:0:0: [sda] Write Protect is off
[    7.311754] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[    7.316839] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[    7.326312]  sda:<6>ata2: SATA link down (SStatus 4 SControl 300)
[    7.938372] ata3: SATA link down (SStatus 4 SControl 300)
[    8.258372] ata4: SATA link down (SStatus 4 SControl 300)
[    8.264357] scsi 4:0:0:0: CD-ROM            TEAC     DV-516G          F4S7 PQ: 0 ANSI: 5
[   37.704234] ata1: lost interrupt (Status 0x50)
[   37.708695] sd 0:0:0:0: [sda] Unhandled error code
[   37.713479] sd 0:0:0:0: [sda] Result: hostbyte=DID_OK driverbyte=DRIVER_TIMEOUT
[   37.720791] end_request: I/O error, dev sda, sector 0
[   37.725848] Buffer I/O error on device sda, logical block 0

---


commit b9c61b70075c87a8612624736faf4a2de5b1ed30
Author: Yinghai Lu <yinghai@kernel.org>
Date:   Wed May 6 10:10:06 2009 -0700

    x86/pci: update pirq_enable_irq() to setup io apic routing
    
    So we can set io apic routing only when enabling the device irq.
    
    This is advantageous for IRQ descriptor allocation affinity: if we set up
    the IO-APIC entry later, we have a chance to allocate the IRQ descriptor
    later and know which device it is on and can set affinity accordingly.
    
    [ Impact: standardize/enhance irq-enabling sequence for mptable irqs ]
    
    Signed-off-by: Yinghai Lu <yinghai@kernel.org>
    Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
    Cc: Len Brown <lenb@kernel.org>
    Cc: Andrew Morton <akpm@linux-foundation.org>
    LKML-Reference: <4A01C46E.8000501@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3a68dae..5d5f412 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1480,9 +1480,13 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 	ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic_id, pin, idx, irq;
+	int apic_id = 0, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
@@ -1490,48 +1494,53 @@ static void __init setup_IO_APIC_irqs(void)
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-			idx = find_irq_entry(apic_id, pin, mp_INT);
-			if (idx == -1) {
-				if (!notcon) {
-					notcon = 1;
-					apic_printk(APIC_VERBOSE,
-						KERN_DEBUG " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				} else
-					apic_printk(APIC_VERBOSE, " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				continue;
-			}
-			if (notcon) {
-				apic_printk(APIC_VERBOSE,
-					" (apicid-pin) not connected\n");
-				notcon = 0;
-			}
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		apic_id = mp_find_ioapic(0);
+		if (apic_id < 0)
+			apic_id = 0;
+	}
+#endif
 
-			irq = pin_2_irq(idx, apic_id, pin);
+	for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+		idx = find_irq_entry(apic_id, pin, mp_INT);
+		if (idx == -1) {
+			if (!notcon) {
+				notcon = 1;
+				apic_printk(APIC_VERBOSE,
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			} else
+				apic_printk(APIC_VERBOSE, " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			continue;
+		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				" (apicid-pin) not connected\n");
+			notcon = 0;
+		}
 
-			/*
-			 * Skip the timer IRQ if there's a quirk handler
-			 * installed and if it returns 1:
-			 */
-			if (apic->multi_timer_check &&
-					apic->multi_timer_check(apic_id, irq))
-				continue;
+		irq = pin_2_irq(idx, apic_id, pin);
 
-			desc = irq_to_desc_alloc_node(irq, node);
-			if (!desc) {
-				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-				continue;
-			}
-			cfg = desc->chip_data;
-			add_pin_to_irq_node(cfg, node, apic_id, pin);
+		/*
+		 * Skip the timer IRQ if there's a quirk handler
+		 * installed and if it returns 1:
+		 */
+		if (apic->multi_timer_check &&
+				apic->multi_timer_check(apic_id, irq))
+			continue;
 
-			setup_IO_APIC_irq(apic_id, pin, irq, desc,
-					irq_trigger(idx), irq_polarity(idx));
+		desc = irq_to_desc_alloc_node(irq, node);
+		if (!desc) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
 		}
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, apic_id, pin);
+		set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+		setup_IO_APIC_irq(apic_id, pin, irq, desc,
+				irq_trigger(idx), irq_polarity(idx));
 	}
 
 	if (notcon)
@@ -3876,10 +3885,6 @@ static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, in
 	return 0;
 }
 
-static struct {
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
-} mp_ioapic_routing[MAX_IO_APICS];
-
 int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 				 int triggering, int polarity)
 {
@@ -4023,51 +4028,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-	int pin, ioapic, irq, irq_entry;
+	int pin, ioapic = 0, irq, irq_entry;
 	struct irq_desc *desc;
-	struct irq_cfg *cfg;
 	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
 
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			desc = irq_to_desc(irq);
-			cfg = desc->chip_data;
-			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq, desc,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-				continue;
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		ioapic = mp_find_ioapic(0);
+		if (ioapic < 0)
+			ioapic = 0;
+	}
+#endif
 
-			}
+	for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+		if (irq_entry == -1)
+			continue;
+		irq = pin_2_irq(irq_entry, ioapic, pin);
 
-			/*
-			 * Honour affinities which have been set in early boot
-			 */
-			if (desc->status &
-			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
-			else
-				mask = apic->target_cpus();
+		desc = irq_to_desc(irq);
 
-			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, mask);
-			else
-				set_ioapic_affinity_irq_desc(desc, mask);
-		}
+		/*
+		 * Honour affinities which have been set in early boot
+		 */
+		if (desc->status &
+		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+			mask = desc->affinity;
+		else
+			mask = apic->target_cpus();
 
+		if (intr_remapping_enabled)
+			set_ir_ioapic_affinity_irq_desc(desc, mask);
+		else
+			set_ioapic_affinity_irq_desc(desc, mask);
 	}
+
 }
 #endif
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index a2f6bde..2f3e192 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		return 0;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return 0;
+
 	/* Find IRQ routing entry */
 
 	if (!pirq_table)
@@ -1039,63 +1042,15 @@ static void __init pcibios_fixup_irqs(void)
 		pirq_penalty[dev->irq]++;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return;
+
 	dev = NULL;
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 		if (!pin)
 			continue;
 
-#ifdef CONFIG_X86_IO_APIC
-		/*
-		 * Recalculate IRQ numbers if we use the I/O APIC.
-		 */
-		if (io_apic_assign_pci_irqs) {
-			int irq;
-			int ioapic = -1, ioapic_pin = -1;
-			int triggering, polarity;
-
-			/*
-			 * interrupt pins are numbered starting from 1
-			 */
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-						PCI_SLOT(dev->devfn), pin - 1,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
-			/*
-			 * Busses behind bridges are typically not listed in the
-			 * MP-table.  In this case we have to look up the IRQ
-			 * based on the parent bus, parent slot, and pin number.
-			 * The SMP code detects such bridged busses itself so we
-			 * should get into this branch reliably.
-			 */
-			if (irq < 0 && dev->bus->parent) {
-				/* go back to the bridge */
-				struct pci_dev *bridge = dev->bus->self;
-				int bus;
-
-				pin = pci_swizzle_interrupt_pin(dev, pin);
-				bus = bridge->bus->number;
-				irq = IO_APIC_get_PCI_irq_vector(bus,
-						PCI_SLOT(bridge->devfn),
-						pin - 1,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
-				if (irq >= 0)
-					dev_warn(&dev->dev,
-						"using bridge %s INT %c to "
-							"get IRQ %d\n",
-						 pci_name(bridge),
-						 'A' + pin - 1, irq);
-			}
-			if (irq >= 0) {
-				dev_info(&dev->dev,
-					"PCI->APIC IRQ transform: INT %c "
-						"-> IRQ %d\n",
-					'A' + pin - 1, irq);
-				dev->irq = irq;
-			}
-		}
-#endif
 		/*
 		 * Still no IRQ? Try to lookup one...
 		 */
@@ -1190,6 +1145,19 @@ int __init pcibios_irq_init(void)
 	pcibios_enable_irq = pirq_enable_irq;
 
 	pcibios_fixup_irqs();
+
+	if (io_apic_assign_pci_irqs && pci_routeirq) {
+		struct pci_dev *dev = NULL;
+		/*
+		 * PCI IRQ routing is set up by pci_enable_device(), but we
+		 * also do it here in case there are still broken drivers that
+		 * don't use pci_enable_device().
+		 */
+		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+		for_each_pci_dev(dev)
+			pirq_enable_irq(dev);
+	}
+
 	return 0;
 }
 
@@ -1220,13 +1188,17 @@ void pcibios_penalize_isa_irq(int irq, int active)
 static int pirq_enable_irq(struct pci_dev *dev)
 {
 	u8 pin;
-	struct pci_dev *temp_dev;
 
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+	if (pin && !pcibios_lookup_irq(dev, 1)) {
 		char *msg = "";
 
+		if (!io_apic_assign_pci_irqs && dev->irq)
+			return 0;
+
 		if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+			struct pci_dev *temp_dev;
 			int irq;
 			int ioapic = -1, ioapic_pin = -1;
 			int triggering, polarity;
@@ -1261,12 +1233,16 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				io_apic_set_pci_routing(&dev->dev, ioapic,
+							ioapic_pin, irq,
+							triggering, polarity);
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
-				dev->irq = irq;
 				return 0;
 			} else
 				msg = "; probably buggy MP table";
+#endif
 		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
 			msg = "";
 		else


^ permalink raw reply related	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18 13:50                   ` Peter Zijlstra
@ 2009-05-18 13:56                     ` Ingo Molnar
  2009-05-18 15:03                     ` Yinghai Lu
  1 sibling, 0 replies; 102+ messages in thread
From: Ingo Molnar @ 2009-05-18 13:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Yinghai Lu, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, 2009-05-18 at 09:29 +0200, Ingo Molnar wrote:
> > * Yinghai Lu <yinghai@kernel.org> wrote:
> > 
> > > irq related:
> > > git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git irq
> > > need to on top of tip/irq/numa
> > 
> > ok, these were nicely structured. the pci_routeirq patch had a build 
> > bug for !CONFIG_PCI.
> > 
> > ( I added an #ifdef for now, it might make sense to send a clean-up 
> >   patch in the next merge window (not now) to factor out a 
> >   pci_routeirq_enable() method that does all this cleanly. )
> > 
> > Also, please add appropriate Cc: lines to the commit logs in the 
> > future, beyond the LKML-Reference tags.
> > 
> > > for memoryless node support: 
> > > git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git numa
> > > and it is on top of tip/master
> > 
> > small note: you could have based these on x86/mm btw. - that's where 
> > these patches go, typically.
> > 
> > regarding subject lines:
> > 
> >  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
> >  02ce039: x86: fix system without memory on node0 -v2
> >  8c1aec8: x86: fix node_possible_map logic -v2
> >  44a633c: x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
> > 
> > please never put '-v2' type of tags into the title of commits. In 
> > the title of patches they can be put here:
> > 
> >   [PATCH, v2] x86: fix system without memory on node0
> > 
> > that saves maintainers a bit of typing work.
> > 
> > Also, you included:
> > 
> >  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
> > 
> > with no Acks from MM folks yet. So i skipped that one and will 
> > follow up about it.
> 
> The below seems to wreck my opteron, ata1 interrupts fail to get
> through.

thanks Peter for bisecting this, i've excluded irq/numa from 
tip:master again.

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18 13:50                   ` Peter Zijlstra
  2009-05-18 13:56                     ` Ingo Molnar
@ 2009-05-18 15:03                     ` Yinghai Lu
  2009-05-18 15:09                       ` Ingo Molnar
  2009-05-18 15:11                       ` Peter Zijlstra
  1 sibling, 2 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-18 15:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin

Peter Zijlstra wrote:
> On Mon, 2009-05-18 at 09:29 +0200, Ingo Molnar wrote:
>> * Yinghai Lu <yinghai@kernel.org> wrote:
>>
>>> irq related:
>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git irq
>>> need to on top of tip/irq/numa
>> ok, these were nicely structured. the pci_routeirq patch had a build 
>> bug for !CONFIG_PCI.
>>
>> ( I added an #ifdef for now, it might make sense to send a clean-up 
>>   patch in the next merge window (not now) to factor out a 
>>   pci_routeirq_enable() method that does all this cleanly. )
>>
>> Also, please add appropriate Cc: lines to the commit logs in the 
>> future, beyond the LKML-Reference tags.
>>
>>> for memoryless node support: 
>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git numa
>>> and it is on top of tip/master
>> small note: you could have based these on x86/mm btw. - that's where 
>> these patches go, typically.
>>
>> regarding subject lines:
>>
>>  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
>>  02ce039: x86: fix system without memory on node0 -v2
>>  8c1aec8: x86: fix node_possible_map logic -v2
>>  44a633c: x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
>>
>> please never put '-v2' type of tags into the title of commits. In 
>> the title of patches they can be put here:
>>
>>   [PATCH, v2] x86: fix system without memory on node0
>>
>> that saves maintainers a bit of typing work.
>>
>> Also, you included:
>>
>>  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
>>
>> with no Acks from MM folks yet. So i skipped that one and will 
>> follow up about it.
> 
> The below seems to wreck my opteron, ata1 interrupts fail to get
> through.
> 
> 
> [    6.951257] ata1.00: qc timeout (cmd 0x27)
> [    6.955354] ata1.00: failed to read native max address (err_mask=0x4)
> [    6.961781] ata1.00: HPA support seems broken, skipping HPA handling
> [    7.273044] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> [    7.285159] ata1.00: configured for UDMA/133
> [    7.290052] scsi 0:0:0:0: Direct-Access     ATA      WDC WD1200JS-00N 10.0 PQ: 0 ANSI: 5
> [    7.299294] sd 0:0:0:0: [sda] 234441648 512-byte hardware sectors: (120 GB/111 GiB)
> [    7.306968] sd 0:0:0:0: [sda] Write Protect is off
> [    7.311754] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
> [    7.316839] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
> [    7.326312]  sda:<6>ata2: SATA link down (SStatus 4 SControl 300)
> [    7.938372] ata3: SATA link down (SStatus 4 SControl 300)
> [    8.258372] ata4: SATA link down (SStatus 4 SControl 300)
> [    8.264357] scsi 4:0:0:0: CD-ROM            TEAC     DV-516G          F4S7 PQ: 0 ANSI: 5
> [   37.704234] ata1: lost interrupt (Status 0x50)
> [   37.708695] sd 0:0:0:0: [sda] Unhandled error code
> [   37.713479] sd 0:0:0:0: [sda] Result: hostbyte=DID_OK driverbyte=DRIVER_TIMEOUT
> [   37.720791] end_request: I/O error, dev sda, sector 0
> [   37.725848] Buffer I/O error on device sda, logical block 0
> 
> ---
> 
> 
> commit b9c61b70075c87a8612624736faf4a2de5b1ed30
> Author: Yinghai Lu <yinghai@kernel.org>
> Date:   Wed May 6 10:10:06 2009 -0700
> 
>     x86/pci: update pirq_enable_irq() to setup io apic routing
>     
>     So we can set io apic routing only when enabling the device irq.
>     
>     This is advantageous for IRQ descriptor allocation affinity: if we set up
>     the IO-APIC entry later, we have a chance to allocate the IRQ descriptor
>     later and know which device it is on and can set affinity accordingly.
>     
>     [ Impact: standardize/enhance irq-enabling sequence for mptable irqs ]

Can you post the whole boot log?
I need to figure out: is this 32-bit or 64-bit? Is ACPI disabled? Is the MP table used?

Also, please check whether pci=routeirq helps to fix the problem.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18 15:03                     ` Yinghai Lu
@ 2009-05-18 15:09                       ` Ingo Molnar
  2009-05-18 15:11                       ` Peter Zijlstra
  1 sibling, 0 replies; 102+ messages in thread
From: Ingo Molnar @ 2009-05-18 15:09 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Peter Zijlstra, Jack Steiner, linux-kernel, Thomas Gleixner,
	H. Peter Anvin


* Yinghai Lu <yinghai@kernel.org> wrote:

> Peter Zijlstra wrote:
> > On Mon, 2009-05-18 at 09:29 +0200, Ingo Molnar wrote:
> >> * Yinghai Lu <yinghai@kernel.org> wrote:
> >>
> >>> irq related:
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git irq
> >>> need to on top of tip/irq/numa
> >> ok, these were nicely structured. the pci_routeirq patch had a build 
> >> bug for !CONFIG_PCI.
> >>
> >> ( I added an #ifdef for now, it might make sense to send a clean-up 
> >>   patch in the next merge window (not now) to factor out a 
> >>   pci_routeirq_enable() method that does all this cleanly. )
> >>
> >> Also, please add appropriate Cc: lines to the commit logs in the 
> >> future, beyond the LKML-Reference tags.
> >>
> >>> for memoryless node support: 
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git numa
> >>> and it is on top of tip/master
> >> small note: you could have based these on x86/mm btw. - that's where 
> >> these patches go, typically.
> >>
> >> regarding subject lines:
> >>
> >>  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
> >>  02ce039: x86: fix system without memory on node0 -v2
> >>  8c1aec8: x86: fix node_possible_map logic -v2
> >>  44a633c: x86: remove MEMORY_HOTPLUG_RESERVE related code -v2
> >>
> >> please never put '-v2' type of tags into the title of commits. In 
> >> the title of patches they can be put here:
> >>
> >>   [PATCH, v2] x86: fix system without memory on node0
> >>
> >> that saves maintainers a bit of typing work.
> >>
> >> Also, you included:
> >>
> >>  d03a6a4: mm: clear N_HIGH_MEMORY map before se set it again -v2
> >>
> >> with no Acks from MM folks yet. So i skipped that one and will 
> >> follow up about it.
> > 
> > The below seems to wreck my opteron, ata1 interrupts fail to get
> > through.
> > 
> > 
> > [    6.951257] ata1.00: qc timeout (cmd 0x27)
> > [    6.955354] ata1.00: failed to read native max address (err_mask=0x4)
> > [    6.961781] ata1.00: HPA support seems broken, skipping HPA handling
> > [    7.273044] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> > [    7.285159] ata1.00: configured for UDMA/133
> > [    7.290052] scsi 0:0:0:0: Direct-Access     ATA      WDC WD1200JS-00N 10.0 PQ: 0 ANSI: 5
> > [    7.299294] sd 0:0:0:0: [sda] 234441648 512-byte hardware sectors: (120 GB/111 GiB)
> > [    7.306968] sd 0:0:0:0: [sda] Write Protect is off
> > [    7.311754] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
> > [    7.316839] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
> > [    7.326312]  sda:<6>ata2: SATA link down (SStatus 4 SControl 300)
> > [    7.938372] ata3: SATA link down (SStatus 4 SControl 300)
> > [    8.258372] ata4: SATA link down (SStatus 4 SControl 300)
> > [    8.264357] scsi 4:0:0:0: CD-ROM            TEAC     DV-516G          F4S7 PQ: 0 ANSI: 5
> > [   37.704234] ata1: lost interrupt (Status 0x50)
> > [   37.708695] sd 0:0:0:0: [sda] Unhandled error code
> > [   37.713479] sd 0:0:0:0: [sda] Result: hostbyte=DID_OK driverbyte=DRIVER_TIMEOUT
> > [   37.720791] end_request: I/O error, dev sda, sector 0
> > [   37.725848] Buffer I/O error on device sda, logical block 0
> > 
> > ---
> > 
> > 
> > commit b9c61b70075c87a8612624736faf4a2de5b1ed30
> > Author: Yinghai Lu <yinghai@kernel.org>
> > Date:   Wed May 6 10:10:06 2009 -0700
> > 
> >     x86/pci: update pirq_enable_irq() to setup io apic routing
> >     
> >     So we can set io apic routing only when enabling the device irq.
> >     
> >     This is advantageous for IRQ descriptor allocation affinity: if we set up
> >     the IO-APIC entry later, we have a chance to allocate the IRQ descriptor
> >     later and know which device it is on and can set affinity accordingly.
> >     
> >     [ Impact: standardize/enhance irq-enabling sequence for mptable irqs ]
> 
> can you post whole bootlog?
> need to figure out 32bit/64bit? ACPI is disabled? MPtable is used?
> 
> also please check if pci=routeirq help to fix the problem.

The thing is ... this again is a _WAY TOO LARGE_ patch from you - 
250 lines of flux. Peter already did a bisection and if that's not 
enough to tell what the bug is, then frankly the splitup was way 
wrong.

You really need to learn how to create small gradual patches that 
are obviously correct - and if they are buggy it's obvious _why_ 
they are incorrect.

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18 15:03                     ` Yinghai Lu
  2009-05-18 15:09                       ` Ingo Molnar
@ 2009-05-18 15:11                       ` Peter Zijlstra
  2009-05-18 17:23                         ` Yinghai Lu
  1 sibling, 1 reply; 102+ messages in thread
From: Peter Zijlstra @ 2009-05-18 15:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin

On Mon, 2009-05-18 at 08:03 -0700, Yinghai Lu wrote:
> Peter Zijlstra wrote:
>  
> > commit b9c61b70075c87a8612624736faf4a2de5b1ed30
> > Author: Yinghai Lu <yinghai@kernel.org>
> > Date:   Wed May 6 10:10:06 2009 -0700
> > 
> >     x86/pci: update pirq_enable_irq() to setup io apic routing
> >     
> >     So we can set io apic routing only when enabling the device irq.
> >     
> >     This is advantageous for IRQ descriptor allocation affinity: if we set up
> >     the IO-APIC entry later, we have a chance to allocate the IRQ descriptor
> >     later and know which device it is on and can set affinity accordingly.
> >     
> >     [ Impact: standardize/enhance irq-enabling sequence for mptable irqs ]
> 
> can you post whole bootlog?

See below

> need to figure out 32bit/64bit? ACPI is disabled? MPtable is used?

64bit, default ACPI, default mptable

> also please check if pci=routeirq help to fix the problem.

It does not.

[    0.000000] Linux version 2.6.30-rc3 (root@twins) (gcc version 4.3.1 20080510 (prerelease) (GCC) ) #984 SMP PREEMPT Mon May 18 15:46:49 CEST 2009
[    0.000000] Command line: ro root=UUID=2678f5ee-ec1c-4be4-9fdd-ee4f1de115fa debug ignore_loglevel sysrq_always_enabled console=ttyS0,115200 earlyprintk=serial,ttyS0,115200
[    0.000000] KERNEL supported cpus:                                          
[    0.000000]   Intel GenuineIntel   [Linux-bzImage, setup=0x2e00, size=0x27cbb0]
[    0.000000]   AMD AuthenticAMD                                                 
[    0.000000]   Centaur CentaurHauls                                             
[    0.000000] BIOS-provided physical RAM map:                                    
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009d800 (usable)           
[    0.000000]  BIOS-e820: 000000000009d800 - 00000000000a0000 (reserved)         
[    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)         
[    0.000000]  BIOS-e820: 0000000000100000 - 000000007fff0000 (usable)           
[    0.000000]  BIOS-e820: 000000007fff0000 - 000000007fffe000 (ACPI data)        
[    0.000000]  BIOS-e820: 000000007fffe000 - 0000000080000000 (ACPI NVS)         
[    0.000000]  BIOS-e820: 00000000fec00000 - 00000000fec03000 (reserved)         
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)         
[    0.000000] debug: ignoring loglevel setting.                                  
[    0.000000] console [earlyser0] enabled                                        
[    0.000000] DMI 2.3 present.                                                   
[    0.000000] AMI BIOS detected: BIOS may corrupt low RAM, working around it.  
[    0.000000] e820 update range: 0000000000000000 - 0000000000010000 (usable) ==> (reserved)
[    0.000000] last_pfn = 0x7fff0 max_arch_pfn = 0x100000000                                                
[    0.000000] MTRR default type: uncachable                                                                
[    0.000000] MTRR fixed ranges enabled:                                                                   
[    0.000000]   00000-9FFFF write-back                                                                     
[    0.000000]   A0000-EFFFF uncachable                                                                     
[    0.000000]   F0000-FFFFF write-protect                                                                  
[    0.000000] MTRR variable ranges enabled:                                                                
[    0.000000]   0 base 0000000000 mask FF80000000 write-back                                               
[    0.000000]   1 disabled                                                                                 
[    0.000000]   2 disabled                                                                                 
[    0.000000]   3 disabled                                                                                 
[    0.000000]   4 disabled                                                                                 
[    0.000000]   5 disabled                                                                                 
[    0.000000]   6 disabled                                                                                 
[    0.000000]   7 disabled                                                                                 
[    0.000000] init_memory_mapping: 0000000000000000-000000007fff0000                                       
[    0.000000]  0000000000 - 007fe00000 page 2M                                                             
[    0.000000]  007fe00000 - 007fff0000 page 4k                                                             
[    0.000000] kernel direct mapping tables up to 7fff0000 @ 10000-14000                                    
[    0.000000] RAMDISK: 37d54000 - 37fef31b                                                                 
[    0.000000] ACPI: RSDP 00000000000f7470 00024 (v02 ACPIAM)                                               
[    0.000000] ACPI: XSDT 000000007fff0100 00044 (v01 A M I  OEMXSDT  09000625 MSFT 00000097)               
[    0.000000] ACPI: FACP 000000007fff0290 000F4 (v03 A M I  OEMFACP  09000625 MSFT 00000097)               
[    0.000000] ACPI: DSDT 000000007fff0400 03018 (v01  0AAAA 0AAAA000 00000000 INTL 02002026)               
[    0.000000] ACPI: FACS 000000007fffe000 00040                                                            
[    0.000000] ACPI: APIC 000000007fff0390 00070 (v01 A M I  OEMAPIC  09000625 MSFT 00000097)               
[    0.000000] ACPI: OEMB 000000007fffe040 00056 (v01 A M I  AMI_OEM  09000625 MSFT 00000097)               
[    0.000000] ACPI: SSDT 000000007fff3420 00248 (v01 A M I  POWERNOW 00000001 AMD  00000001)               
[    0.000000] ACPI: Local APIC address 0xfee00000                                                          
[    0.000000] (7 early reservations) ==> bootmem [0000000000 - 007fff0000]                                 
[    0.000000]   #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]                
[    0.000000]   #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]                
[    0.000000]   #2 [0000200000 - 0001231e10]    TEXT DATA BSS ==> [0000200000 - 0001231e10]                
[    0.000000]   #3 [0037d54000 - 0037fef31b]          RAMDISK ==> [0037d54000 - 0037fef31b]                
[    0.000000]   #4 [000009d800 - 0000100000]    BIOS reserved ==> [000009d800 - 0000100000]                
[    0.000000]   #5 [0001232000 - 0001232200]              BRK ==> [0001232000 - 0001232200]                
[    0.000000]   #6 [0000010000 - 0000012000]          PGTABLE ==> [0000010000 - 0000012000]                
[    0.000000] found SMP MP-table at [ffff8800000ff780] ff780                                               
[    0.000000]  [ffffe20000000000-ffffe200033fffff] PMD -> [ffff880001400000-ffff8800047fffff] on node 0    
[    0.000000] Zone PFN ranges:                                                                             
[    0.000000]   DMA      0x00000010 -> 0x00001000                                                          
[    0.000000]   DMA32    0x00001000 -> 0x00100000                                                          
[    0.000000]   Normal   0x00100000 -> 0x00100000                                                          
[    0.000000] Movable zone start PFN for each node                                                         
[    0.000000] early_node_map[2] active PFN ranges                                                          
[    0.000000]     0: 0x00000010 -> 0x0000009d                                                              
[    0.000000]     0: 0x00000100 -> 0x0007fff0                                                              
[    0.000000] On node 0 totalpages: 524157                                                                 
[    0.000000]   DMA zone: 104 pages used for memmap                                                        
[    0.000000]   DMA zone: 103 pages reserved                                                               
[    0.000000]   DMA zone: 3774 pages, LIFO batch:0                                                         
[    0.000000]   DMA32 zone: 13208 pages used for memmap                                                    
[    0.000000]   DMA32 zone: 506968 pages, LIFO batch:31                                                    
[    0.000000] Detected use of extended apic ids on hypertransport bus                                      
[    0.000000] ACPI: PM-Timer IO Port: 0x508                                                                
[    0.000000] ACPI: Local APIC address 0xfee00000                                                          
[    0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)                                           
[    0.000000] ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)                                           
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])                                          
[    0.000000] ACPI: IOAPIC (id[0x02] address[0xfec00000] gsi_base[0])                                      
[    0.000000] IOAPIC[0]: apic_id 2, version 0, address 0xfec00000, GSI 0-15                                
[    0.000000] ACPI: IOAPIC (id[0x03] address[0xfec01000] gsi_base[16])                                     
[    0.000000] IOAPIC[1]: apic_id 3, version 0, address 0xfec01000, GSI 16-31                               
[    0.000000] ACPI: IOAPIC (id[0x04] address[0xfec02000] gsi_base[32])                                     
[    0.000000] IOAPIC[2]: apic_id 4, version 0, address 0xfec02000, GSI 32-47                               
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)                                     
[    0.000000] ACPI: IRQ0 used by override.                                                                 
[    0.000000] ACPI: IRQ2 used by override.                                                                 
[    0.000000] ACPI: IRQ9 used by override.                                                                 
[    0.000000] Using ACPI (MADT) for SMP configuration information                                          
[    0.000000] SMP: Allowing 2 CPUs, 0 hotplug CPUs                                                         
[    0.000000] nr_irqs_gsi: 48                                                                              
[    0.000000] Allocating PCI resources starting at 88000000 (gap: 80000000:7ec00000)                       
[    0.000000] NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:2 nr_node_ids:1                                       
[    0.000000] PERCPU: Embedded 473 pages at ffff880004800000, static data 1907424 bytes                    
[    0.000000] Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 510742                  
[    0.000000] Kernel command line: ro root=UUID=2678f5ee-ec1c-4be4-9fdd-ee4f1de115fa debug ignore_loglevel sysrq_always_enabled console=ttyS0,115200 earlyprintk=serial,ttyS0,115200                                                                                                                         
[    0.000000] debug: sysrq always enabled.                                                                                                            
[    0.000000] Initializing CPU#0                                                                                                                      
[    0.000000] NR_IRQS:512                                                                                                                             
[    0.000000] PID hash table entries: 4096 (order: 12, 32768 bytes)                                                                                   
[    0.000000] Extended CMOS year: 2000                                                                                                                
[    0.000000] Fast TSC calibration failed                                                                                                             
[    0.000000] TSC: PIT calibration matches PMTIMER. 1 loops                                                                                           
[    0.000000] Detected 2393.995 MHz processor.                                                                                                        
[    0.000999] Console: colour VGA+ 80x25                                                                                                              
[    0.000999] console handover: boot [earlyser0] -> real [ttyS0]                                                                                      
[    0.000999] Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar                                                                
[    0.000999] ... MAX_LOCKDEP_SUBCLASSES:  8                                                                                                          
[    0.000999] ... MAX_LOCK_DEPTH:          48                                                                                                         
[    0.000999] ... MAX_LOCKDEP_KEYS:        8191                                                                                                       
[    0.000999] ... CLASSHASH_SIZE:          4096                                                                                                       
[    0.000999] ... MAX_LOCKDEP_ENTRIES:     8192                                                                                                       
[    0.000999] ... MAX_LOCKDEP_CHAINS:      16384                                                                                                      
[    0.000999] ... CHAINHASH_SIZE:          8192                                                                                                       
[    0.000999]  memory used by lock dependency info: 5119 kB                                                                                           
[    0.000999]  per task-struct memory footprint: 2688 bytes                                                                                           
[    0.000999] Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)                                                                       
[    0.000999] Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)                                                                        
[    0.000999] Checking aperture...                                                                                                                    
[    0.000999] No AGP bridge found                                                                                                                     
[    0.000999] Node 0: aperture @ 202a000000 size 32 MB                                                                                                
[    0.000999] Aperture beyond 4GB. Ignoring.                                                                                                          
[    0.000999] Memory: 2016276k/2097088k available (3225k kernel code, 460k absent, 79688k reserved, 2051k data, 2180k init)                           
[    0.000999] SLUB: Genslabs=13, HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1                                                                 
[    0.001012] Calibrating delay loop (skipped), value calculated using timer frequency.. 4787.99 BogoMIPS (lpj=2393995)                               
[    0.003038] Security Framework initialized                                                                                                          
[    0.004112] Mount-cache hash table entries: 256                                                                                                     
[    0.007530] CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)                                                                       
[    0.008002] CPU: L2 Cache: 1024K (64 bytes/line)                                                                                                    
[    0.009002] tseg: 0000000000                                                                                                                        
[    0.010070] CPU: Physical Processor ID: 0                                                                                                           
[    0.011001] CPU: Processor Core ID: 0                                                                                                               
[    0.012010] using C1E aware idle routine                                                                                                            
[    0.013031] ACPI: Core revision 20090320                                                                                                            
[    0.022005] ftrace: converting mcount calls to 0f 1f 44 00 00                                                                                       
[    0.023002] ftrace: allocating 13675 entries in 54 pages                                                                                            
[    0.025212] Setting APIC routing to flat                                                                                                            
[    0.026741] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1                                                                                    
[    0.037023] CPU0: Dual-Core AMD Opteron(tm) Processor 1216 stepping 02                                                                              
[    0.040993] lockdep: fixing up alternatives.                                                                                                        
[    0.041182] Booting processor 1 APIC 0x1 ip 0x6000                                                                                                  
[    0.000999] Initializing CPU#1                                                                                                                      
[    0.000999] Calibrating delay using timer specific routine.. 4791.25 BogoMIPS (lpj=2395625)                                                         
[    0.000999] CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)                                                                       
[    0.000999] CPU: L2 Cache: 1024K (64 bytes/line)                                                                                                    
[    0.000999] CPU: Physical Processor ID: 0                                                                                                           
[    0.000999] CPU: Processor Core ID: 1                                                                                                               
[    0.114121] CPU1: Dual-Core AMD Opteron(tm) Processor 1216 stepping 02                                                                              
[    0.117018] Brought up 2 CPUs                                                                                                                       
[    0.117985] Total of 2 processors activated (9579.24 BogoMIPS).                                                                                     
[    0.120028] CPU0 attaching sched-domain:                                                                                                            
[    0.120984]  domain 0: span 0-1 level CPU                                                                                                           
[    0.122983]   groups: 0 1                                                                                                                           
[    0.124554] CPU1 attaching sched-domain:                                                                                                            
[    0.124984]  domain 0: span 0-1 level CPU                                                                                                           
[    0.126982]   groups: 1 0                                                                                                                           
[    0.130206] net_namespace: 784 bytes                                                                                                                
[    0.131536] NET: Registered protocol family 16                                                                                                      
[    0.133093] node 0 link 0: io port [1000, ffffff]                                                                                                   
[    0.133984] TOM: 0000000080000000 aka 2048M                                                                                                         
[    0.134993] node 0 link 0: mmio [a0000, bffff]                                                                                                      
[    0.136165] node 0 link 0: mmio [80000000, ff70ffff]                                                                                                
[    0.137165] bus: [00,ff] on node 0 link 0                                                                                                           
[    0.137981] bus: 00 index 0 io port: [0, ffff]                                                                                                      
[    0.138981] bus: 00 index 1 mmio: [a0000, bffff]                                                                                                    
[    0.139981] bus: 00 index 2 mmio: [80000000, fcffffffff]                                                                                            
[    0.141025] ACPI: bus type pci registered                                                                                                           
[    0.142047] PCI: Using configuration type 1 for base access                                                                                         
[    0.151063] bio: create slab <bio-0> at 0                                                                                                           
[    0.154745] ACPI: EC: Look up EC in DSDT                                                                                                            
[    0.169950] ACPI: Interpreter enabled                                                                                                               
[    0.169978] ACPI: (supports S0 S1 S5)                                                                                                               
[    0.172248] ACPI: Using IOAPIC for interrupt routing                                                                                                
[    0.192179] ACPI Warning (tbutils-0246): Incorrect checksum in table [OEMB] - 8D, should be 82 [20090320]                                           
[    0.195298] ACPI: No dock devices found.                                                                                                            
[    0.196053] ACPI: PCI Root Bridge [PCI0] (0000:00)                                                                                                  
[    0.197128] pci 0000:00:01.0: Enabling HT MSI Mapping                                                                                               
[    0.198136] pci 0000:00:02.1: reg 10 io port: [0x1f0-0x1f7]                                                                                         
[    0.198983] pci 0000:00:02.1: reg 14 io port: [0x3f4-0x3f7]                                                                                         
[    0.199977] pci 0000:00:02.1: reg 18 io port: [0x170-0x177]                                                                                         
[    0.200977] pci 0000:00:02.1: reg 1c io port: [0x374-0x377]                                                                                         
[    0.201977] pci 0000:00:02.1: reg 20 io port: [0xffa0-0xffaf]                                                                                       
[    0.203108] pci 0000:00:03.0: reg 10 32bit mmio: [0xff6b4000-0xff6b4fff]                                                                            
[    0.203977] pci 0000:00:03.0: reg 14 io port: [0xe000-0xe0ff]                                                                                       
[    0.205976] pci 0000:00:03.1: reg 10 32bit mmio: [0xff6b5000-0xff6b5fff]                                                                            
[    0.206977] pci 0000:00:03.1: reg 14 io port: [0xe400-0xe4ff]                                                                                       
[    0.208065] pci 0000:00:03.2: reg 10 32bit mmio: [0xff6b6000-0xff6b6fff]                                                                            
[    0.208977] pci 0000:00:03.2: reg 14 io port: [0xe800-0xe8ff]                                                                                       
[    0.210022] pci 0000:00:03.2: supports D1 D2                                                                                                        
[    0.210970] pci 0000:00:03.2: PME# supported from D0 D1 D2 D3hot                                                                                    
[    0.211973] pci 0000:00:03.2: PME# disabled                                                                                                         
[    0.213034] pci 0000:00:04.0: reg 10 32bit mmio: [0xff680000-0xff69ffff]                                                                            
[    0.213976] pci 0000:00:04.0: reg 14 32bit mmio: [0xff660000-0xff67ffff]                                                                            
[    0.214976] pci 0000:00:04.0: reg 18 io port: [0xdc00-0xdc3f]                                                                                       
[    0.215993] pci 0000:00:04.0: reg 30 32bit mmio: [0xff640000-0xff65ffff]                                                                            
[    0.216996] pci 0000:00:04.0: PME# supported from D0 D3hot D3cold                                                                                   
[    0.217972] pci 0000:00:04.0: PME# disabled                                                                                                         
[    0.219023] pci 0000:00:05.0: reg 10 32bit mmio: [0xff620000-0xff63ffff]                                                                            
[    0.219975] pci 0000:00:05.0: reg 14 32bit mmio: [0xff600000-0xff61ffff]                                                                            
[    0.220974] pci 0000:00:05.0: reg 18 io port: [0xd880-0xd8bf]                                                                                       
[    0.221992] pci 0000:00:05.0: reg 30 32bit mmio: [0xff5e0000-0xff5fffff]                                                                            
[    0.222996] pci 0000:00:05.0: PME# supported from D0 D3hot D3cold                                                                                   
[    0.223971] pci 0000:00:05.0: PME# disabled                                                                                                         
[    0.225012] pci 0000:00:06.0: reg 10 32bit mmio: [0xf8000000-0xfbffffff]                                                                            
[    0.225974] pci 0000:00:06.0: reg 14 32bit mmio: [0xff6c0000-0xff6fffff]                                                                            
[    0.226973] pci 0000:00:06.0: reg 18 io port: [0xec00-0xec7f]                                                                                       
[    0.229001] pci 0000:00:06.0: supports D1 D2                                                                                                        
[    0.230339] pci 0000:01:0e.0: reg 10 io port: [0xc080-0xc087]                                                                                       
[    0.230971] pci 0000:01:0e.0: reg 14 io port: [0xc000-0xc003]                                                                                       
[    0.231971] pci 0000:01:0e.0: reg 18 io port: [0xbc00-0xbc07]                                                                                       
[    0.232971] pci 0000:01:0e.0: reg 1c io port: [0xb880-0xb883]                                                                                       
[    0.233971] pci 0000:01:0e.0: reg 20 io port: [0xb800-0xb80f]                                                                                       
[    0.234971] pci 0000:01:0e.0: reg 24 32bit mmio: [0xff3fe000-0xff3fffff]                                                                            
[    0.235970] pci 0000:01:0e.0: reg 30 32bit mmio: [0xff3c0000-0xff3dffff]                                                                            
[    0.237031] pci 0000:01:0e.1: reg 10 io port: [0xcc00-0xcc07]                                                                                       
[    0.237970] pci 0000:01:0e.1: reg 14 io port: [0xc880-0xc883]                                                                                       
[    0.238970] pci 0000:01:0e.1: reg 18 io port: [0xc800-0xc807]                                                                                       
[    0.239970] pci 0000:01:0e.1: reg 1c io port: [0xc480-0xc483]                                                                                       
[    0.240969] pci 0000:01:0e.1: reg 20 io port: [0xc400-0xc40f]                                                                                       
[    0.242057] pci 0000:00:01.0: bridge io port: [0xb000-0xcfff]                                                                                       
[    0.242967] pci 0000:00:01.0: bridge 32bit mmio: [0xff300000-0xff3fffff]                                                                            
[    0.244072] pci_bus 0000:00: on NUMA node 0                                                                                                         
[    0.244971] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]                                                                                     
[    0.246176] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.P0P1._PRT]                                                                                
[    0.247020] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.P0P1.P1P2._PRT]                                                                           
[    0.259175] ACPI: PCI Interrupt Link [LN00] (IRQs 3 4 5 7 9 11 12 14 15) *0, disabled.                                                              
[    0.263312] ACPI: PCI Interrupt Link [LN01] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.268128] ACPI: PCI Interrupt Link [LN02] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.273308] ACPI: PCI Interrupt Link [LN03] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.278310] ACPI: PCI Interrupt Link [LN04] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.283307] ACPI: PCI Interrupt Link [LN05] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.288101] ACPI: PCI Interrupt Link [LN06] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.293306] ACPI: PCI Interrupt Link [LN07] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.298123] ACPI: PCI Interrupt Link [LN08] (IRQs 1 3 4 5 6 7 *9 11 12 14 15)                                                                       
[    0.302401] ACPI: PCI Interrupt Link [LN09] (IRQs 1 3 4 *5 6 7 9 11 12 14 15)                                                                       
[    0.306083] ACPI: PCI Interrupt Link [LN10] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.311304] ACPI: PCI Interrupt Link [LN11] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.316141] ACPI: PCI Interrupt Link [LN12] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.321350] ACPI: PCI Interrupt Link [LN13] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.327046] ACPI: PCI Interrupt Link [LN14] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.332351] ACPI: PCI Interrupt Link [LN15] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.337171] ACPI: PCI Interrupt Link [LN16] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.342350] ACPI: PCI Interrupt Link [LN17] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.347169] ACPI: PCI Interrupt Link [LN18] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.352351] ACPI: PCI Interrupt Link [LN19] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.357351] ACPI: PCI Interrupt Link [LN20] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.362352] ACPI: PCI Interrupt Link [LN21] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.367172] ACPI: PCI Interrupt Link [LN22] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.372351] ACPI: PCI Interrupt Link [LN23] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.377352] ACPI: PCI Interrupt Link [LN24] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.382352] ACPI: PCI Interrupt Link [LN25] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.387350] ACPI: PCI Interrupt Link [LN26] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.392129] ACPI: PCI Interrupt Link [LN27] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.398168] ACPI: PCI Interrupt Link [LN28] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.403352] ACPI: PCI Interrupt Link [LN29] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.408351] ACPI: PCI Interrupt Link [LN30] (IRQs 1 3 4 5 6 7 9 11 12 14 15) *0, disabled.                                                          
[    0.414039] ACPI: PCI Interrupt Link [LNUS] (IRQs *10)                                                                                              
[    0.416360] ACPI: PCI Interrupt Link [LNSA] (IRQs *11)                                                                                              
[    0.419075] SCSI subsystem initialized                                                                                                              
[    0.421058] libata version 3.00 loaded.                                                                                                             
[    0.423077] usbcore: registered new interface driver usbfs                                                                                          
[    0.424055] usbcore: registered new interface driver hub                                                                                            
[    0.425038] usbcore: registered new device driver usb                                                                                               
[    0.426115] PCI: Using ACPI for IRQ routing                                                                                                         
[    0.436018] pnp: PnP ACPI init                                                                                                                      
[    0.437034] ACPI: bus type pnp registered                                                                                                           
[    0.445279] pnp: PnP ACPI: found 12 devices                                                                                                         
[    0.446003] ACPI: ACPI bus type pnp unregistered                                                                                                    
[    0.447021] system 00:08: ioport range 0x800-0x87f has been reserved                                                                                
[    0.448003] system 00:08: ioport range 0xa80-0xa8f has been reserved                                                                                
[    0.449012] system 00:09: ioport range 0x4d0-0x4d1 has been reserved                                                                                
[    0.450003] system 00:09: ioport range 0xc00-0xc01 has been reserved                                                                                
[    0.451003] system 00:09: ioport range 0xcd6-0xcd7 has been reserved                                                                                
[    0.452006] system 00:09: ioport range 0xcd4-0xcd5 has been reserved                                                                                
[    0.453003] system 00:09: ioport range 0xcd8-0xcdf has been reserved                                                                                
[    0.454003] system 00:09: ioport range 0x40b-0x40b has been reserved                                                                                
[    0.455003] system 00:09: ioport range 0x4d6-0x4d6 has been reserved                                                                                
[    0.456003] system 00:09: ioport range 0xc06-0xc07 has been reserved                                                                                
[    0.457003] system 00:09: ioport range 0xc14-0xc14 has been reserved                                                                                
[    0.458003] system 00:09: ioport range 0xc49-0xc49 has been reserved                                                                                
[    0.459003] system 00:09: ioport range 0xc4a-0xc4a has been reserved                                                                                
[    0.460003] system 00:09: ioport range 0xc50-0xc51 has been reserved                                                                                
[    0.461003] system 00:09: ioport range 0xc52-0xc52 has been reserved                                                                                
[    0.462003] system 00:09: ioport range 0xc6c-0xc6c has been reserved                                                                                
[    0.463003] system 00:09: ioport range 0xc6f-0xc6f has been reserved                                                                                
[    0.465003] system 00:09: ioport range 0x500-0x57f has been reserved                                                                                
[    0.466012] system 00:0a: ioport range 0x580-0x58f has been reserved                                                                                
[    0.467003] system 00:0a: ioport range 0x590-0x593 has been reserved                                                                                
[    0.468003] system 00:0a: ioport range 0x700-0x703 has been reserved                                                                                
[    0.469003] system 00:0a: ioport range 0xca0-0xcaf has been reserved                                                                                
[    0.470004] system 00:0a: iomem range 0xfec00000-0xfec00fff has been reserved                                                                       
[    0.471003] system 00:0a: iomem range 0xfec01000-0xfec01fff has been reserved                                                                       
[    0.472003] system 00:0a: iomem range 0xfec02000-0xfec02fff has been reserved                                                                       
[    0.473003] system 00:0a: iomem range 0xfee00000-0xfee00fff has been reserved                                                                       
[    0.474003] system 00:0a: iomem range 0xfff00000-0xffffffff has been reserved                                                                       
[    0.475003] system 00:0a: iomem range 0xff780000-0xffbfffff has been reserved                                                                       
[    0.476003] system 00:0a: iomem range 0xfebfe000-0xfebfefff has been reserved                                                                       
[    0.477012] system 00:0b: iomem range 0x0-0x9ffff could not be reserved                                                                             
[    0.478003] system 00:0b: iomem range 0xe0000-0xfffff could not be reserved                                                                         
[    0.479004] system 00:0b: iomem range 0x100000-0x7fffffff could not be reserved                                                                     
[    0.486501] pci 0000:01:0d.0: PCI bridge, secondary bus 0000:02                                                                                     
[    0.487022] Switched to high resolution mode on CPU 0                                                                                               
[    0.487213] Switched to high resolution mode on CPU 1                                                                                               
[    0.497094] pci 0000:01:0d.0:   IO window: disabled                                                                                                 
[    0.501967] pci 0000:01:0d.0:   MEM window: disabled                                                                                                
[    0.506925] pci 0000:01:0d.0:   PREFETCH window: disabled                                                                                           
[    0.512316] pci 0000:00:01.0: PCI bridge, secondary bus 0000:01                                                                                     
[    0.518227] pci 0000:00:01.0:   IO window: 0xb000-0xcfff                                                                                            
[    0.523531] pci 0000:00:01.0:   MEM window: 0xff300000-0xff3fffff                                                                                   
[    0.529613] pci 0000:00:01.0:   PREFETCH window: disabled                                                                                           
[    0.535026] pci_bus 0000:00: resource 0 io:  [0x00-0xffff]                                                                                          
[    0.540500] pci_bus 0000:00: resource 1 mem: [0x000000-0xffffffffffffffff]                                                                          
[    0.547360] pci_bus 0000:01: resource 0 io:  [0xb000-0xcfff]                                                                                        
[    0.553601] pci_bus 0000:01: resource 1 mem: [0xff300000-0xff3fffff]                                                                                
[    0.559941] pci_bus 0000:01: resource 2 mem: [0x0-0x0]                                                                                              
[    0.565069] pci_bus 0000:01: resource 3 mem: [0x0-0x0]                                                                                              
[    0.570199] pci_bus 0000:02: resource 0 mem: [0x0-0x0]                                                                                              
[    0.575327] pci_bus 0000:02: resource 1 mem: [0x0-0x0]                                                                                              
[    0.580454] pci_bus 0000:02: resource 2 mem: [0x0-0x0]                                                                                              
[    0.585581] pci_bus 0000:02: resource 3 mem: [0x0-0x0]                                                                                              
[    0.590794] NET: Registered protocol family 2                                                                                                       
[    0.607200] IP route cache hash table entries: 65536 (order: 7, 524288 bytes)                                                                       
[    0.616062] TCP established hash table entries: 262144 (order: 10, 4194304 bytes)                                                                   
[    0.625753] TCP bind hash table entries: 32768 (order: 9, 2359296 bytes)                                                                            
[    0.636466] TCP: Hash tables configured (established 262144 bind 32768)                                                                             
[    0.643108] TCP reno registered                                                                                                                     
[    0.649199] NET: Registered protocol family 1                                                                                                       
[    0.653870] checking if image is initramfs...                                                                                                       
[    0.740649] rootfs image is initramfs; unpacking...                                                                                                 
[    0.745594] Freeing initrd memory: 2668k freed                                                                                                      
[    0.755727] audit: initializing netlink socket (disabled)                                                                                           
[    0.761216] type=2000 audit(1242654497.761:1): initialized                                                                                          
[    0.769272] HugeTLB registered 2 MB page size, pre-allocated 0 pages                                                                                
[    0.783215] VFS: Disk quotas dquot_6.5.2                                                                                                            
[    0.787383] Dquot-cache hash table entries: 512 (order 0, 4096 bytes)                                                                               
[    0.795622] msgmni has been set to 3944                                                                                                             
[    0.800880] alg: No test for stdrng (krng)                                                                                                          
[    0.805667] io scheduler noop registered                                                                                                            
[    0.809591] io scheduler anticipatory registered                                                                                                    
[    0.814202] io scheduler deadline registered                                                                                                        
[    0.818482] io scheduler cfq registered (default)                                                                                                   
[    0.823242] disabled boot interrupts on PCI device0x1166:0x0205                                                                                     
[    0.871059] pci 0000:00:06.0: Boot video device                                                                                                     
[    0.876105] pci_hotplug: PCI Hot Plug PCI Core version: 0.5                                                                                         
[    0.882441] processor ACPI_CPU:00: registered as cooling_device0                                                                                    
[    0.888596] processor ACPI_CPU:01: registered as cooling_device1                                                                                    
[    0.907665] Non-volatile memory driver v1.3                                                                                                         
[    0.911908] Linux agpgart interface v0.103                                                                                                          
[    0.916348] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled                                                                                 
[    1.167644] serial8250: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A                                                                                    
[    1.418642] serial8250: ttyS1 at I/O 0x2f8 (irq = 3) is a 16550A                                                                                    
[    1.425803] 00:05: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A                                                                                         
[    1.431735] 00:06: ttyS1 at I/O 0x2f8 (irq = 3) is a 16550A                                                                                         
[    1.441852] brd: module loaded                                                                                                                      
[    1.445211] Driver 'sd' needs updating - please use bus_type methods                                                                                
[    1.451839] sata_svw 0000:01:0e.0: version 2.3                                                                                                      
[    1.456333] sata_svw 0000:01:0e.0: PCI INT A -> GSI 11 (level, low) -> IRQ 11                                                                       
[    1.463639] scsi0 : sata_svw                                                                                                                        
[    1.466949] scsi1 : sata_svw                                                                                                                        
[    1.470022] scsi2 : sata_svw                                                                                                                        
[    1.473090] scsi3 : sata_svw                                                                                                                        
[    1.476112] ata1: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe000 irq 11                                                                    
[    1.483490] ata2: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe100 irq 11                                                                    
[    1.490870] ata3: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe200 irq 11                                                                    
[    1.498247] ata4: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe300 irq 11                                                                    
[    1.505712] sata_svw 0000:01:0e.1: PCI INT A -> GSI 11 (level, low) -> IRQ 11                                                                       
[    1.513536] scsi4 : pata_serverworks                                                                                                                
[    1.517323] scsi5 : pata_serverworks                                                                                                                
[    1.521046] ata5: PATA max UDMA/66 cmd 0x1f0 ctl 0x3f6 bmdma 0xffa0 irq 14                                                                          
[    1.527921] ata6: PATA max UDMA/66 cmd 0x170 ctl 0x376 bmdma 0xffa8 irq 15                                                                          
[    1.535032] Intel(R) PRO/1000 Network Driver - version 7.3.21-k3-NAPI                                                                               
[    1.541460] Copyright (c) 1999-2006 Intel Corporation.                                                                                              
[    1.546627] e1000 0000:00:04.0: PCI INT A -> GSI 24 (level, low) -> IRQ 24                                                                          
[    1.690370] ata5.00: ATAPI: TEAC DV-516G, F4S7, max UDMA/33                                                                                         
[    1.699343] ata5.00: configured for UDMA/33                                                                                                         
[    1.834378] e1000: 0000:00:04.0: e1000_probe: (PCI:33MHz:32-bit) 00:e0:81:72:62:74                                                                  
[    1.942047] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)                                                                                  
[    1.993659] e1000: eth0: e1000_probe: Intel(R) PRO/1000 Network Connection                                                                          
[    2.000541] e1000 0000:00:05.0: PCI INT A -> GSI 25 (level, low) -> IRQ 25                                                                          
[    2.271681] e1000: 0000:00:05.0: e1000_probe: (PCI:33MHz:32-bit) 00:e0:81:72:62:75                                                                  
[    2.443651] e1000: eth1: e1000_probe: Intel(R) PRO/1000 Network Connection                                                                          
[    2.450618] e1000e: Intel(R) PRO/1000 Network Driver - 0.3.3.4-k4                                                                                   
[    2.456700] e1000e: Copyright (c) 1999-2008 Intel Corporation.                                                                                      
[    2.462659] console [netcon0] enabled                                                                                                               
[    2.466320] netconsole: network logging started                                                                                                     
[    2.471178] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver                                                                              
[    2.477720] ehci_hcd 0000:00:03.2: PCI INT A -> GSI 10 (level, low) -> IRQ 10                                                                       
[    2.484849] ehci_hcd 0000:00:03.2: EHCI Host Controller                                                                                             
[    2.492188] ehci_hcd 0000:00:03.2: new USB bus registered, assigned bus number 1                                                                    
[    2.520055] ehci_hcd 0000:00:03.2: irq 10, io mem 0xff6b6000                                                                                        
[    2.531045] ehci_hcd 0000:00:03.2: USB 2.0 started, EHCI 1.00                                                                                       
[    2.537344] usb usb1: configuration #1 chosen from 1 choice                                                                                         
[    2.543264] hub 1-0:1.0: USB hub found                                                                                                              
[    2.547074] hub 1-0:1.0: 4 ports detected                                                                                                           
[    2.552549] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver                                                                                  
[    2.558741] ohci_hcd 0000:00:03.0: PCI INT A -> GSI 10 (level, low) -> IRQ 10                                                                       
[    2.565869] ohci_hcd 0000:00:03.0: OHCI Host Controller                                                                                             
[    2.571347] ohci_hcd 0000:00:03.0: new USB bus registered, assigned bus number 2                                                                    
[    2.578745] ohci_hcd 0000:00:03.0: irq 10, io mem 0xff6b4000                                                                                        
[    2.639319] usb usb2: configuration #1 chosen from 1 choice                                                                                         
[    2.645015] hub 2-0:1.0: USB hub found                                                                                                              
[    2.648770] hub 2-0:1.0: 2 ports detected                                                                                                           
[    2.653366] ohci_hcd 0000:00:03.1: PCI INT A -> GSI 10 (level, low) -> IRQ 10                                                                       
[    2.660493] ohci_hcd 0000:00:03.1: OHCI Host Controller                                                                                             
[    2.665985] ohci_hcd 0000:00:03.1: new USB bus registered, assigned bus number 3                                                                    
[    2.673382] ohci_hcd 0000:00:03.1: irq 10, io mem 0xff6b5000                                                                                        
[    2.734284] usb usb3: configuration #1 chosen from 1 choice                                                                                         
[    2.740006] hub 3-0:1.0: USB hub found                                                                                                              
[    2.743765] hub 3-0:1.0: 2 ports detected                                                                                                           
[    2.748320] uhci_hcd: USB Universal Host Controller Interface driver                                                                                
[    2.755360] usbcore: registered new interface driver libusual                                                                                       
[    2.761511] PNP: No PS/2 controller found. Probing ports directly.                                                                                  
[    2.769409] serio: i8042 KBD port at 0x60,0x64 irq 1                                                                                                
[    2.774412] serio: i8042 AUX port at 0x60,0x64 irq 12                                                                                               
[    2.779871] mice: PS/2 mouse device common for all mice                                                                                             
[    2.785770] cpuidle: using governor ladder                                                                                                          
[    2.789861] cpuidle: using governor menu                                                                                                            
[    2.796101] usbcore: registered new interface driver hiddev                                                                                         
[    2.801783] usbcore: registered new interface driver usbhid                                                                                         
[    2.807346] usbhid: v2.6:USB HID core driver                                                                                                        
[    2.812502] TCP bic registered                                                                                                                      
[    2.815553] NET: Registered protocol family 17                                                                                                      
[    2.820302] powernow-k8: Found 1 Dual-Core AMD Opteron(tm) Processor 1216 processors (2 cpu cores) (version 2.20.00)                                
[    2.831228] powernow-k8:    0 : fid 0x10 (2400 MHz), vid 0xa                                                                                        
[    2.836892] powernow-k8:    1 : fid 0xe (2200 MHz), vid 0xc                                                                                         
[    2.842454] powernow-k8:    2 : fid 0xc (2000 MHz), vid 0xe                                                                                         
[    2.848015] powernow-k8:    3 : fid 0xa (1800 MHz), vid 0x10                                                                                        
[    2.853661] powernow-k8:    4 : fid 0x2 (1000 MHz), vid 0x12                                                                                        
[    2.860635] registered taskstats version 1                                                                                                          
[    6.951257] ata1.00: qc timeout (cmd 0x27)                                                                                                          
[    6.955354] ata1.00: failed to read native max address (err_mask=0x4)                                                                               
[    6.961781] ata1.00: HPA support seems broken, skipping HPA handling                                                                                
[    7.273044] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)                                                                                  
[    7.285159] ata1.00: configured for UDMA/133                                                                                                        
[    7.290052] scsi 0:0:0:0: Direct-Access     ATA      WDC WD1200JS-00N 10.0 PQ: 0 ANSI: 5                                                            
[    7.299294] sd 0:0:0:0: [sda] 234441648 512-byte hardware sectors: (120 GB/111 GiB)                                                                 
[    7.306968] sd 0:0:0:0: [sda] Write Protect is off                                                                                                  
[    7.311754] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00                                                                                               
[    7.316839] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA                                                 
[    7.326312]  sda:<6>ata2: SATA link down (SStatus 4 SControl 300)                                                                                   
[    7.938372] ata3: SATA link down (SStatus 4 SControl 300)                                                                                           
[    8.258372] ata4: SATA link down (SStatus 4 SControl 300)                                                                                           
[    8.264357] scsi 4:0:0:0: CD-ROM            TEAC     DV-516G          F4S7 PQ: 0 ANSI: 5                                                            
[   37.704234] ata1: lost interrupt (Status 0x50)                                                                                                      
[   37.708695] sd 0:0:0:0: [sda] Unhandled error code                                                                                                  
[   37.713479] sd 0:0:0:0: [sda] Result: hostbyte=DID_OK driverbyte=DRIVER_TIMEOUT                                                                     
[   37.720791] end_request: I/O error, dev sda, sector 0                                                                                               
[   37.725848] Buffer I/O error on device sda, logical block 0 

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18 15:11                       ` Peter Zijlstra
@ 2009-05-18 17:23                         ` Yinghai Lu
  2009-05-19  9:37                           ` Ingo Molnar
                                             ` (2 more replies)
  0 siblings, 3 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-05-18 17:23 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin

please check following patch

Thanks

YH

Subject: [PATCH] x86: don't mark pin_programmed early

Peter bisected that
| commit b9c61b70075c87a8612624736faf4a2de5b1ed30
| Date:   Wed May 6 10:10:06 2009 -0700
|
|     x86/pci: update pirq_enable_irq() to setup io apic routing
|
|     So we can set io apic routing only when enabling the device irq.
wrecked his opteron box, ata1 interrupts fail to get through

and that ata1 is using irq 11
[    1.451839] sata_svw 0000:01:0e.0: version 2.3
[    1.456333] sata_svw 0000:01:0e.0: PCI INT A -> GSI 11 (level, low) -> IRQ 11
[    1.463639] scsi0 : sata_svw
[    1.466949] scsi1 : sata_svw
[    1.470022] scsi2 : sata_svw
[    1.473090] scsi3 : sata_svw
[    1.476112] ata1: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe000 irq 11
[    1.483490] ata2: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe100 irq 11
[    1.490870] ata3: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe200 irq 11
[    1.498247] ata4: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe300 irq 11

that pin overlaps with the legacy pins.
We should not set bits in pin_programmed here, so that those bits can be set later
via io_apic_set_pci_routing()

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6b7913b..ed6a91c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1538,7 +1538,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		cfg = desc->chip_data;
 		add_pin_to_irq_node(cfg, node, apic_id, pin);
-		set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
 		setup_IO_APIC_irq(apic_id, pin, irq, desc,
 				irq_trigger(idx), irq_polarity(idx));
 	}

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-18 17:23                         ` Yinghai Lu
@ 2009-05-19  9:37                           ` Ingo Molnar
  2009-05-19 10:31                             ` Peter Zijlstra
  2009-05-19  9:39                           ` [tip:irq/numa] x86, io-apic: Don't mark pin_programmed early tip-bot for Yinghai Lu
  2009-05-19 12:30                           ` tip-bot for Yinghai Lu
  2 siblings, 1 reply; 102+ messages in thread
From: Ingo Molnar @ 2009-05-19  9:37 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Peter Zijlstra, Jack Steiner, linux-kernel, Thomas Gleixner,
	H. Peter Anvin


* Yinghai Lu <yinghai@kernel.org> wrote:

> please check following patch
> 
> Thanks

I've applied it to tip:irq/numa, thanks Yinghai. Pushed it out into 
tip:master - Peter please check whether:

  3b44254: Merge branch 'irq/numa'

or later versions of -tip work fine on your Opteron box.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [tip:irq/numa] x86, io-apic: Don't mark pin_programmed early
  2009-05-18 17:23                         ` Yinghai Lu
  2009-05-19  9:37                           ` Ingo Molnar
@ 2009-05-19  9:39                           ` tip-bot for Yinghai Lu
  2009-05-19 12:30                           ` tip-bot for Yinghai Lu
  2 siblings, 0 replies; 102+ messages in thread
From: tip-bot for Yinghai Lu @ 2009-05-19  9:39 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, yinghai, peterz, yinghai.lu, steiner,
	tglx, mingo

Commit-ID:  46e842ccbe38be910bb2928258850b6178ce00e1
Gitweb:     http://git.kernel.org/tip/46e842ccbe38be910bb2928258850b6178ce00e1
Author:     Yinghai Lu <yinghai@kernel.org>
AuthorDate: Mon, 18 May 2009 10:23:28 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 19 May 2009 11:35:59 +0200

x86, io-apic: Don't mark pin_programmed early

Peter bisected that:

| commit b9c61b70075c87a8612624736faf4a2de5b1ed30
| Date:   Wed May 6 10:10:06 2009 -0700
|
|     x86/pci: update pirq_enable_irq() to setup io apic routing
|
|     So we can set io apic routing only when enabling the device irq.

wrecked his opteron box, ata1 interrupts fail to get through.

ata1 is using irq 11:

[    1.451839] sata_svw 0000:01:0e.0: version 2.3
[    1.456333] sata_svw 0000:01:0e.0: PCI INT A -> GSI 11 (level, low) -> IRQ 11
[    1.463639] scsi0 : sata_svw
[    1.466949] scsi1 : sata_svw
[    1.470022] scsi2 : sata_svw
[    1.473090] scsi3 : sata_svw
[    1.476112] ata1: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe000 irq 11
[    1.483490] ata2: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe100 irq 11
[    1.490870] ata3: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe200 irq 11
[    1.498247] ata4: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe300 irq 11

that pin overlaps with the legacy pins.

We should not set bits in pin_programmed here, so that those bits can
be set later via io_apic_set_pci_routing().

[ Impact: fix boot hang on certain systems ]

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
Cc: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A119990.9020606@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 arch/x86/kernel/apic/io_apic.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ce1ac74..ac7f3b6 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1537,7 +1537,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		cfg = desc->chip_data;
 		add_pin_to_irq_node(cfg, node, apic_id, pin);
-		set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
 		setup_IO_APIC_irq(apic_id, pin, irq, desc,
 				irq_trigger(idx), irq_polarity(idx));
 	}

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-19  9:37                           ` Ingo Molnar
@ 2009-05-19 10:31                             ` Peter Zijlstra
  2009-05-19 12:26                               ` Ingo Molnar
  0 siblings, 1 reply; 102+ messages in thread
From: Peter Zijlstra @ 2009-05-19 10:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Yinghai Lu, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin

On Tue, 2009-05-19 at 11:37 +0200, Ingo Molnar wrote:
> * Yinghai Lu <yinghai@kernel.org> wrote:
> 
> > please check following patch
> > 
> > Thanks
> 
> I've applied it to tip:irq/numa, thanks Yinghai. Pushed it out into 
> tip:master - Peter please check whether:
> 
>   3b44254: Merge branch 'irq/numa'
> 
> or later versions of -tip work fine on your Opteron box.

v2.6.30-rc6-1624-g3b44254

seems to boot again.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: tip: patches in git for irq and numa
  2009-05-19 10:31                             ` Peter Zijlstra
@ 2009-05-19 12:26                               ` Ingo Molnar
  0 siblings, 0 replies; 102+ messages in thread
From: Ingo Molnar @ 2009-05-19 12:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Yinghai Lu, Jack Steiner, linux-kernel, Thomas Gleixner, H. Peter Anvin


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Tue, 2009-05-19 at 11:37 +0200, Ingo Molnar wrote:
> > * Yinghai Lu <yinghai@kernel.org> wrote:
> > 
> > > please check following patch
> > > 
> > > Thanks
> > 
> > I've applied it to tip:irq/numa, thanks Yinghai. Pushed it out into 
> > tip:master - Peter please check whether:
> > 
> >   3b44254: Merge branch 'irq/numa'
> > 
> > or later versions of -tip work fine on your Opteron box.
> 
> v2.6.30-rc6-1624-g3b44254
> 
> seems to boot again.

Thanks!

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [tip:irq/numa] x86, io-apic: Don't mark pin_programmed early
  2009-05-18 17:23                         ` Yinghai Lu
  2009-05-19  9:37                           ` Ingo Molnar
  2009-05-19  9:39                           ` [tip:irq/numa] x86, io-apic: Don't mark pin_programmed early tip-bot for Yinghai Lu
@ 2009-05-19 12:30                           ` tip-bot for Yinghai Lu
  2 siblings, 0 replies; 102+ messages in thread
From: tip-bot for Yinghai Lu @ 2009-05-19 12:30 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, yinghai, peterz, yinghai.lu, steiner,
	tglx, mingo

Commit-ID:  4c6f18fc81565967da20f2d4a3922cdba33f8e2b
Gitweb:     http://git.kernel.org/tip/4c6f18fc81565967da20f2d4a3922cdba33f8e2b
Author:     Yinghai Lu <yinghai@kernel.org>
AuthorDate: Mon, 18 May 2009 10:23:28 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 19 May 2009 14:26:51 +0200

x86, io-apic: Don't mark pin_programmed early

Peter bisected that:

| commit b9c61b70075c87a8612624736faf4a2de5b1ed30
| Date:   Wed May 6 10:10:06 2009 -0700
|
|     x86/pci: update pirq_enable_irq() to setup io apic routing
|
|     So we can set io apic routing only when enabling the device irq.

wrecked his opteron box, ata1 interrupts fail to get through.

ata1 is using irq 11:

[    1.451839] sata_svw 0000:01:0e.0: version 2.3
[    1.456333] sata_svw 0000:01:0e.0: PCI INT A -> GSI 11 (level, low) -> IRQ 11
[    1.463639] scsi0 : sata_svw
[    1.466949] scsi1 : sata_svw
[    1.470022] scsi2 : sata_svw
[    1.473090] scsi3 : sata_svw
[    1.476112] ata1: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe000 irq 11
[    1.483490] ata2: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe100 irq 11
[    1.490870] ata3: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe200 irq 11
[    1.498247] ata4: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe300 irq 11

that pin overlaps with the legacy pins.

We should not set bits in pin_programmed here, so that those bits can
be set later via io_apic_set_pci_routing().

[ Impact: fix boot hang on certain systems ]

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
Tested-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A119990.9020606@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 arch/x86/kernel/apic/io_apic.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ce1ac74..ac7f3b6 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1537,7 +1537,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		cfg = desc->chip_data;
 		add_pin_to_irq_node(cfg, node, apic_id, pin);
-		set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
 		setup_IO_APIC_irq(apic_id, pin, irq, desc,
 				irq_trigger(idx), irq_polarity(idx));
 	}

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* [RESEND PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-05-14 17:34                 ` Yinghai Lu
  2009-05-14 19:44                   ` Christoph Lameter
@ 2009-06-04  5:16                   ` Yinghai Lu
  2009-06-04 16:38                     ` Christoph Lameter
  1 sibling, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-06-04  5:16 UTC (permalink / raw)
  To: Andrew Morton, mingo, tglx, hpa
  Cc: mel, cl, suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes


Some systems have a strange SRAT table that describes only a small range for
some node, or are booted with the mem= option, etc.

If a bad SRAT table gives some node only a small range, that node will not be
onlined.  In the early checking, the bit in node_states[N_HIGH_MEMORY] for the
node is set even though that node has very little RAM (e.g. 1M), and it is not
cleared before the bits are set again in the following loop according to the
nodes that are finally online.

For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so this works for 64 bit too.

v2: fix typo

Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>

---
 mm/page_alloc.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
+	/*
+	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
+	 * that node_mask, clear it at first
+	 */
+	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RESEND PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-06-04  5:16                   ` [RESEND PATCH] " Yinghai Lu
@ 2009-06-04 16:38                     ` Christoph Lameter
  2009-06-04 16:48                       ` Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Christoph Lameter @ 2009-06-04 16:38 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, mingo, tglx, hpa, mel, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes

Ok. The N_HIGH_MEMORY bit is set for all online nodes in a loop
that follows the code you modified. The patch ensures that bits do not
remain set that earlier arch / core code may have set.

Could you make the description clearer?

Otherwise

Acked-by: Christoph Lameter <cl@linux-foundation.org>


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RESEND PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-06-04 16:38                     ` Christoph Lameter
@ 2009-06-04 16:48                       ` Yinghai Lu
  2009-06-04 17:11                         ` Christoph Lameter
  0 siblings, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-06-04 16:48 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andrew Morton, mingo, tglx, hpa, mel, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes

Christoph Lameter wrote:
> Ok. The N_HIGH_MEMORY bit is set for all onlines nodes in a loop
> that follows the code you modified. The patch insures that bits do not
> remain set that earlier arch / core code may have set.
> 
> Could you make the description clearer?
> 

how about

Subject: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v3

Some systems have a strange SRAT table that describes only a small range for
some node, or are booted with the mem= option, etc. Such a node will not be
onlined.
In the early checking, the bit in node_states[N_HIGH_MEMORY] for the
node is set even though that node has very little RAM (e.g. 1M).

The N_HIGH_MEMORY bit is set for all online nodes in a loop that follows the
code that is modified. The patch ensures that bits do not remain set that
earlier arch / core code may have set.

For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so this works for 64 bit too.

v3: update description according to Christoph


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [RESEND PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v2
  2009-06-04 16:48                       ` Yinghai Lu
@ 2009-06-04 17:11                         ` Christoph Lameter
  2009-06-04 17:26                           ` [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4 Yinghai Lu
  0 siblings, 1 reply; 102+ messages in thread
From: Christoph Lameter @ 2009-06-04 17:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, mingo, tglx, hpa, mel, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes

On Thu, 4 Jun 2009, Yinghai Lu wrote:

> in case some system strange SRAT table. some kind of small range form some node
> or with mem= boot option etc. that node will not be onlined.
> In the early checking, the bit in node_states[N_HIGH_MEMORY] for the
> node is set even that node has less RAM like 1M.

Maybe we can replace that with:

SRAT tables may contain nodes of very small size. The arch code may
decide to not activate such a node. However, currently the early boot code
sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be active
although these nodes have no present pages.


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
  2009-06-04 17:11                         ` Christoph Lameter
@ 2009-06-04 17:26                           ` Yinghai Lu
  2009-06-19  6:42                             ` Nathan Lynch
       [not found]                             ` <4A2803D1.4070001-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  0 siblings, 2 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-04 17:26 UTC (permalink / raw)
  To: Christoph Lameter, Andrew Morton, mingo, mel
  Cc: tglx, hpa, suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes


SRAT tables may contain nodes of very small size. The arch code may
decide to not activate such a node. However, currently the early boot code
sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be active
although these nodes have no present pages.

For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so this works for 64 bit too.

v4: update description according to Christoph

Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>

---
 mm/page_alloc.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
+	/*
+	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
+	 * that node_mask, clear it at first
+	 */
+	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
       [not found]                             ` <4A2803D1.4070001-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2009-06-19  6:42                               ` Nathan Lynch
  0 siblings, 0 replies; 102+ messages in thread
From: Nathan Lynch @ 2009-06-19  6:42 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: steiner-sJ/iWh9BUns, Christoph Lameter,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, mingo-X9Un+BFzKDI

Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> writes:
> SRAT tables may contains nodes of very small size. The arch code may
> decide to not activate such a node. However, currently the early boot code
> sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be active
> although these nodes have no present pages.
>
> for 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>
> v4: update description according to Christoph
>
> Signed-off-by: Yinghai Lu <Yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Tested-by: Jack Steiner <steiner-sJ/iWh9BUns@public.gmane.org>
> Acked-by: Christoph Lameter <cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
>
> ---
>  mm/page_alloc.c |    5 +++++
>  1 file changed, 5 insertions(+)
>
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
>  						early_node_map[i].start_pfn,
>  						early_node_map[i].end_pfn);
>  
> +	/*
> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
> +	 * that node_mask, clear it at first
> +	 */
> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>  	/* Initialise every node */
>  	mminit_verify_pageflags_layout();
>  	setup_nr_node_ids();

This patch breaks the cpuset.mems cgroup attribute on an i386 kvm guest.

With v2.6.30:

# uname -r
2.6.30
# cat /cgroup/cpuset.mems
0
# mkdir /cgroup/test
# for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
# echo $$ > /cgroup/test/tasks
# echo $?
0

With a pulled-today Linus tree:

# uname -r
2.6.30-06725-g1d89b30
# cat /cgroup/cpuset.mems

# mkdir /cgroup/test
# for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
# echo $$ > /cgroup/test/tasks
-bash: echo: write error: No space left on device

(Note that in addition to the ENOSPC error, /cgroup/cpuset.mems is empty
rather than '0' in the second test.)

I bisected to the commit containing this change.  Reverting fixes the
problem.

.config below:

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.30
# Fri Jun 19 01:27:04 2009
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
# CONFIG_X86_64 is not set
CONFIG_X86=y
CONFIG_OUTPUT_FORMAT="elf32-i386"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_FAST_CMPXCHG_LOCAL=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
# CONFIG_GENERIC_TIME_VSYSCALL is not set
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
# CONFIG_ZONE_DMA32 is not set
CONFIG_ARCH_POPULATES_NODE_MAP=y
# CONFIG_AUDIT_ARCH is not set
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_USE_GENERIC_SMP_HELPERS=y
CONFIG_X86_32_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_X86_32_LAZY_GS=y
CONFIG_KTIME_SCALAR=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_TREE=y

#
# RCU Subsystem
#
CONFIG_CLASSIC_RCU=y
# CONFIG_TREE_RCU is not set
# CONFIG_PREEMPT_RCU is not set
# CONFIG_TREE_RCU_TRACE is not set
# CONFIG_PREEMPT_RCU_TRACE is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=19
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_GROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
# CONFIG_RT_GROUP_SCHED is not set
# CONFIG_USER_SCHED is not set
CONFIG_CGROUP_SCHED=y
CONFIG_CGROUPS=y
# CONFIG_CGROUP_DEBUG is not set
CONFIG_CGROUP_NS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
# CONFIG_CGROUP_MEM_RES_CTLR is not set
CONFIG_SYSFS_DEPRECATED=y
CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_RELAY=y
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_RD_GZIP=y
CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_ANON_INODES=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_HAVE_PERF_COUNTERS=y

#
# Performance Counters
#
# CONFIG_PERF_COUNTERS is not set
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_PCI_QUIRKS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_STRIP_ASM_SYMS is not set
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_PROFILING=y
CONFIG_TRACEPOINTS=y
CONFIG_MARKERS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
CONFIG_KPROBES=y
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_KRETPROBES=y
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_API_DEBUG=y
# CONFIG_SLOW_WORK is not set
CONFIG_HAVE_GENERIC_DMA_COHERENT=y
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
CONFIG_BLK_DEV_BSG=y
# CONFIG_BLK_DEV_INTEGRITY is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
CONFIG_FREEZER=y

#
# Processor type and features
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
# CONFIG_SPARSE_IRQ is not set
CONFIG_X86_MPPARSE=y
# CONFIG_X86_BIGSMP is not set
CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_RDC321X is not set
# CONFIG_X86_32_NON_STANDARD is not set
CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
CONFIG_M686=y
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
# CONFIG_MCORE2 is not set
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CPU=y
CONFIG_X86_L1_CACHE_BYTES=64
CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_X86_XADD=y
# CONFIG_X86_PPRO_FENCE is not set
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_CYRIX_32=y
CONFIG_CPU_SUP_AMD=y
CONFIG_CPU_SUP_CENTAUR=y
CONFIG_CPU_SUP_TRANSMETA_32=y
CONFIG_CPU_SUP_UMC_32=y
# CONFIG_X86_DS is not set
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
# CONFIG_IOMMU_HELPER is not set
# CONFIG_IOMMU_API is not set
CONFIG_NR_CPUS=8
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set
# CONFIG_X86_MCE is not set
# CONFIG_X86_ANCIENT_MCE is not set
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
CONFIG_X86_REBOOTFIXUPS=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_INTEL=y
# CONFIG_MICROCODE_AMD is not set
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
# CONFIG_X86_CPU_DEBUG is not set
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
# CONFIG_PHYS_ADDR_T_64BIT is not set
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_HAVE_MLOCK=y
CONFIG_HAVE_MLOCKED_PAGE_BIT=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_HIGHPTE=y
# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set
CONFIG_X86_RESERVE_LOW_64K=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
CONFIG_EFI=y
CONFIG_SECCOMP=y
# CONFIG_CC_STACKPROTECTOR is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_KEXEC_JUMP is not set
CONFIG_PHYSICAL_START=0x1000000
CONFIG_RELOCATABLE=y
CONFIG_X86_NEED_RELOCS=y
CONFIG_PHYSICAL_ALIGN=0x200000
CONFIG_HOTPLUG_CPU=y
CONFIG_COMPAT_VDSO=y
# CONFIG_CMDLINE_BOOL is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management and ACPI options
#
CONFIG_PM=y
CONFIG_PM_DEBUG=y
# CONFIG_PM_VERBOSE is not set
CONFIG_CAN_PM_TRACE=y
CONFIG_PM_TRACE=y
CONFIG_PM_TRACE_RTC=y
CONFIG_PM_SLEEP_SMP=y
CONFIG_PM_SLEEP=y
CONFIG_SUSPEND=y
# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_SUSPEND_FREEZER=y
CONFIG_HIBERNATION_NVS=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
CONFIG_ACPI=y
CONFIG_ACPI_SLEEP=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_PROCFS_POWER=y
CONFIG_ACPI_SYSFS_POWER=y
CONFIG_ACPI_PROC_EVENT=y
CONFIG_ACPI_AC=y
CONFIG_ACPI_BATTERY=y
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_VIDEO=y
CONFIG_ACPI_FAN=y
CONFIG_ACPI_DOCK=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
# CONFIG_ACPI_PCI_SLOT is not set
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
# CONFIG_ACPI_SBS is not set
# CONFIG_APM is not set

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
CONFIG_CPU_FREQ_DEBUG=y
# CONFIG_CPU_FREQ_STAT is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set

#
# CPUFreq processor drivers
#
CONFIG_X86_ACPI_CPUFREQ=y
# CONFIG_X86_POWERNOW_K6 is not set
# CONFIG_X86_POWERNOW_K7 is not set
# CONFIG_X86_POWERNOW_K8 is not set
# CONFIG_X86_GX_SUSPMOD is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_SPEEDSTEP_ICH is not set
# CONFIG_X86_SPEEDSTEP_SMI is not set
# CONFIG_X86_P4_CLOCKMOD is not set
# CONFIG_X86_CPUFREQ_NFORCE2 is not set
# CONFIG_X86_LONGRUN is not set
# CONFIG_X86_LONGHAUL is not set
# CONFIG_X86_E_POWERSAVER is not set

#
# shared options
#
# CONFIG_X86_SPEEDSTEP_LIB is not set
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPU_IDLE_GOV_MENU=y

#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
# CONFIG_PCI_GOOLPC is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
# CONFIG_DMAR is not set
CONFIG_PCIEPORTBUS=y
# CONFIG_HOTPLUG_PCI_PCIE is not set
CONFIG_PCIEAER=y
# CONFIG_PCIEASPM is not set
CONFIG_ARCH_SUPPORTS_MSI=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
# CONFIG_PCI_IOV is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
# CONFIG_OLPC is not set
CONFIG_K8_NB=y
CONFIG_PCCARD=y
# CONFIG_PCMCIA_DEBUG is not set
CONFIG_PCMCIA=y
CONFIG_PCMCIA_LOAD_CIS=y
CONFIG_PCMCIA_IOCTL=y
CONFIG_CARDBUS=y

#
# PC-card bridges
#
CONFIG_YENTA=y
CONFIG_YENTA_O2=y
CONFIG_YENTA_RICOH=y
CONFIG_YENTA_TI=y
CONFIG_YENTA_ENE_TUNE=y
CONFIG_YENTA_TOSHIBA=y
# CONFIG_PD6729 is not set
# CONFIG_I82092 is not set
CONFIG_PCCARD_NONSTATIC=y
CONFIG_HOTPLUG_PCI=y
# CONFIG_HOTPLUG_PCI_FAKE is not set
# CONFIG_HOTPLUG_PCI_IBM is not set
# CONFIG_HOTPLUG_PCI_ACPI is not set
# CONFIG_HOTPLUG_PCI_CPCI is not set
# CONFIG_HOTPLUG_PCI_SHPC is not set

#
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_HAVE_AOUT=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y
CONFIG_HAVE_ATOMIC_IOMAP=y
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
CONFIG_XFRM_USER=y
# CONFIG_XFRM_SUB_POLICY is not set
# CONFIG_XFRM_MIGRATE is not set
# CONFIG_XFRM_STATISTICS is not set
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_ASK_IP_FIB_HASH=y
# CONFIG_IP_FIB_TRIE is not set
CONFIG_IP_FIB_HASH=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
# CONFIG_ARPD is not set
CONFIG_SYN_COOKIES=y
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
CONFIG_INET_TUNNEL=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET_LRO=y
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
CONFIG_TCP_CONG_CUBIC=y
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
# CONFIG_TCP_CONG_HSTCP is not set
# CONFIG_TCP_CONG_HYBLA is not set
# CONFIG_TCP_CONG_VEGAS is not set
# CONFIG_TCP_CONG_SCALABLE is not set
# CONFIG_TCP_CONG_LP is not set
# CONFIG_TCP_CONG_VENO is not set
# CONFIG_TCP_CONG_YEAH is not set
# CONFIG_TCP_CONG_ILLINOIS is not set
# CONFIG_DEFAULT_BIC is not set
CONFIG_DEFAULT_CUBIC=y
# CONFIG_DEFAULT_HTCP is not set
# CONFIG_DEFAULT_VEGAS is not set
# CONFIG_DEFAULT_WESTWOOD is not set
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
# CONFIG_IPV6_OPTIMISTIC_DAD is not set
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_MIP6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
CONFIG_INET6_XFRM_MODE_TRANSPORT=y
CONFIG_INET6_XFRM_MODE_TUNNEL=y
CONFIG_INET6_XFRM_MODE_BEET=y
# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
CONFIG_IPV6_SIT=y
CONFIG_IPV6_NDISC_NODETYPE=y
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6_MULTIPLE_TABLES is not set
# CONFIG_IPV6_MROUTE is not set
CONFIG_NETLABEL=y
CONFIG_NETWORK_SECMARK=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
# CONFIG_NETFILTER_ADVANCED is not set

#
# Core Netfilter Configuration
#
CONFIG_NETFILTER_NETLINK=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_FTP=y
CONFIG_NF_CONNTRACK_IRC=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_MARK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
# CONFIG_IP_VS is not set

#
# IP: Netfilter Configuration
#
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_CONNTRACK_PROC_COMPAT=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_LOG=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_NF_NAT_NEEDED=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_NF_NAT_FTP=y
CONFIG_NF_NAT_IRC=y
# CONFIG_NF_NAT_TFTP is not set
# CONFIG_NF_NAT_AMANDA is not set
# CONFIG_NF_NAT_PPTP is not set
# CONFIG_NF_NAT_H323 is not set
CONFIG_NF_NAT_SIP=y
CONFIG_IP_NF_MANGLE=y

#
# IPv6: Netfilter Configuration
#
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
CONFIG_IP6_NF_TARGET_LOG=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
CONFIG_STP=y
CONFIG_BRIDGE=y
# CONFIG_NET_DSA is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
# CONFIG_PHONET is not set
# CONFIG_IEEE802154 is not set
# CONFIG_NET_SCHED is not set
# CONFIG_DCB is not set

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_TCPPROBE is not set
# CONFIG_NET_DROP_MONITOR is not set
CONFIG_HAMRADIO=y

#
# Packet Radio protocols
#
# CONFIG_AX25 is not set
# CONFIG_CAN is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
CONFIG_FIB_RULES=y
# CONFIG_WIRELESS is not set
# CONFIG_WIMAX is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
CONFIG_FIRMWARE_IN_KERNEL=y
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_DEBUG_DRIVER is not set
CONFIG_DEBUG_DEVRES=y
# CONFIG_SYS_HYPERVISOR is not set
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
# CONFIG_MTD is not set
# CONFIG_PARPORT is not set
CONFIG_PNP=y
CONFIG_PNP_DEBUG_MESSAGES=y

#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
# CONFIG_BLK_DEV_FD is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_UB is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
# CONFIG_BLK_DEV_XIP is not set
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
# CONFIG_VIRTIO_BLK is not set
# CONFIG_BLK_DEV_HD is not set
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
# CONFIG_ISL29003 is not set
# CONFIG_C2PORT is not set

#
# EEPROM support
#
# CONFIG_EEPROM_AT24 is not set
# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EEPROM_93CX6 is not set
# CONFIG_CB710_CORE is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set

#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
# CONFIG_SCSI_TGT is not set
# CONFIG_SCSI_NETLINK is not set
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
# CONFIG_BLK_DEV_SR_VENDOR is not set
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set
CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set
# CONFIG_SCSI_SCAN_ASYNC is not set
CONFIG_SCSI_WAIT_SCAN=m

#
# SCSI Transports
#
# CONFIG_SCSI_SPI_ATTRS is not set
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_ISCSI_TCP is not set
# CONFIG_SCSI_BNX2_ISCSI is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_MVSAS is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_MPT2SAS is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_LIBFC is not set
# CONFIG_LIBFCOE is not set
# CONFIG_FCOE is not set
# CONFIG_FCOE_FNIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_QLA_ISCSI is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_SRP is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
# CONFIG_SCSI_DH is not set
# CONFIG_SCSI_OSD_INITIATOR is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
CONFIG_SATA_PMP=y
# CONFIG_SATA_AHCI is not set
# CONFIG_SATA_SIL24 is not set
CONFIG_ATA_SFF=y
# CONFIG_SATA_SVW is not set
CONFIG_ATA_PIIX=y
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SX4 is not set
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set
# CONFIG_SATA_INIC162X is not set
# CONFIG_PATA_ACPI is not set
# CONFIG_PATA_ALI is not set
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
# CONFIG_PATA_CS5535 is not set
# CONFIG_PATA_CS5536 is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_ATA_GENERIC is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NINJA32 is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PCMCIA is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RZ1000 is not set
# CONFIG_PATA_SC1200 is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
# CONFIG_PATA_SCH is not set
# CONFIG_MD is not set
# CONFIG_FUSION is not set

#
# IEEE 1394 (FireWire) support
#

#
# Enable only one of the two stacks, unless you know what you are doing
#
# CONFIG_FIREWIRE is not set
# CONFIG_IEEE1394 is not set
# CONFIG_I2O is not set
# CONFIG_MACINTOSH_DRIVERS is not set
CONFIG_NETDEVICES=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
# CONFIG_MACVLAN is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
CONFIG_VETH=y
# CONFIG_NET_SB1000 is not set
# CONFIG_ARCNET is not set
# CONFIG_PHYLIB is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_ETHOC is not set
# CONFIG_DNET is not set
# CONFIG_NET_TULIP is not set
# CONFIG_HP100 is not set
# CONFIG_IBM_NEW_EMAC_ZMII is not set
# CONFIG_IBM_NEW_EMAC_RGMII is not set
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_B44 is not set
# CONFIG_FORCEDETH is not set
# CONFIG_E100 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
CONFIG_8139CP=y
# CONFIG_8139TOO is not set
# CONFIG_R6040 is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SMSC9420 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_KS8842 is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_SC92031 is not set
# CONFIG_ATL2 is not set
# CONFIG_NETDEV_1000 is not set
# CONFIG_NETDEV_10000 is not set
# CONFIG_TR is not set

#
# Wireless LAN
#
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set

#
# Enable WiMAX (Networking options) to see the WiMAX drivers
#

#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
# CONFIG_NET_PCMCIA is not set
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
CONFIG_NETCONSOLE=y
# CONFIG_NETCONSOLE_DYNAMIC is not set
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
# CONFIG_VIRTIO_NET is not set
# CONFIG_ISDN is not set
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_FF_MEMLESS=y
CONFIG_INPUT_POLLDEV=y

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_PS2_ALPS=y
CONFIG_MOUSE_PS2_LOGIPS2PP=y
CONFIG_MOUSE_PS2_SYNAPTICS=y
CONFIG_MOUSE_PS2_LIFEBOOK=y
CONFIG_MOUSE_PS2_TRACKPOINT=y
# CONFIG_MOUSE_PS2_ELANTECH is not set
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
# CONFIG_MOUSE_BCM5974 is not set
# CONFIG_MOUSE_VSXXXAA is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
# CONFIG_JOYSTICK_A3D is not set
# CONFIG_JOYSTICK_ADI is not set
# CONFIG_JOYSTICK_COBRA is not set
# CONFIG_JOYSTICK_GF2K is not set
# CONFIG_JOYSTICK_GRIP is not set
# CONFIG_JOYSTICK_GRIP_MP is not set
# CONFIG_JOYSTICK_GUILLEMOT is not set
# CONFIG_JOYSTICK_INTERACT is not set
# CONFIG_JOYSTICK_SIDEWINDER is not set
# CONFIG_JOYSTICK_TMDC is not set
# CONFIG_JOYSTICK_IFORCE is not set
# CONFIG_JOYSTICK_WARRIOR is not set
# CONFIG_JOYSTICK_MAGELLAN is not set
# CONFIG_JOYSTICK_SPACEORB is not set
# CONFIG_JOYSTICK_SPACEBALL is not set
# CONFIG_JOYSTICK_STINGER is not set
# CONFIG_JOYSTICK_TWIDJOY is not set
# CONFIG_JOYSTICK_ZHENHUA is not set
# CONFIG_JOYSTICK_JOYDUMP is not set
# CONFIG_JOYSTICK_XPAD is not set
CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_ACECAD is not set
# CONFIG_TABLET_USB_AIPTEK is not set
# CONFIG_TABLET_USB_GTCO is not set
# CONFIG_TABLET_USB_KBTAB is not set
# CONFIG_TABLET_USB_WACOM is not set
CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
# CONFIG_TOUCHSCREEN_AD7879 is not set
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
# CONFIG_TOUCHSCREEN_TSC2007 is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_APANEL is not set
# CONFIG_INPUT_WISTRON_BTNS is not set
# CONFIG_INPUT_ATLAS_BTNS is not set
# CONFIG_INPUT_ATI_REMOTE is not set
# CONFIG_INPUT_ATI_REMOTE2 is not set
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_UINPUT is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_DEVKMEM=y
CONFIG_SERIAL_NONSTANDARD=y
# CONFIG_COMPUTONE is not set
# CONFIG_ROCKETPORT is not set
# CONFIG_CYCLADES is not set
# CONFIG_DIGIEPCA is not set
# CONFIG_MOXA_INTELLIO is not set
# CONFIG_MOXA_SMARTIO is not set
# CONFIG_ISI is not set
# CONFIG_SYNCLINK is not set
# CONFIG_SYNCLINKMP is not set
# CONFIG_SYNCLINK_GT is not set
# CONFIG_N_HDLC is not set
# CONFIG_RISCOM8 is not set
# CONFIG_SPECIALIX is not set
# CONFIG_SX is not set
# CONFIG_RIO is not set
# CONFIG_STALDRV is not set
# CONFIG_NOZOMI is not set

#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_PNP=y
# CONFIG_SERIAL_8250_CS is not set
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y

#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_VIRTIO_CONSOLE is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_TIMERIOMEM is not set
CONFIG_HW_RANDOM_INTEL=y
CONFIG_HW_RANDOM_AMD=y
CONFIG_HW_RANDOM_GEODE=y
CONFIG_HW_RANDOM_VIA=y
# CONFIG_HW_RANDOM_VIRTIO is not set
CONFIG_NVRAM=y
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set

#
# PCMCIA character devices
#
# CONFIG_SYNCLINK_CS is not set
# CONFIG_CARDMAN_4000 is not set
# CONFIG_CARDMAN_4040 is not set
# CONFIG_IPWIRELESS is not set
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
CONFIG_I2C_HELPER_AUTO=y
CONFIG_I2C_ALGOBIT=y

#
# I2C Hardware Bus support
#

#
# PC SMBus host controller drivers
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
CONFIG_I2C_I801=y
# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set

#
# I2C system bus drivers (mostly embedded / system-on-chip)
#
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_SIMTEC is not set

#
# External I2C/SMBus adapter drivers
#
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_TAOS_EVM is not set
# CONFIG_I2C_TINY_USB is not set

#
# Graphics adapter I2C/DDC channel drivers
#
# CONFIG_I2C_VOODOO3 is not set

#
# Other I2C/SMBus bus drivers
#
# CONFIG_I2C_PCA_PLATFORM is not set
# CONFIG_I2C_STUB is not set
# CONFIG_SCx200_ACB is not set

#
# Miscellaneous I2C Chip support
#
# CONFIG_DS1682 is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set
# CONFIG_SPI is not set
CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
# CONFIG_GPIOLIB is not set
# CONFIG_W1 is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
# CONFIG_PDA_POWER is not set
# CONFIG_BATTERY_DS2760 is not set
# CONFIG_BATTERY_BQ27x00 is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
# CONFIG_WATCHDOG_NOWAYOUT is not set

#
# Watchdog Device Drivers
#
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_ACQUIRE_WDT is not set
# CONFIG_ADVANTECH_WDT is not set
# CONFIG_ALIM1535_WDT is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_SC520_WDT is not set
# CONFIG_EUROTECH_WDT is not set
# CONFIG_IB700_WDT is not set
# CONFIG_IBMASR is not set
# CONFIG_WAFER_WDT is not set
# CONFIG_I6300ESB_WDT is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_60XX_WDT is not set
# CONFIG_SBC8360_WDT is not set
# CONFIG_SBC7240_WDT is not set
# CONFIG_CPU5_WDT is not set
# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83697HF_WDT is not set
# CONFIG_W83697UG_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
# CONFIG_SBC_EPX_C3_WATCHDOG is not set

#
# PCI-based Watchdog Cards
#
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_WDTPCI is not set

#
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
CONFIG_SSB_POSSIBLE=y

#
# Sonics Silicon Backplane
#
# CONFIG_SSB is not set

#
# Multifunction device drivers
#
# CONFIG_MFD_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_HTC_PASIC3 is not set
# CONFIG_TWL4030_CORE is not set
# CONFIG_MFD_TMIO is not set
# CONFIG_PMIC_DA903X is not set
# CONFIG_MFD_WM8400 is not set
# CONFIG_MFD_WM8350_I2C is not set
# CONFIG_MFD_PCF50633 is not set
# CONFIG_AB3100_CORE is not set
# CONFIG_REGULATOR is not set
# CONFIG_MEDIA_SUPPORT is not set

#
# Graphics support
#
CONFIG_AGP=y
# CONFIG_AGP_ALI is not set
# CONFIG_AGP_ATI is not set
# CONFIG_AGP_AMD is not set
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
# CONFIG_AGP_NVIDIA is not set
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_SWORKS is not set
# CONFIG_AGP_VIA is not set
# CONFIG_AGP_EFFICEON is not set
CONFIG_DRM=y
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_R128 is not set
# CONFIG_DRM_RADEON is not set
# CONFIG_DRM_I810 is not set
# CONFIG_DRM_I830 is not set
CONFIG_DRM_I915=y
# CONFIG_DRM_I915_KMS is not set
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
# CONFIG_DRM_VIA is not set
# CONFIG_DRM_SAVAGE is not set
# CONFIG_VGASTATE is not set
CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB_DDC is not set
# CONFIG_FB_BOOT_VESA_SUPPORT is not set
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
# CONFIG_FB_SYS_FILLRECT is not set
# CONFIG_FB_SYS_COPYAREA is not set
# CONFIG_FB_SYS_IMAGEBLIT is not set
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_SYS_FOPS is not set
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y

#
# Frame buffer hardware drivers
#
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_UVESA is not set
# CONFIG_FB_VESA is not set
CONFIG_FB_EFI=y
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I810 is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_INTEL is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_VIA is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
# CONFIG_FB_BROADSHEET is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_GENERIC=y
# CONFIG_BACKLIGHT_PROGEAR is not set
# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
# CONFIG_BACKLIGHT_SAHARA is not set

#
# Display device support
#
# CONFIG_DISPLAY_SUPPORT is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
CONFIG_SOUND=y
CONFIG_SOUND_OSS_CORE=y
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
CONFIG_SND_HWDEP=y
CONFIG_SND_JACK=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_SND_HRTIMER is not set
CONFIG_SND_DYNAMIC_MINORS=y
CONFIG_SND_SUPPORT_OLD_API=y
CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set
CONFIG_SND_VMASTER=y
# CONFIG_SND_RAWMIDI_SEQ is not set
# CONFIG_SND_OPL3_LIB_SEQ is not set
# CONFIG_SND_OPL4_LIB_SEQ is not set
# CONFIG_SND_SBAWE_SEQ is not set
# CONFIG_SND_EMU10K1_SEQ is not set
CONFIG_SND_DRIVERS=y
# CONFIG_SND_PCSP is not set
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set
CONFIG_SND_PCI=y
# CONFIG_SND_AD1889 is not set
# CONFIG_SND_ALS300 is not set
# CONFIG_SND_ALS4000 is not set
# CONFIG_SND_ALI5451 is not set
# CONFIG_SND_ATIIXP is not set
# CONFIG_SND_ATIIXP_MODEM is not set
# CONFIG_SND_AU8810 is not set
# CONFIG_SND_AU8820 is not set
# CONFIG_SND_AU8830 is not set
# CONFIG_SND_AW2 is not set
# CONFIG_SND_AZT3328 is not set
# CONFIG_SND_BT87X is not set
# CONFIG_SND_CA0106 is not set
# CONFIG_SND_CMIPCI is not set
# CONFIG_SND_OXYGEN is not set
# CONFIG_SND_CS4281 is not set
# CONFIG_SND_CS46XX is not set
# CONFIG_SND_CS5530 is not set
# CONFIG_SND_CS5535AUDIO is not set
# CONFIG_SND_CTXFI is not set
# CONFIG_SND_DARLA20 is not set
# CONFIG_SND_GINA20 is not set
# CONFIG_SND_LAYLA20 is not set
# CONFIG_SND_DARLA24 is not set
# CONFIG_SND_GINA24 is not set
# CONFIG_SND_LAYLA24 is not set
# CONFIG_SND_MONA is not set
# CONFIG_SND_MIA is not set
# CONFIG_SND_ECHO3G is not set
# CONFIG_SND_INDIGO is not set
# CONFIG_SND_INDIGOIO is not set
# CONFIG_SND_INDIGODJ is not set
# CONFIG_SND_INDIGOIOX is not set
# CONFIG_SND_INDIGODJX is not set
# CONFIG_SND_EMU10K1 is not set
# CONFIG_SND_EMU10K1X is not set
# CONFIG_SND_ENS1370 is not set
# CONFIG_SND_ENS1371 is not set
# CONFIG_SND_ES1938 is not set
# CONFIG_SND_ES1968 is not set
# CONFIG_SND_FM801 is not set
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
# CONFIG_SND_HDA_RECONFIG is not set
# CONFIG_SND_HDA_INPUT_BEEP is not set
CONFIG_SND_HDA_CODEC_REALTEK=y
CONFIG_SND_HDA_CODEC_ANALOG=y
CONFIG_SND_HDA_CODEC_SIGMATEL=y
CONFIG_SND_HDA_CODEC_VIA=y
CONFIG_SND_HDA_CODEC_ATIHDMI=y
CONFIG_SND_HDA_CODEC_NVHDMI=y
CONFIG_SND_HDA_CODEC_INTELHDMI=y
CONFIG_SND_HDA_ELD=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_HDA_CODEC_CA0110=y
CONFIG_SND_HDA_CODEC_CMEDIA=y
CONFIG_SND_HDA_CODEC_SI3054=y
CONFIG_SND_HDA_GENERIC=y
# CONFIG_SND_HDA_POWER_SAVE is not set
# CONFIG_SND_HDSP is not set
# CONFIG_SND_HDSPM is not set
# CONFIG_SND_HIFIER is not set
# CONFIG_SND_ICE1712 is not set
# CONFIG_SND_ICE1724 is not set
# CONFIG_SND_INTEL8X0 is not set
# CONFIG_SND_INTEL8X0M is not set
# CONFIG_SND_KORG1212 is not set
# CONFIG_SND_LX6464ES is not set
# CONFIG_SND_MAESTRO3 is not set
# CONFIG_SND_MIXART is not set
# CONFIG_SND_NM256 is not set
# CONFIG_SND_PCXHR is not set
# CONFIG_SND_RIPTIDE is not set
# CONFIG_SND_RME32 is not set
# CONFIG_SND_RME96 is not set
# CONFIG_SND_RME9652 is not set
# CONFIG_SND_SIS7019 is not set
# CONFIG_SND_SONICVIBES is not set
# CONFIG_SND_TRIDENT is not set
# CONFIG_SND_VIA82XX is not set
# CONFIG_SND_VIA82XX_MODEM is not set
# CONFIG_SND_VIRTUOSO is not set
# CONFIG_SND_VX222 is not set
# CONFIG_SND_YMFPCI is not set
CONFIG_SND_USB=y
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set
# CONFIG_SND_USB_US122L is not set
CONFIG_SND_PCMCIA=y
# CONFIG_SND_VXPOCKET is not set
# CONFIG_SND_PDAUDIOCF is not set
# CONFIG_SND_SOC is not set
# CONFIG_SOUND_PRIME is not set
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
CONFIG_HID_DEBUG=y
CONFIG_HIDRAW=y

#
# USB Input Devices
#
CONFIG_USB_HID=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y

#
# Special HID drivers
#
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
CONFIG_HID_CHERRY=y
CONFIG_HID_CHICONY=y
CONFIG_HID_CYPRESS=y
CONFIG_HID_DRAGONRISE=y
# CONFIG_DRAGONRISE_FF is not set
CONFIG_HID_EZKEY=y
CONFIG_HID_KYE=y
CONFIG_HID_GYRATION=y
CONFIG_HID_KENSINGTON=y
CONFIG_HID_LOGITECH=y
CONFIG_LOGITECH_FF=y
# CONFIG_LOGIRUMBLEPAD2_FF is not set
CONFIG_HID_MICROSOFT=y
CONFIG_HID_MONTEREY=y
CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_GREENASIA=y
# CONFIG_GREENASIA_FF is not set
CONFIG_HID_SMARTJOYPLUS=y
# CONFIG_SMARTJOYPLUS_FF is not set
CONFIG_HID_TOPSEED=y
CONFIG_HID_THRUSTMASTER=y
CONFIG_THRUSTMASTER_FF=y
CONFIG_HID_ZEROPLUS=y
CONFIG_ZEROPLUS_FF=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y

#
# Miscellaneous USB options
#
# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
CONFIG_USB_MON=y
# CONFIG_USB_WUSB is not set
# CONFIG_USB_WUSB_CBAF is not set

#
# USB Host Controller Drivers
#
# CONFIG_USB_C67X00_HCD is not set
# CONFIG_USB_XHCI_HCD is not set
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
# CONFIG_USB_OXU210HP_HCD is not set
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_ISP1760_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set
# CONFIG_USB_WHCI_HCD is not set
# CONFIG_USB_HWA_HCD is not set

#
# USB Device Class drivers
#
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
# CONFIG_USB_WDM is not set
# CONFIG_USB_TMC is not set

#
# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
#

#
# also be needed; see USB_STORAGE Help for more info
#
# CONFIG_USB_STORAGE is not set
CONFIG_USB_LIBUSUAL=y

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set

#
# USB port drivers
#
# CONFIG_USB_SERIAL is not set

#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
# CONFIG_USB_SEVSEG is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_BERRY_CHARGE is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_ISIGHTFW is not set
# CONFIG_USB_VST is not set
# CONFIG_USB_GADGET is not set

#
# OTG and related infrastructure
#
# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y

#
# LED drivers
#
# CONFIG_LEDS_ALIX2 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_LP5521 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
# CONFIG_LEDS_BD2802 is not set

#
# LED Triggers
#
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set

#
# iptables trigger is under Netfilter config (LED target)
#
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
# CONFIG_EDAC is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
# CONFIG_RTC_DEBUG is not set

#
# RTC interfaces
#
CONFIG_RTC_INTF_SYSFS=y
CONFIG_RTC_INTF_PROC=y
CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
# CONFIG_RTC_DRV_TEST is not set

#
# I2C RTC drivers
#
# CONFIG_RTC_DRV_DS1307 is not set
# CONFIG_RTC_DRV_DS1374 is not set
# CONFIG_RTC_DRV_DS1672 is not set
# CONFIG_RTC_DRV_MAX6900 is not set
# CONFIG_RTC_DRV_RS5C372 is not set
# CONFIG_RTC_DRV_ISL1208 is not set
# CONFIG_RTC_DRV_X1205 is not set
# CONFIG_RTC_DRV_PCF8563 is not set
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
# CONFIG_RTC_DRV_RX8581 is not set

#
# SPI RTC drivers
#

#
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
# CONFIG_RTC_DRV_BQ4802 is not set
# CONFIG_RTC_DRV_V3020 is not set

#
# on-CPU RTC drivers
#
CONFIG_DMADEVICES=y

#
# DMA Devices
#
# CONFIG_INTEL_IOATDMA is not set
# CONFIG_AUXDISPLAY is not set
# CONFIG_UIO is not set

#
# TI VLYNQ
#
# CONFIG_STAGING is not set
CONFIG_X86_PLATFORM_DEVICES=y
# CONFIG_ACER_WMI is not set
# CONFIG_ASUS_LAPTOP is not set
# CONFIG_FUJITSU_LAPTOP is not set
# CONFIG_TC1100_WMI is not set
# CONFIG_MSI_LAPTOP is not set
# CONFIG_PANASONIC_LAPTOP is not set
# CONFIG_COMPAL_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_INTEL_MENLOW is not set
# CONFIG_EEEPC_LAPTOP is not set
# CONFIG_ACPI_WMI is not set
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_TOSHIBA is not set

#
# Firmware Drivers
#
# CONFIG_EDD is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_EFI_VARS=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
CONFIG_ISCSI_IBFT_FIND=y
CONFIG_ISCSI_IBFT=y

#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
# CONFIG_EXT4_FS is not set
CONFIG_FS_XIP=y
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
CONFIG_DNOTIFY=y
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QUOTA_TREE=y
# CONFIG_QFMT_V1 is not set
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
# CONFIG_AUTOFS_FS is not set
CONFIG_AUTOFS4_FS=y
# CONFIG_FUSE_FS is not set
CONFIG_GENERIC_ACL=y

#
# Caches
#
# CONFIG_FSCACHE is not set

#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
# CONFIG_UDF_FS is not set

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
# CONFIG_CONFIGFS_FS is not set
CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_ECRYPT_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
# CONFIG_NFSD is not set
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_NFS_ACL_SUPPORT=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set

#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
# CONFIG_ATARI_PARTITION is not set
CONFIG_MAC_PARTITION=y
CONFIG_MSDOS_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
# CONFIG_LDM_PARTITION is not set
CONFIG_SGI_PARTITION=y
# CONFIG_ULTRIX_PARTITION is not set
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
# CONFIG_SYSV68_PARTITION is not set
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y
# CONFIG_DLM is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
CONFIG_ENABLE_WARN_DEPRECATED=y
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_FS=y
# CONFIG_HEADERS_CHECK is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_OBJECTS is not set
CONFIG_SLUB_DEBUG_ON=y
# CONFIG_SLUB_STATS is not set
# CONFIG_DEBUG_KMEMLEAK is not set
CONFIG_DEBUG_PREEMPT=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_PI_LIST=y
# CONFIG_RT_MUTEX_TESTER is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
CONFIG_LOCK_STAT=y
# CONFIG_DEBUG_LOCKDEP is not set
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_HIGHMEM is not set
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VIRTUAL=y
CONFIG_DEBUG_WRITECOUNT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
# CONFIG_DEBUG_NOTIFIERS is not set
CONFIG_ARCH_WANT_FRAME_POINTERS=y
CONFIG_FRAME_POINTER=y
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_KPROBES_SANITY_TEST is not set
CONFIG_BACKTRACE_SELF_TEST=y
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_NOP_TRACER=y
CONFIG_HAVE_FTRACE_NMI_ENTER=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_FTRACE_SYSCALLS=y
CONFIG_TRACER_MAX_TRACE=y
CONFIG_RING_BUFFER=y
CONFIG_FTRACE_NMI_ENTER=y
CONFIG_EVENT_TRACING=y
CONFIG_CONTEXT_SWITCH_TRACER=y
CONFIG_TRACING=y
CONFIG_GENERIC_TRACER=y
CONFIG_TRACING_SUPPORT=y
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_IRQSOFF_TRACER=y
CONFIG_PREEMPT_TRACER=y
# CONFIG_SYSPROF_TRACER is not set
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
# CONFIG_BOOT_TRACER is not set
CONFIG_BRANCH_PROFILE_NONE=y
# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
# CONFIG_PROFILE_ALL_BRANCHES is not set
# CONFIG_POWER_TRACER is not set
# CONFIG_STACK_TRACER is not set
# CONFIG_KMEMTRACE is not set
# CONFIG_WORKQUEUE_TRACER is not set
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_DYNAMIC_FTRACE=y
# CONFIG_FUNCTION_PROFILER is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SELFTEST=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_MMIOTRACE is not set
# CONFIG_RING_BUFFER_BENCHMARK is not set
# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
CONFIG_DYNAMIC_DEBUG=y
# CONFIG_DMA_API_DEBUG is not set
CONFIG_SAMPLES=y
CONFIG_SAMPLE_MARKERS=m
CONFIG_SAMPLE_TRACEPOINTS=m
# CONFIG_SAMPLE_TRACE_EVENTS is not set
# CONFIG_SAMPLE_KOBJECT is not set
# CONFIG_SAMPLE_KPROBES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
CONFIG_HAVE_ARCH_KMEMCHECK=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
# CONFIG_EARLY_PRINTK_DBGP is not set
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_X86_PTDUMP=y
CONFIG_DEBUG_RODATA=y
# CONFIG_DEBUG_RODATA_TEST is not set
# CONFIG_DEBUG_NX_TEST is not set
CONFIG_4KSTACKS=y
CONFIG_DOUBLEFAULT=y
# CONFIG_IOMMU_STRESS is not set
CONFIG_HAVE_MMIOTRACE_SUPPORT=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
CONFIG_IO_DELAY_TYPE_NONE=3
CONFIG_IO_DELAY_0X80=y
# CONFIG_IO_DELAY_0XED is not set
# CONFIG_IO_DELAY_UDELAY is not set
# CONFIG_IO_DELAY_NONE is not set
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
CONFIG_OPTIMIZE_INLINING=y

#
# Security options
#
CONFIG_KEYS=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
# CONFIG_SECURITYFS is not set
CONFIG_SECURITY_NETWORK=y
# CONFIG_SECURITY_NETWORK_XFRM is not set
# CONFIG_SECURITY_PATH is not set
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
# CONFIG_SECURITY_TOMOYO is not set
# CONFIG_IMA is not set
CONFIG_CRYPTO=y

#
# Crypto core or helper
#
# CONFIG_CRYPTO_FIPS is not set
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_PCOMP=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
CONFIG_CRYPTO_WORKQUEUE=y
# CONFIG_CRYPTO_CRYPTD is not set
CONFIG_CRYPTO_AUTHENC=y
# CONFIG_CRYPTO_TEST is not set

#
# Authenticated Encryption with Associated Data
#
# CONFIG_CRYPTO_CCM is not set
# CONFIG_CRYPTO_GCM is not set
# CONFIG_CRYPTO_SEQIV is not set

#
# Block modes
#
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_CTR is not set
# CONFIG_CRYPTO_CTS is not set
CONFIG_CRYPTO_ECB=y
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_PCBC is not set
# CONFIG_CRYPTO_XTS is not set

#
# Hash modes
#
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set

#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_CRC32C_INTEL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_RMD128 is not set
# CONFIG_CRYPTO_RMD160 is not set
# CONFIG_CRYPTO_RMD256 is not set
# CONFIG_CRYPTO_RMD320 is not set
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_WP512 is not set

#
# Ciphers
#
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANUBIS is not set
CONFIG_CRYPTO_ARC4=y
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_FCRYPT is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_SALSA20 is not set
# CONFIG_CRYPTO_SALSA20_586 is not set
# CONFIG_CRYPTO_SEED is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_586 is not set

#
# Compression
#
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_ZLIB is not set
# CONFIG_CRYPTO_LZO is not set

#
# Random Number Generation
#
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_PADLOCK is not set
# CONFIG_CRYPTO_DEV_GEODE is not set
# CONFIG_CRYPTO_DEV_HIFN_795X is not set
CONFIG_HAVE_KVM=y
CONFIG_HAVE_KVM_IRQCHIP=y
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
# CONFIG_LGUEST is not set
CONFIG_VIRTIO=y
CONFIG_VIRTIO_RING=y
CONFIG_VIRTIO_PCI=y
# CONFIG_VIRTIO_BALLOON is not set
CONFIG_BINARY_PRINTF=y

#
# Library routines
#
CONFIG_BITREVERSE=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
CONFIG_GENERIC_FIND_LAST_BIT=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
CONFIG_CRC_T10DIF=y
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
CONFIG_AUDIT_GENERIC=y
CONFIG_ZLIB_INFLATE=y
CONFIG_DECOMPRESS_GZIP=y
CONFIG_DECOMPRESS_BZIP2=y
CONFIG_DECOMPRESS_LZMA=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
# CONFIG_CPUMASK_OFFSTACK is not set
CONFIG_NLATTR=y

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
  2009-06-04 17:26                           ` [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4 Yinghai Lu
@ 2009-06-19  6:42                             ` Nathan Lynch
  2009-06-19  8:18                               ` Yinghai Lu
       [not found]                               ` <m3bpokiv0u.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
       [not found]                             ` <4A2803D1.4070001-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  1 sibling, 2 replies; 102+ messages in thread
From: Nathan Lynch @ 2009-06-19  6:42 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Christoph Lameter, Andrew Morton, mingo, mel, tglx, hpa,
	suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes,
	containers

Yinghai Lu <yinghai@kernel.org> writes:
> SRAT tables may contain nodes of very small size. The arch code may
> decide to not activate such a node. However, currently the early boot code
> sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be active
> although these nodes have no present pages.
>
> On 64-bit, N_HIGH_MEMORY == N_NORMAL_MEMORY, so this works for 64-bit too.
>
> v4: update description according to Christoph
>
> Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
> Tested-by: Jack Steiner <steiner@sgi.com>
> Acked-by: Christoph Lameter <cl@linux-foundation.org>
>
> ---
>  mm/page_alloc.c |    5 +++++
>  1 file changed, 5 insertions(+)
>
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
>  						early_node_map[i].start_pfn,
>  						early_node_map[i].end_pfn);
>  
> +	/*
> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
> +	 * that node_mask, clear it at first
> +	 */
> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>  	/* Initialise every node */
>  	mminit_verify_pageflags_layout();
>  	setup_nr_node_ids();

This patch breaks the cpuset.mems cgroup attribute on an i386 kvm guest.

With v2.6.30:

# uname -r
2.6.30
# cat /cgroup/cpuset.mems
0
# mkdir /cgroup/test
# for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
# echo $$ > /cgroup/test/tasks
# echo $?
0

With a pulled-today Linus tree:

# uname -r
2.6.30-06725-g1d89b30
# cat /cgroup/cpuset.mems

# mkdir /cgroup/test
# for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
# echo $$ > /cgroup/test/tasks
-bash: echo: write error: No space left on device

(Note that in addition to the ENOSPC error, /cgroup/cpuset.mems is empty
rather than '0' in the second test.)

I bisected to the commit containing this change.  Reverting fixes the
problem.

.config below:

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.30
# Fri Jun 19 01:27:04 2009
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
# CONFIG_X86_64 is not set
CONFIG_X86=y
CONFIG_OUTPUT_FORMAT="elf32-i386"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_FAST_CMPXCHG_LOCAL=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
# CONFIG_GENERIC_TIME_VSYSCALL is not set
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
# CONFIG_ZONE_DMA32 is not set
CONFIG_ARCH_POPULATES_NODE_MAP=y
# CONFIG_AUDIT_ARCH is not set
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_USE_GENERIC_SMP_HELPERS=y
CONFIG_X86_32_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_X86_32_LAZY_GS=y
CONFIG_KTIME_SCALAR=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_TREE=y

#
# RCU Subsystem
#
CONFIG_CLASSIC_RCU=y
# CONFIG_TREE_RCU is not set
# CONFIG_PREEMPT_RCU is not set
# CONFIG_TREE_RCU_TRACE is not set
# CONFIG_PREEMPT_RCU_TRACE is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=19
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_GROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
# CONFIG_RT_GROUP_SCHED is not set
# CONFIG_USER_SCHED is not set
CONFIG_CGROUP_SCHED=y
CONFIG_CGROUPS=y
# CONFIG_CGROUP_DEBUG is not set
CONFIG_CGROUP_NS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
# CONFIG_CGROUP_MEM_RES_CTLR is not set
CONFIG_SYSFS_DEPRECATED=y
CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_RELAY=y
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_RD_GZIP=y
CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_ANON_INODES=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_HAVE_PERF_COUNTERS=y

#
# Performance Counters
#
# CONFIG_PERF_COUNTERS is not set
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_PCI_QUIRKS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_STRIP_ASM_SYMS is not set
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_PROFILING=y
CONFIG_TRACEPOINTS=y
CONFIG_MARKERS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
CONFIG_KPROBES=y
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_KRETPROBES=y
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_API_DEBUG=y
# CONFIG_SLOW_WORK is not set
CONFIG_HAVE_GENERIC_DMA_COHERENT=y
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
CONFIG_BLK_DEV_BSG=y
# CONFIG_BLK_DEV_INTEGRITY is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
CONFIG_FREEZER=y

#
# Processor type and features
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
# CONFIG_SPARSE_IRQ is not set
CONFIG_X86_MPPARSE=y
# CONFIG_X86_BIGSMP is not set
CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_RDC321X is not set
# CONFIG_X86_32_NON_STANDARD is not set
CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
CONFIG_M686=y
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
# CONFIG_MCORE2 is not set
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CPU=y
CONFIG_X86_L1_CACHE_BYTES=64
CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_X86_XADD=y
# CONFIG_X86_PPRO_FENCE is not set
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_CYRIX_32=y
CONFIG_CPU_SUP_AMD=y
CONFIG_CPU_SUP_CENTAUR=y
CONFIG_CPU_SUP_TRANSMETA_32=y
CONFIG_CPU_SUP_UMC_32=y
# CONFIG_X86_DS is not set
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
# CONFIG_IOMMU_HELPER is not set
# CONFIG_IOMMU_API is not set
CONFIG_NR_CPUS=8
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set
# CONFIG_X86_MCE is not set
# CONFIG_X86_ANCIENT_MCE is not set
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
CONFIG_X86_REBOOTFIXUPS=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_INTEL=y
# CONFIG_MICROCODE_AMD is not set
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
# CONFIG_X86_CPU_DEBUG is not set
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
# CONFIG_PHYS_ADDR_T_64BIT is not set
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_HAVE_MLOCK=y
CONFIG_HAVE_MLOCKED_PAGE_BIT=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_HIGHPTE=y
# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set
CONFIG_X86_RESERVE_LOW_64K=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
CONFIG_EFI=y
CONFIG_SECCOMP=y
# CONFIG_CC_STACKPROTECTOR is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_KEXEC_JUMP is not set
CONFIG_PHYSICAL_START=0x1000000
CONFIG_RELOCATABLE=y
CONFIG_X86_NEED_RELOCS=y
CONFIG_PHYSICAL_ALIGN=0x200000
CONFIG_HOTPLUG_CPU=y
CONFIG_COMPAT_VDSO=y
# CONFIG_CMDLINE_BOOL is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management and ACPI options
#
CONFIG_PM=y
CONFIG_PM_DEBUG=y
# CONFIG_PM_VERBOSE is not set
CONFIG_CAN_PM_TRACE=y
CONFIG_PM_TRACE=y
CONFIG_PM_TRACE_RTC=y
CONFIG_PM_SLEEP_SMP=y
CONFIG_PM_SLEEP=y
CONFIG_SUSPEND=y
# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_SUSPEND_FREEZER=y
CONFIG_HIBERNATION_NVS=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
CONFIG_ACPI=y
CONFIG_ACPI_SLEEP=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_PROCFS_POWER=y
CONFIG_ACPI_SYSFS_POWER=y
CONFIG_ACPI_PROC_EVENT=y
CONFIG_ACPI_AC=y
CONFIG_ACPI_BATTERY=y
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_VIDEO=y
CONFIG_ACPI_FAN=y
CONFIG_ACPI_DOCK=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
# CONFIG_ACPI_PCI_SLOT is not set
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
# CONFIG_ACPI_SBS is not set
# CONFIG_APM is not set

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
CONFIG_CPU_FREQ_DEBUG=y
# CONFIG_CPU_FREQ_STAT is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set

#
# CPUFreq processor drivers
#
CONFIG_X86_ACPI_CPUFREQ=y
# CONFIG_X86_POWERNOW_K6 is not set
# CONFIG_X86_POWERNOW_K7 is not set
# CONFIG_X86_POWERNOW_K8 is not set
# CONFIG_X86_GX_SUSPMOD is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_SPEEDSTEP_ICH is not set
# CONFIG_X86_SPEEDSTEP_SMI is not set
# CONFIG_X86_P4_CLOCKMOD is not set
# CONFIG_X86_CPUFREQ_NFORCE2 is not set
# CONFIG_X86_LONGRUN is not set
# CONFIG_X86_LONGHAUL is not set
# CONFIG_X86_E_POWERSAVER is not set

#
# shared options
#
# CONFIG_X86_SPEEDSTEP_LIB is not set
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPU_IDLE_GOV_MENU=y

#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
# CONFIG_PCI_GOOLPC is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
# CONFIG_DMAR is not set
CONFIG_PCIEPORTBUS=y
# CONFIG_HOTPLUG_PCI_PCIE is not set
CONFIG_PCIEAER=y
# CONFIG_PCIEASPM is not set
CONFIG_ARCH_SUPPORTS_MSI=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
# CONFIG_PCI_IOV is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
# CONFIG_OLPC is not set
CONFIG_K8_NB=y
CONFIG_PCCARD=y
# CONFIG_PCMCIA_DEBUG is not set
CONFIG_PCMCIA=y
CONFIG_PCMCIA_LOAD_CIS=y
CONFIG_PCMCIA_IOCTL=y
CONFIG_CARDBUS=y

#
# PC-card bridges
#
CONFIG_YENTA=y
CONFIG_YENTA_O2=y
CONFIG_YENTA_RICOH=y
CONFIG_YENTA_TI=y
CONFIG_YENTA_ENE_TUNE=y
CONFIG_YENTA_TOSHIBA=y
# CONFIG_PD6729 is not set
# CONFIG_I82092 is not set
CONFIG_PCCARD_NONSTATIC=y
CONFIG_HOTPLUG_PCI=y
# CONFIG_HOTPLUG_PCI_FAKE is not set
# CONFIG_HOTPLUG_PCI_IBM is not set
# CONFIG_HOTPLUG_PCI_ACPI is not set
# CONFIG_HOTPLUG_PCI_CPCI is not set
# CONFIG_HOTPLUG_PCI_SHPC is not set

#
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_HAVE_AOUT=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y
CONFIG_HAVE_ATOMIC_IOMAP=y
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
CONFIG_XFRM_USER=y
# CONFIG_XFRM_SUB_POLICY is not set
# CONFIG_XFRM_MIGRATE is not set
# CONFIG_XFRM_STATISTICS is not set
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_ASK_IP_FIB_HASH=y
# CONFIG_IP_FIB_TRIE is not set
CONFIG_IP_FIB_HASH=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
# CONFIG_ARPD is not set
CONFIG_SYN_COOKIES=y
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
CONFIG_INET_TUNNEL=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET_LRO=y
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
CONFIG_TCP_CONG_CUBIC=y
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
# CONFIG_TCP_CONG_HSTCP is not set
# CONFIG_TCP_CONG_HYBLA is not set
# CONFIG_TCP_CONG_VEGAS is not set
# CONFIG_TCP_CONG_SCALABLE is not set
# CONFIG_TCP_CONG_LP is not set
# CONFIG_TCP_CONG_VENO is not set
# CONFIG_TCP_CONG_YEAH is not set
# CONFIG_TCP_CONG_ILLINOIS is not set
# CONFIG_DEFAULT_BIC is not set
CONFIG_DEFAULT_CUBIC=y
# CONFIG_DEFAULT_HTCP is not set
# CONFIG_DEFAULT_VEGAS is not set
# CONFIG_DEFAULT_WESTWOOD is not set
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
# CONFIG_IPV6_OPTIMISTIC_DAD is not set
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_MIP6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
CONFIG_INET6_XFRM_MODE_TRANSPORT=y
CONFIG_INET6_XFRM_MODE_TUNNEL=y
CONFIG_INET6_XFRM_MODE_BEET=y
# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
CONFIG_IPV6_SIT=y
CONFIG_IPV6_NDISC_NODETYPE=y
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6_MULTIPLE_TABLES is not set
# CONFIG_IPV6_MROUTE is not set
CONFIG_NETLABEL=y
CONFIG_NETWORK_SECMARK=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
# CONFIG_NETFILTER_ADVANCED is not set

#
# Core Netfilter Configuration
#
CONFIG_NETFILTER_NETLINK=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_FTP=y
CONFIG_NF_CONNTRACK_IRC=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_MARK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
# CONFIG_IP_VS is not set

#
# IP: Netfilter Configuration
#
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_CONNTRACK_PROC_COMPAT=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_LOG=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_NF_NAT_NEEDED=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_NF_NAT_FTP=y
CONFIG_NF_NAT_IRC=y
# CONFIG_NF_NAT_TFTP is not set
# CONFIG_NF_NAT_AMANDA is not set
# CONFIG_NF_NAT_PPTP is not set
# CONFIG_NF_NAT_H323 is not set
CONFIG_NF_NAT_SIP=y
CONFIG_IP_NF_MANGLE=y

#
# IPv6: Netfilter Configuration
#
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
CONFIG_IP6_NF_TARGET_LOG=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
CONFIG_STP=y
CONFIG_BRIDGE=y
# CONFIG_NET_DSA is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
# CONFIG_PHONET is not set
# CONFIG_IEEE802154 is not set
# CONFIG_NET_SCHED is not set
# CONFIG_DCB is not set

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_TCPPROBE is not set
# CONFIG_NET_DROP_MONITOR is not set
CONFIG_HAMRADIO=y

#
# Packet Radio protocols
#
# CONFIG_AX25 is not set
# CONFIG_CAN is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
CONFIG_FIB_RULES=y
# CONFIG_WIRELESS is not set
# CONFIG_WIMAX is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
CONFIG_FIRMWARE_IN_KERNEL=y
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_DEBUG_DRIVER is not set
CONFIG_DEBUG_DEVRES=y
# CONFIG_SYS_HYPERVISOR is not set
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
# CONFIG_MTD is not set
# CONFIG_PARPORT is not set
CONFIG_PNP=y
CONFIG_PNP_DEBUG_MESSAGES=y

#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
# CONFIG_BLK_DEV_FD is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_UB is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
# CONFIG_BLK_DEV_XIP is not set
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
# CONFIG_VIRTIO_BLK is not set
# CONFIG_BLK_DEV_HD is not set
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
# CONFIG_ISL29003 is not set
# CONFIG_C2PORT is not set

#
# EEPROM support
#
# CONFIG_EEPROM_AT24 is not set
# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EEPROM_93CX6 is not set
# CONFIG_CB710_CORE is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set

#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
# CONFIG_SCSI_TGT is not set
# CONFIG_SCSI_NETLINK is not set
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
# CONFIG_BLK_DEV_SR_VENDOR is not set
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set
CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set
# CONFIG_SCSI_SCAN_ASYNC is not set
CONFIG_SCSI_WAIT_SCAN=m

#
# SCSI Transports
#
# CONFIG_SCSI_SPI_ATTRS is not set
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_ISCSI_TCP is not set
# CONFIG_SCSI_BNX2_ISCSI is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_MVSAS is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_MPT2SAS is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_LIBFC is not set
# CONFIG_LIBFCOE is not set
# CONFIG_FCOE is not set
# CONFIG_FCOE_FNIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_QLA_ISCSI is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_SRP is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
# CONFIG_SCSI_DH is not set
# CONFIG_SCSI_OSD_INITIATOR is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
CONFIG_SATA_PMP=y
# CONFIG_SATA_AHCI is not set
# CONFIG_SATA_SIL24 is not set
CONFIG_ATA_SFF=y
# CONFIG_SATA_SVW is not set
CONFIG_ATA_PIIX=y
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SX4 is not set
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set
# CONFIG_SATA_INIC162X is not set
# CONFIG_PATA_ACPI is not set
# CONFIG_PATA_ALI is not set
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
# CONFIG_PATA_CS5535 is not set
# CONFIG_PATA_CS5536 is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_ATA_GENERIC is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NINJA32 is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PCMCIA is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RZ1000 is not set
# CONFIG_PATA_SC1200 is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
# CONFIG_PATA_SCH is not set
# CONFIG_MD is not set
# CONFIG_FUSION is not set

#
# IEEE 1394 (FireWire) support
#

#
# Enable only one of the two stacks, unless you know what you are doing
#
# CONFIG_FIREWIRE is not set
# CONFIG_IEEE1394 is not set
# CONFIG_I2O is not set
# CONFIG_MACINTOSH_DRIVERS is not set
CONFIG_NETDEVICES=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
# CONFIG_MACVLAN is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
CONFIG_VETH=y
# CONFIG_NET_SB1000 is not set
# CONFIG_ARCNET is not set
# CONFIG_PHYLIB is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_ETHOC is not set
# CONFIG_DNET is not set
# CONFIG_NET_TULIP is not set
# CONFIG_HP100 is not set
# CONFIG_IBM_NEW_EMAC_ZMII is not set
# CONFIG_IBM_NEW_EMAC_RGMII is not set
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_B44 is not set
# CONFIG_FORCEDETH is not set
# CONFIG_E100 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
CONFIG_8139CP=y
# CONFIG_8139TOO is not set
# CONFIG_R6040 is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SMSC9420 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_KS8842 is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_SC92031 is not set
# CONFIG_ATL2 is not set
# CONFIG_NETDEV_1000 is not set
# CONFIG_NETDEV_10000 is not set
# CONFIG_TR is not set

#
# Wireless LAN
#
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set

#
# Enable WiMAX (Networking options) to see the WiMAX drivers
#

#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
# CONFIG_NET_PCMCIA is not set
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
CONFIG_NETCONSOLE=y
# CONFIG_NETCONSOLE_DYNAMIC is not set
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
# CONFIG_VIRTIO_NET is not set
# CONFIG_ISDN is not set
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_FF_MEMLESS=y
CONFIG_INPUT_POLLDEV=y

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_PS2_ALPS=y
CONFIG_MOUSE_PS2_LOGIPS2PP=y
CONFIG_MOUSE_PS2_SYNAPTICS=y
CONFIG_MOUSE_PS2_LIFEBOOK=y
CONFIG_MOUSE_PS2_TRACKPOINT=y
# CONFIG_MOUSE_PS2_ELANTECH is not set
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
# CONFIG_MOUSE_BCM5974 is not set
# CONFIG_MOUSE_VSXXXAA is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
# CONFIG_JOYSTICK_A3D is not set
# CONFIG_JOYSTICK_ADI is not set
# CONFIG_JOYSTICK_COBRA is not set
# CONFIG_JOYSTICK_GF2K is not set
# CONFIG_JOYSTICK_GRIP is not set
# CONFIG_JOYSTICK_GRIP_MP is not set
# CONFIG_JOYSTICK_GUILLEMOT is not set
# CONFIG_JOYSTICK_INTERACT is not set
# CONFIG_JOYSTICK_SIDEWINDER is not set
# CONFIG_JOYSTICK_TMDC is not set
# CONFIG_JOYSTICK_IFORCE is not set
# CONFIG_JOYSTICK_WARRIOR is not set
# CONFIG_JOYSTICK_MAGELLAN is not set
# CONFIG_JOYSTICK_SPACEORB is not set
# CONFIG_JOYSTICK_SPACEBALL is not set
# CONFIG_JOYSTICK_STINGER is not set
# CONFIG_JOYSTICK_TWIDJOY is not set
# CONFIG_JOYSTICK_ZHENHUA is not set
# CONFIG_JOYSTICK_JOYDUMP is not set
# CONFIG_JOYSTICK_XPAD is not set
CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_ACECAD is not set
# CONFIG_TABLET_USB_AIPTEK is not set
# CONFIG_TABLET_USB_GTCO is not set
# CONFIG_TABLET_USB_KBTAB is not set
# CONFIG_TABLET_USB_WACOM is not set
CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
# CONFIG_TOUCHSCREEN_AD7879 is not set
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
# CONFIG_TOUCHSCREEN_TSC2007 is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_APANEL is not set
# CONFIG_INPUT_WISTRON_BTNS is not set
# CONFIG_INPUT_ATLAS_BTNS is not set
# CONFIG_INPUT_ATI_REMOTE is not set
# CONFIG_INPUT_ATI_REMOTE2 is not set
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_UINPUT is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_DEVKMEM=y
CONFIG_SERIAL_NONSTANDARD=y
# CONFIG_COMPUTONE is not set
# CONFIG_ROCKETPORT is not set
# CONFIG_CYCLADES is not set
# CONFIG_DIGIEPCA is not set
# CONFIG_MOXA_INTELLIO is not set
# CONFIG_MOXA_SMARTIO is not set
# CONFIG_ISI is not set
# CONFIG_SYNCLINK is not set
# CONFIG_SYNCLINKMP is not set
# CONFIG_SYNCLINK_GT is not set
# CONFIG_N_HDLC is not set
# CONFIG_RISCOM8 is not set
# CONFIG_SPECIALIX is not set
# CONFIG_SX is not set
# CONFIG_RIO is not set
# CONFIG_STALDRV is not set
# CONFIG_NOZOMI is not set

#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_PNP=y
# CONFIG_SERIAL_8250_CS is not set
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y

#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_VIRTIO_CONSOLE is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_TIMERIOMEM is not set
CONFIG_HW_RANDOM_INTEL=y
CONFIG_HW_RANDOM_AMD=y
CONFIG_HW_RANDOM_GEODE=y
CONFIG_HW_RANDOM_VIA=y
# CONFIG_HW_RANDOM_VIRTIO is not set
CONFIG_NVRAM=y
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set

#
# PCMCIA character devices
#
# CONFIG_SYNCLINK_CS is not set
# CONFIG_CARDMAN_4000 is not set
# CONFIG_CARDMAN_4040 is not set
# CONFIG_IPWIRELESS is not set
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
CONFIG_I2C_HELPER_AUTO=y
CONFIG_I2C_ALGOBIT=y

#
# I2C Hardware Bus support
#

#
# PC SMBus host controller drivers
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
CONFIG_I2C_I801=y
# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set

#
# I2C system bus drivers (mostly embedded / system-on-chip)
#
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_SIMTEC is not set

#
# External I2C/SMBus adapter drivers
#
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_TAOS_EVM is not set
# CONFIG_I2C_TINY_USB is not set

#
# Graphics adapter I2C/DDC channel drivers
#
# CONFIG_I2C_VOODOO3 is not set

#
# Other I2C/SMBus bus drivers
#
# CONFIG_I2C_PCA_PLATFORM is not set
# CONFIG_I2C_STUB is not set
# CONFIG_SCx200_ACB is not set

#
# Miscellaneous I2C Chip support
#
# CONFIG_DS1682 is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set
# CONFIG_SPI is not set
CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
# CONFIG_GPIOLIB is not set
# CONFIG_W1 is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
# CONFIG_PDA_POWER is not set
# CONFIG_BATTERY_DS2760 is not set
# CONFIG_BATTERY_BQ27x00 is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
# CONFIG_WATCHDOG_NOWAYOUT is not set

#
# Watchdog Device Drivers
#
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_ACQUIRE_WDT is not set
# CONFIG_ADVANTECH_WDT is not set
# CONFIG_ALIM1535_WDT is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_SC520_WDT is not set
# CONFIG_EUROTECH_WDT is not set
# CONFIG_IB700_WDT is not set
# CONFIG_IBMASR is not set
# CONFIG_WAFER_WDT is not set
# CONFIG_I6300ESB_WDT is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_60XX_WDT is not set
# CONFIG_SBC8360_WDT is not set
# CONFIG_SBC7240_WDT is not set
# CONFIG_CPU5_WDT is not set
# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83697HF_WDT is not set
# CONFIG_W83697UG_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
# CONFIG_SBC_EPX_C3_WATCHDOG is not set

#
# PCI-based Watchdog Cards
#
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_WDTPCI is not set

#
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
CONFIG_SSB_POSSIBLE=y

#
# Sonics Silicon Backplane
#
# CONFIG_SSB is not set

#
# Multifunction device drivers
#
# CONFIG_MFD_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_HTC_PASIC3 is not set
# CONFIG_TWL4030_CORE is not set
# CONFIG_MFD_TMIO is not set
# CONFIG_PMIC_DA903X is not set
# CONFIG_MFD_WM8400 is not set
# CONFIG_MFD_WM8350_I2C is not set
# CONFIG_MFD_PCF50633 is not set
# CONFIG_AB3100_CORE is not set
# CONFIG_REGULATOR is not set
# CONFIG_MEDIA_SUPPORT is not set

#
# Graphics support
#
CONFIG_AGP=y
# CONFIG_AGP_ALI is not set
# CONFIG_AGP_ATI is not set
# CONFIG_AGP_AMD is not set
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
# CONFIG_AGP_NVIDIA is not set
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_SWORKS is not set
# CONFIG_AGP_VIA is not set
# CONFIG_AGP_EFFICEON is not set
CONFIG_DRM=y
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_R128 is not set
# CONFIG_DRM_RADEON is not set
# CONFIG_DRM_I810 is not set
# CONFIG_DRM_I830 is not set
CONFIG_DRM_I915=y
# CONFIG_DRM_I915_KMS is not set
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
# CONFIG_DRM_VIA is not set
# CONFIG_DRM_SAVAGE is not set
# CONFIG_VGASTATE is not set
CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB_DDC is not set
# CONFIG_FB_BOOT_VESA_SUPPORT is not set
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
# CONFIG_FB_SYS_FILLRECT is not set
# CONFIG_FB_SYS_COPYAREA is not set
# CONFIG_FB_SYS_IMAGEBLIT is not set
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_SYS_FOPS is not set
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y

#
# Frame buffer hardware drivers
#
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_UVESA is not set
# CONFIG_FB_VESA is not set
CONFIG_FB_EFI=y
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I810 is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_INTEL is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_VIA is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
# CONFIG_FB_BROADSHEET is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_GENERIC=y
# CONFIG_BACKLIGHT_PROGEAR is not set
# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
# CONFIG_BACKLIGHT_SAHARA is not set

#
# Display device support
#
# CONFIG_DISPLAY_SUPPORT is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
CONFIG_SOUND=y
CONFIG_SOUND_OSS_CORE=y
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
CONFIG_SND_HWDEP=y
CONFIG_SND_JACK=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_SND_HRTIMER is not set
CONFIG_SND_DYNAMIC_MINORS=y
CONFIG_SND_SUPPORT_OLD_API=y
CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set
CONFIG_SND_VMASTER=y
# CONFIG_SND_RAWMIDI_SEQ is not set
# CONFIG_SND_OPL3_LIB_SEQ is not set
# CONFIG_SND_OPL4_LIB_SEQ is not set
# CONFIG_SND_SBAWE_SEQ is not set
# CONFIG_SND_EMU10K1_SEQ is not set
CONFIG_SND_DRIVERS=y
# CONFIG_SND_PCSP is not set
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set
CONFIG_SND_PCI=y
# CONFIG_SND_AD1889 is not set
# CONFIG_SND_ALS300 is not set
# CONFIG_SND_ALS4000 is not set
# CONFIG_SND_ALI5451 is not set
# CONFIG_SND_ATIIXP is not set
# CONFIG_SND_ATIIXP_MODEM is not set
# CONFIG_SND_AU8810 is not set
# CONFIG_SND_AU8820 is not set
# CONFIG_SND_AU8830 is not set
# CONFIG_SND_AW2 is not set
# CONFIG_SND_AZT3328 is not set
# CONFIG_SND_BT87X is not set
# CONFIG_SND_CA0106 is not set
# CONFIG_SND_CMIPCI is not set
# CONFIG_SND_OXYGEN is not set
# CONFIG_SND_CS4281 is not set
# CONFIG_SND_CS46XX is not set
# CONFIG_SND_CS5530 is not set
# CONFIG_SND_CS5535AUDIO is not set
# CONFIG_SND_CTXFI is not set
# CONFIG_SND_DARLA20 is not set
# CONFIG_SND_GINA20 is not set
# CONFIG_SND_LAYLA20 is not set
# CONFIG_SND_DARLA24 is not set
# CONFIG_SND_GINA24 is not set
# CONFIG_SND_LAYLA24 is not set
# CONFIG_SND_MONA is not set
# CONFIG_SND_MIA is not set
# CONFIG_SND_ECHO3G is not set
# CONFIG_SND_INDIGO is not set
# CONFIG_SND_INDIGOIO is not set
# CONFIG_SND_INDIGODJ is not set
# CONFIG_SND_INDIGOIOX is not set
# CONFIG_SND_INDIGODJX is not set
# CONFIG_SND_EMU10K1 is not set
# CONFIG_SND_EMU10K1X is not set
# CONFIG_SND_ENS1370 is not set
# CONFIG_SND_ENS1371 is not set
# CONFIG_SND_ES1938 is not set
# CONFIG_SND_ES1968 is not set
# CONFIG_SND_FM801 is not set
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
# CONFIG_SND_HDA_RECONFIG is not set
# CONFIG_SND_HDA_INPUT_BEEP is not set
CONFIG_SND_HDA_CODEC_REALTEK=y
CONFIG_SND_HDA_CODEC_ANALOG=y
CONFIG_SND_HDA_CODEC_SIGMATEL=y
CONFIG_SND_HDA_CODEC_VIA=y
CONFIG_SND_HDA_CODEC_ATIHDMI=y
CONFIG_SND_HDA_CODEC_NVHDMI=y
CONFIG_SND_HDA_CODEC_INTELHDMI=y
CONFIG_SND_HDA_ELD=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_HDA_CODEC_CA0110=y
CONFIG_SND_HDA_CODEC_CMEDIA=y
CONFIG_SND_HDA_CODEC_SI3054=y
CONFIG_SND_HDA_GENERIC=y
# CONFIG_SND_HDA_POWER_SAVE is not set
# CONFIG_SND_HDSP is not set
# CONFIG_SND_HDSPM is not set
# CONFIG_SND_HIFIER is not set
# CONFIG_SND_ICE1712 is not set
# CONFIG_SND_ICE1724 is not set
# CONFIG_SND_INTEL8X0 is not set
# CONFIG_SND_INTEL8X0M is not set
# CONFIG_SND_KORG1212 is not set
# CONFIG_SND_LX6464ES is not set
# CONFIG_SND_MAESTRO3 is not set
# CONFIG_SND_MIXART is not set
# CONFIG_SND_NM256 is not set
# CONFIG_SND_PCXHR is not set
# CONFIG_SND_RIPTIDE is not set
# CONFIG_SND_RME32 is not set
# CONFIG_SND_RME96 is not set
# CONFIG_SND_RME9652 is not set
# CONFIG_SND_SIS7019 is not set
# CONFIG_SND_SONICVIBES is not set
# CONFIG_SND_TRIDENT is not set
# CONFIG_SND_VIA82XX is not set
# CONFIG_SND_VIA82XX_MODEM is not set
# CONFIG_SND_VIRTUOSO is not set
# CONFIG_SND_VX222 is not set
# CONFIG_SND_YMFPCI is not set
CONFIG_SND_USB=y
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set
# CONFIG_SND_USB_US122L is not set
CONFIG_SND_PCMCIA=y
# CONFIG_SND_VXPOCKET is not set
# CONFIG_SND_PDAUDIOCF is not set
# CONFIG_SND_SOC is not set
# CONFIG_SOUND_PRIME is not set
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
CONFIG_HID_DEBUG=y
CONFIG_HIDRAW=y

#
# USB Input Devices
#
CONFIG_USB_HID=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y

#
# Special HID drivers
#
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
CONFIG_HID_CHERRY=y
CONFIG_HID_CHICONY=y
CONFIG_HID_CYPRESS=y
CONFIG_HID_DRAGONRISE=y
# CONFIG_DRAGONRISE_FF is not set
CONFIG_HID_EZKEY=y
CONFIG_HID_KYE=y
CONFIG_HID_GYRATION=y
CONFIG_HID_KENSINGTON=y
CONFIG_HID_LOGITECH=y
CONFIG_LOGITECH_FF=y
# CONFIG_LOGIRUMBLEPAD2_FF is not set
CONFIG_HID_MICROSOFT=y
CONFIG_HID_MONTEREY=y
CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_GREENASIA=y
# CONFIG_GREENASIA_FF is not set
CONFIG_HID_SMARTJOYPLUS=y
# CONFIG_SMARTJOYPLUS_FF is not set
CONFIG_HID_TOPSEED=y
CONFIG_HID_THRUSTMASTER=y
CONFIG_THRUSTMASTER_FF=y
CONFIG_HID_ZEROPLUS=y
CONFIG_ZEROPLUS_FF=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y

#
# Miscellaneous USB options
#
# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
CONFIG_USB_MON=y
# CONFIG_USB_WUSB is not set
# CONFIG_USB_WUSB_CBAF is not set

#
# USB Host Controller Drivers
#
# CONFIG_USB_C67X00_HCD is not set
# CONFIG_USB_XHCI_HCD is not set
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
# CONFIG_USB_OXU210HP_HCD is not set
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_ISP1760_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set
# CONFIG_USB_WHCI_HCD is not set
# CONFIG_USB_HWA_HCD is not set

#
# USB Device Class drivers
#
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
# CONFIG_USB_WDM is not set
# CONFIG_USB_TMC is not set

#
# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
#

#
# also be needed; see USB_STORAGE Help for more info
#
# CONFIG_USB_STORAGE is not set
CONFIG_USB_LIBUSUAL=y

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set

#
# USB port drivers
#
# CONFIG_USB_SERIAL is not set

#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
# CONFIG_USB_SEVSEG is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_BERRY_CHARGE is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_ISIGHTFW is not set
# CONFIG_USB_VST is not set
# CONFIG_USB_GADGET is not set

#
# OTG and related infrastructure
#
# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y

#
# LED drivers
#
# CONFIG_LEDS_ALIX2 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_LP5521 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
# CONFIG_LEDS_BD2802 is not set

#
# LED Triggers
#
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set

#
# iptables trigger is under Netfilter config (LED target)
#
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
# CONFIG_EDAC is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
# CONFIG_RTC_DEBUG is not set

#
# RTC interfaces
#
CONFIG_RTC_INTF_SYSFS=y
CONFIG_RTC_INTF_PROC=y
CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
# CONFIG_RTC_DRV_TEST is not set

#
# I2C RTC drivers
#
# CONFIG_RTC_DRV_DS1307 is not set
# CONFIG_RTC_DRV_DS1374 is not set
# CONFIG_RTC_DRV_DS1672 is not set
# CONFIG_RTC_DRV_MAX6900 is not set
# CONFIG_RTC_DRV_RS5C372 is not set
# CONFIG_RTC_DRV_ISL1208 is not set
# CONFIG_RTC_DRV_X1205 is not set
# CONFIG_RTC_DRV_PCF8563 is not set
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
# CONFIG_RTC_DRV_RX8581 is not set

#
# SPI RTC drivers
#

#
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
# CONFIG_RTC_DRV_BQ4802 is not set
# CONFIG_RTC_DRV_V3020 is not set

#
# on-CPU RTC drivers
#
CONFIG_DMADEVICES=y

#
# DMA Devices
#
# CONFIG_INTEL_IOATDMA is not set
# CONFIG_AUXDISPLAY is not set
# CONFIG_UIO is not set

#
# TI VLYNQ
#
# CONFIG_STAGING is not set
CONFIG_X86_PLATFORM_DEVICES=y
# CONFIG_ACER_WMI is not set
# CONFIG_ASUS_LAPTOP is not set
# CONFIG_FUJITSU_LAPTOP is not set
# CONFIG_TC1100_WMI is not set
# CONFIG_MSI_LAPTOP is not set
# CONFIG_PANASONIC_LAPTOP is not set
# CONFIG_COMPAL_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_INTEL_MENLOW is not set
# CONFIG_EEEPC_LAPTOP is not set
# CONFIG_ACPI_WMI is not set
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_TOSHIBA is not set

#
# Firmware Drivers
#
# CONFIG_EDD is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_EFI_VARS=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
CONFIG_ISCSI_IBFT_FIND=y
CONFIG_ISCSI_IBFT=y

#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
# CONFIG_EXT4_FS is not set
CONFIG_FS_XIP=y
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
CONFIG_DNOTIFY=y
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QUOTA_TREE=y
# CONFIG_QFMT_V1 is not set
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
# CONFIG_AUTOFS_FS is not set
CONFIG_AUTOFS4_FS=y
# CONFIG_FUSE_FS is not set
CONFIG_GENERIC_ACL=y

#
# Caches
#
# CONFIG_FSCACHE is not set

#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
# CONFIG_UDF_FS is not set

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
# CONFIG_CONFIGFS_FS is not set
CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_ECRYPT_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
# CONFIG_NFSD is not set
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_NFS_ACL_SUPPORT=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set

#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
# CONFIG_ATARI_PARTITION is not set
CONFIG_MAC_PARTITION=y
CONFIG_MSDOS_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
# CONFIG_LDM_PARTITION is not set
CONFIG_SGI_PARTITION=y
# CONFIG_ULTRIX_PARTITION is not set
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
# CONFIG_SYSV68_PARTITION is not set
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y
# CONFIG_DLM is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
CONFIG_ENABLE_WARN_DEPRECATED=y
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_FS=y
# CONFIG_HEADERS_CHECK is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_OBJECTS is not set
CONFIG_SLUB_DEBUG_ON=y
# CONFIG_SLUB_STATS is not set
# CONFIG_DEBUG_KMEMLEAK is not set
CONFIG_DEBUG_PREEMPT=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_PI_LIST=y
# CONFIG_RT_MUTEX_TESTER is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
CONFIG_LOCK_STAT=y
# CONFIG_DEBUG_LOCKDEP is not set
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_HIGHMEM is not set
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VIRTUAL=y
CONFIG_DEBUG_WRITECOUNT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
# CONFIG_DEBUG_NOTIFIERS is not set
CONFIG_ARCH_WANT_FRAME_POINTERS=y
CONFIG_FRAME_POINTER=y
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_KPROBES_SANITY_TEST is not set
CONFIG_BACKTRACE_SELF_TEST=y
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_NOP_TRACER=y
CONFIG_HAVE_FTRACE_NMI_ENTER=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_FTRACE_SYSCALLS=y
CONFIG_TRACER_MAX_TRACE=y
CONFIG_RING_BUFFER=y
CONFIG_FTRACE_NMI_ENTER=y
CONFIG_EVENT_TRACING=y
CONFIG_CONTEXT_SWITCH_TRACER=y
CONFIG_TRACING=y
CONFIG_GENERIC_TRACER=y
CONFIG_TRACING_SUPPORT=y
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_IRQSOFF_TRACER=y
CONFIG_PREEMPT_TRACER=y
# CONFIG_SYSPROF_TRACER is not set
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
# CONFIG_BOOT_TRACER is not set
CONFIG_BRANCH_PROFILE_NONE=y
# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
# CONFIG_PROFILE_ALL_BRANCHES is not set
# CONFIG_POWER_TRACER is not set
# CONFIG_STACK_TRACER is not set
# CONFIG_KMEMTRACE is not set
# CONFIG_WORKQUEUE_TRACER is not set
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_DYNAMIC_FTRACE=y
# CONFIG_FUNCTION_PROFILER is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SELFTEST=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_MMIOTRACE is not set
# CONFIG_RING_BUFFER_BENCHMARK is not set
# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
CONFIG_DYNAMIC_DEBUG=y
# CONFIG_DMA_API_DEBUG is not set
CONFIG_SAMPLES=y
CONFIG_SAMPLE_MARKERS=m
CONFIG_SAMPLE_TRACEPOINTS=m
# CONFIG_SAMPLE_TRACE_EVENTS is not set
# CONFIG_SAMPLE_KOBJECT is not set
# CONFIG_SAMPLE_KPROBES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
CONFIG_HAVE_ARCH_KMEMCHECK=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
# CONFIG_EARLY_PRINTK_DBGP is not set
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_X86_PTDUMP=y
CONFIG_DEBUG_RODATA=y
# CONFIG_DEBUG_RODATA_TEST is not set
# CONFIG_DEBUG_NX_TEST is not set
CONFIG_4KSTACKS=y
CONFIG_DOUBLEFAULT=y
# CONFIG_IOMMU_STRESS is not set
CONFIG_HAVE_MMIOTRACE_SUPPORT=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
CONFIG_IO_DELAY_TYPE_NONE=3
CONFIG_IO_DELAY_0X80=y
# CONFIG_IO_DELAY_0XED is not set
# CONFIG_IO_DELAY_UDELAY is not set
# CONFIG_IO_DELAY_NONE is not set
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
CONFIG_OPTIMIZE_INLINING=y

#
# Security options
#
CONFIG_KEYS=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
# CONFIG_SECURITYFS is not set
CONFIG_SECURITY_NETWORK=y
# CONFIG_SECURITY_NETWORK_XFRM is not set
# CONFIG_SECURITY_PATH is not set
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
# CONFIG_SECURITY_TOMOYO is not set
# CONFIG_IMA is not set
CONFIG_CRYPTO=y

#
# Crypto core or helper
#
# CONFIG_CRYPTO_FIPS is not set
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_PCOMP=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
CONFIG_CRYPTO_WORKQUEUE=y
# CONFIG_CRYPTO_CRYPTD is not set
CONFIG_CRYPTO_AUTHENC=y
# CONFIG_CRYPTO_TEST is not set

#
# Authenticated Encryption with Associated Data
#
# CONFIG_CRYPTO_CCM is not set
# CONFIG_CRYPTO_GCM is not set
# CONFIG_CRYPTO_SEQIV is not set

#
# Block modes
#
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_CTR is not set
# CONFIG_CRYPTO_CTS is not set
CONFIG_CRYPTO_ECB=y
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_PCBC is not set
# CONFIG_CRYPTO_XTS is not set

#
# Hash modes
#
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set

#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_CRC32C_INTEL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_RMD128 is not set
# CONFIG_CRYPTO_RMD160 is not set
# CONFIG_CRYPTO_RMD256 is not set
# CONFIG_CRYPTO_RMD320 is not set
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_WP512 is not set

#
# Ciphers
#
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANUBIS is not set
CONFIG_CRYPTO_ARC4=y
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_FCRYPT is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_SALSA20 is not set
# CONFIG_CRYPTO_SALSA20_586 is not set
# CONFIG_CRYPTO_SEED is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_586 is not set

#
# Compression
#
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_ZLIB is not set
# CONFIG_CRYPTO_LZO is not set

#
# Random Number Generation
#
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_PADLOCK is not set
# CONFIG_CRYPTO_DEV_GEODE is not set
# CONFIG_CRYPTO_DEV_HIFN_795X is not set
CONFIG_HAVE_KVM=y
CONFIG_HAVE_KVM_IRQCHIP=y
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
# CONFIG_LGUEST is not set
CONFIG_VIRTIO=y
CONFIG_VIRTIO_RING=y
CONFIG_VIRTIO_PCI=y
# CONFIG_VIRTIO_BALLOON is not set
CONFIG_BINARY_PRINTF=y

#
# Library routines
#
CONFIG_BITREVERSE=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
CONFIG_GENERIC_FIND_LAST_BIT=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
CONFIG_CRC_T10DIF=y
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
CONFIG_AUDIT_GENERIC=y
CONFIG_ZLIB_INFLATE=y
CONFIG_DECOMPRESS_GZIP=y
CONFIG_DECOMPRESS_BZIP2=y
CONFIG_DECOMPRESS_LZMA=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
# CONFIG_CPUMASK_OFFSTACK is not set
CONFIG_NLATTR=y

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
       [not found]                               ` <m3bpokiv0u.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
@ 2009-06-19  8:18                                 ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-19  8:18 UTC (permalink / raw)
  To: Nathan Lynch
  Cc: steiner-sJ/iWh9BUns, Christoph Lameter,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, mingo-X9Un+BFzKDI

Nathan Lynch wrote:
> Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> writes:
>> SRAT tables may contain nodes of very small size. The arch code may
>> decide to not activate such a node. However, currently the early boot code
>> sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be active
>> although these nodes have no present pages.
>>
>> for 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>
>> v4: update description according to Christoph
>>
>> Signed-off-by: Yinghai Lu <Yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>> Tested-by: Jack Steiner <steiner-sJ/iWh9BUns@public.gmane.org>
>> Acked-by: Christoph Lameter <cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
>>
>> ---
>>  mm/page_alloc.c |    5 +++++
>>  1 file changed, 5 insertions(+)
>>
>> Index: linux-2.6/mm/page_alloc.c
>> ===================================================================
>> --- linux-2.6.orig/mm/page_alloc.c
>> +++ linux-2.6/mm/page_alloc.c
>> @@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
>>  						early_node_map[i].start_pfn,
>>  						early_node_map[i].end_pfn);
>>  
>> +	/*
>> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
>> +	 * that node_mask, clear it at first
>> +	 */
>> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>>  	/* Initialise every node */
>>  	mminit_verify_pageflags_layout();
>>  	setup_nr_node_ids();
> 
> This patch breaks the cpuset.mems cgroup attribute on an i386 kvm guest.
> 
> With v2.6.30:
> 
> # uname -r
> 2.6.30
> # cat /cgroup/cpuset.mems
> 0
> # mkdir /cgroup/test
> # for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
> # echo $$ > /cgroup/test/tasks
> # echo $?
> 0
> 
> With a pulled-today Linus tree:
> 
> # uname -r
> 2.6.30-06725-g1d89b30
> # cat /cgroup/cpuset.mems
> 
> # mkdir /cgroup/test
> # for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
> # echo $$ > /cgroup/test/tasks
> -bash: echo: write error: No space left on device
> 
> (Note that in addition to the ENOSPC error, /cgroup/cpuset.mems is empty
> rather than '0' in the second test.)
> 
> I bisected to the commit containing this change.  Reverting fixes the
> problem.
> 

Can you use the following patch to see what happens to that nodemask?

YH

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a5f3c27..eb89e8b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4189,6 +4189,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 {
 	unsigned long nid;
 	int i;
+	char buf[512];
 
 	/* Sort early_node_map as initialisation assumes it is sorted */
 	sort_node_map();
@@ -4244,6 +4245,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
 	 * that node_mask, clear it at first
 	 */
+	memset(buf, 0, 512);
+	nodemask_scnprintf(buf, 512, node_states[N_HIGH_MEMORY]);
+	printk(KERN_DEBUG "before clear: node_states [%d]: %s\n", N_HIGH_MEMORY, buf);
 	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
@@ -4258,6 +4262,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 			node_set_state(nid, N_HIGH_MEMORY);
 		check_for_regular_memory(pgdat);
 	}
+	memset(buf, 0, 512);
+	nodemask_scnprintf(buf, 512, node_states[N_HIGH_MEMORY]);
+	printk(KERN_DEBUG "after online check: node_states [%d]: %s\n", N_HIGH_MEMORY, buf);
 }
 
 static int __init cmdline_parse_core(char *p, unsigned long *core)

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
  2009-06-19  6:42                             ` Nathan Lynch
@ 2009-06-19  8:18                               ` Yinghai Lu
       [not found]                                 ` <4A3B49BA.40100-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2009-06-19  8:43                                 ` Nathan Lynch
       [not found]                               ` <m3bpokiv0u.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
  1 sibling, 2 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-19  8:18 UTC (permalink / raw)
  To: Nathan Lynch
  Cc: Christoph Lameter, Andrew Morton, mingo, mel, tglx, hpa,
	suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes,
	containers

Nathan Lynch wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>> SRAT tables may contain nodes of very small size. The arch code may
>> decide to not activate such a node. However, currently the early boot code
>> sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be active
>> although these nodes have no present pages.
>>
>> for 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>
>> v4: update description according to Christoph
>>
>> Signed-off-by: Yinghai Lu <Yinghai@kernel.org>
>> Tested-by: Jack Steiner <steiner@sgi.com>
>> Acked-by: Christoph Lameter <cl@linux-foundation.org>
>>
>> ---
>>  mm/page_alloc.c |    5 +++++
>>  1 file changed, 5 insertions(+)
>>
>> Index: linux-2.6/mm/page_alloc.c
>> ===================================================================
>> --- linux-2.6.orig/mm/page_alloc.c
>> +++ linux-2.6/mm/page_alloc.c
>> @@ -4041,6 +4041,11 @@ void __init free_area_init_nodes(unsigne
>>  						early_node_map[i].start_pfn,
>>  						early_node_map[i].end_pfn);
>>  
>> +	/*
>> +	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
>> +	 * that node_mask, clear it at first
>> +	 */
>> +	nodes_clear(node_states[N_HIGH_MEMORY]);
>>  	/* Initialise every node */
>>  	mminit_verify_pageflags_layout();
>>  	setup_nr_node_ids();
> 
> This patch breaks the cpuset.mems cgroup attribute on an i386 kvm guest.
> 
> With v2.6.30:
> 
> # uname -r
> 2.6.30
> # cat /cgroup/cpuset.mems
> 0
> # mkdir /cgroup/test
> # for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
> # echo $$ > /cgroup/test/tasks
> # echo $?
> 0
> 
> With a pulled-today Linus tree:
> 
> # uname -r
> 2.6.30-06725-g1d89b30
> # cat /cgroup/cpuset.mems
> 
> # mkdir /cgroup/test
> # for i in cpus mems ; do cat /cgroup/cpuset.$i > /cgroup/test/cpuset.$i ; done
> # echo $$ > /cgroup/test/tasks
> -bash: echo: write error: No space left on device
> 
> (Note that in addition to the ENOSPC error, /cgroup/cpuset.mems is empty
> rather than '0' in the second test.)
> 
> I bisected to the commit containing this change.  Reverting fixes the
> problem.
> 

Can you use the following patch to see what happens to that nodemask?

YH

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a5f3c27..eb89e8b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4189,6 +4189,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 {
 	unsigned long nid;
 	int i;
+	char buf[512];
 
 	/* Sort early_node_map as initialisation assumes it is sorted */
 	sort_node_map();
@@ -4244,6 +4245,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
 	 * that node_mask, clear it at first
 	 */
+	memset(buf, 0, 512);
+	nodemask_scnprintf(buf, 512, node_states[N_HIGH_MEMORY]);
+	printk(KERN_DEBUG "before clear: node_states [%d]: %s\n", N_HIGH_MEMORY, buf);
 	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
@@ -4258,6 +4262,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 			node_set_state(nid, N_HIGH_MEMORY);
 		check_for_regular_memory(pgdat);
 	}
+	memset(buf, 0, 512);
+	nodemask_scnprintf(buf, 512, node_states[N_HIGH_MEMORY]);
+	printk(KERN_DEBUG "after online check: node_states [%d]: %s\n", N_HIGH_MEMORY, buf);
 }
 
 static int __init cmdline_parse_core(char *p, unsigned long *core)

^ permalink raw reply related	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
       [not found]                                 ` <4A3B49BA.40100-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2009-06-19  8:43                                   ` Nathan Lynch
  0 siblings, 0 replies; 102+ messages in thread
From: Nathan Lynch @ 2009-06-19  8:43 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: steiner-sJ/iWh9BUns, Christoph Lameter,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, mingo-X9Un+BFzKDI

Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> writes:
>> I bisected to the commit containing this change.  Reverting fixes the
>> problem.
>> 
>
> can you use following patch to see what happens to that nodemask?

# dmesg | grep node_states
before clear: node_states [3]: 1
after online check: node_states [3]: 0

Full dmesg below.

Initializing cgroup subsys cpuset
Initializing cgroup subsys cpu
Linux version 2.6.30-06725-g1d89b30-dirty (nathanl-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #126 SMP PREEMPT Fri Jun 19 03:34:03 CDT 2009
KERNEL supported cpus:
  Intel GenuineIntel
  AMD AuthenticAMD
  NSC Geode by NSC
  Cyrix CyrixInstead
  Centaur CentaurHauls
  Transmeta GenuineTMx86
  Transmeta TransmetaCPU
  UMC UMC UMC UMC
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000e8000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000001fff0000 (usable)
 BIOS-e820: 000000001fff0000 - 0000000020000000 (ACPI data)
 BIOS-e820: 00000000fffbd000 - 0000000100000000 (reserved)
debug: ignoring loglevel setting.
DMI 2.4 present.
last_pfn = 0x1fff0 max_arch_pfn = 0x100000
MTRR default type: uncachable
MTRR fixed ranges enabled:
  00000-9FFFF write-back
  A0000-FFFFF uncachable
MTRR variable ranges enabled:
  0 base 0000000000 mask FFE0000000 write-back
  1 disabled
  2 disabled
  3 disabled
  4 disabled
  5 disabled
  6 disabled
  7 disabled
PAT not supported by CPU.
initial memory mapped : 0 - 01400000
init_memory_mapping: 0000000000000000-000000001fff0000
 0000000000 - 001fff0000 page 4k
kernel direct mapping tables up to 1fff0000 @ 7000-8a000
RAMDISK: 1fce1000 - 1ffef7d3
ACPI: RSDP 000fb9d0 00014 (v00 QEMU  )
ACPI: RSDT 1fff0000 0002C (v01 QEMU   QEMURSDT 00000001 QEMU 00000001)
ACPI: FACP 1fff002c 00074 (v01 QEMU   QEMUFACP 00000001 QEMU 00000001)
ACPI: DSDT 1fff0100 0253C (v01   BXPC   BXDSDT 00000001 INTL 20061109)
ACPI: FACS 1fff00c0 00040
ACPI: APIC 1fff2640 000E0 (v01 QEMU   QEMUAPIC 00000001 QEMU 00000001)
ACPI: Local APIC address 0xfee00000
0MB HIGHMEM available.
511MB LOWMEM available.
  mapped low ram: 0 - 1fff0000
  low ram: 0 - 1fff0000
  node 0 low ram: 00000000 - 1fff0000
  node 0 bootmap 00002000 - 00006000
(9 early reservations) ==> bootmem [0000000000 - 001fff0000]
  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
  #1 [0000001000 - 0000002000]    EX TRAMPOLINE ==> [0000001000 - 0000002000]
  #2 [0000006000 - 0000007000]       TRAMPOLINE ==> [0000006000 - 0000007000]
  #3 [0000200000 - 0001149b88]    TEXT DATA BSS ==> [0000200000 - 0001149b88]
  #4 [001fce1000 - 001ffef7d3]          RAMDISK ==> [001fce1000 - 001ffef7d3]
  #5 [000009fc00 - 0000100000]    BIOS reserved ==> [000009fc00 - 0000100000]
  #6 [000114a000 - 000114f065]              BRK ==> [000114a000 - 000114f065]
  #7 [0000007000 - 0000082000]          PGTABLE ==> [0000007000 - 0000082000]
  #8 [0000002000 - 0000006000]          BOOTMAP ==> [0000002000 - 0000006000]
found SMP MP-table at [c00fb8b0] fb8b0
Zone PFN ranges:
  DMA      0x00000000 -> 0x00001000
  Normal   0x00001000 -> 0x0001fff0
  HighMem  0x0001fff0 -> 0x0001fff0
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x0000009f
    0: 0x00000100 -> 0x0001fff0
before clear: node_states [3]: 1
On node 0 totalpages: 130959
free_area_init_node: node 0, pgdat c0976960, node_mem_map c1151000
  DMA zone: 60 pages used for memmap
  DMA zone: 0 pages reserved
  DMA zone: 3939 pages, LIFO batch:0
  Normal zone: 1860 pages used for memmap
  Normal zone: 125100 pages, LIFO batch:31
after online check: node_states [3]: 0
Using APIC driver default
ACPI: PM-Timer IO Port: 0xb008
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] disabled)
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x02] disabled)
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x03] disabled)
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x04] disabled)
ACPI: LAPIC (acpi_id[0x05] lapic_id[0x05] disabled)
ACPI: LAPIC (acpi_id[0x06] lapic_id[0x06] disabled)
ACPI: LAPIC (acpi_id[0x07] lapic_id[0x07] disabled)
ACPI: LAPIC (acpi_id[0x08] lapic_id[0x08] disabled)
ACPI: LAPIC (acpi_id[0x09] lapic_id[0x09] disabled)
ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x0a] disabled)
ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x0b] disabled)
ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x0c] disabled)
ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x0d] disabled)
ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x0e] disabled)
ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x0f] disabled)
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1, version 17, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 high level)
ACPI: IRQ5 used by override.
ACPI: IRQ9 used by override.
ACPI: IRQ10 used by override.
ACPI: IRQ11 used by override.
Enabling APIC mode:  Flat.  Using 1 I/O APICs
Using ACPI (MADT) for SMP configuration information
16 Processors exceeds NR_CPUS limit of 8
SMP: Allowing 8 CPUs, 7 hotplug CPUs
nr_irqs_gsi: 24
PM: Registered nosave memory: 000000000009f000 - 00000000000a0000
PM: Registered nosave memory: 00000000000a0000 - 00000000000e8000
PM: Registered nosave memory: 00000000000e8000 - 0000000000100000
Allocating PCI resources starting at 20000000 (gap: 20000000:dffbd000)
NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:8 nr_node_ids:1
PERCPU: Embedded 335 pages at c18d7000, static data 1347836 bytes
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 129039
Kernel command line: console=ttyS0 debug ignore_loglevel ro root=/dev/sda3 ckpt_debug=255
PID hash table entries: 2048 (order: 11, 8192 bytes)
Dentry cache hash table entries: 65536 (order: 6, 262144 bytes)
Inode-cache hash table entries: 32768 (order: 5, 131072 bytes)
Enabling fast FPU save and restore... done.
Enabling unmasked SIMD FPU exception support... done.
Initializing CPU#0
Initializing HighMem for node 0 (00000000:00000000)
Memory: 485620k/524224k available (5261k kernel code, 38160k reserved, 2559k data, 1700k init, 0k highmem)
virtual kernel memory layout:
    fixmap  : 0xfff37000 - 0xfffff000   ( 800 kB)
    pkmap   : 0xff800000 - 0xffc00000   (4096 kB)
    vmalloc : 0xe07f0000 - 0xff7fe000   ( 496 MB)
    lowmem  : 0xc0000000 - 0xdfff0000   ( 511 MB)
      .init : 0xc09aa000 - 0xc0b53000   (1700 kB)
      .data : 0xc0723427 - 0xc09a337c   (2559 kB)
      .text : 0xc0200000 - 0xc0723427   (5261 kB)
Checking if this processor honours the WP bit even in supervisor mode...Ok.
SLUB: Genslabs=13, HWalign=64, Order=0-3, MinObjects=0, CPUs=8, Nodes=1
NR_IRQS:512
CPU 0 irqstacks, hard=c18d7000 soft=c18d8000
Fast TSC calibration failed
TSC: PIT calibration matches PMTIMER. 1 loops
Detected 1994.971 MHz processor.
Console: colour VGA+ 80x25
console [ttyS0] enabled
Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
... MAX_LOCKDEP_SUBCLASSES:  8
... MAX_LOCK_DEPTH:          48
... MAX_LOCKDEP_KEYS:        8191
... CLASSHASH_SIZE:          4096
... MAX_LOCKDEP_ENTRIES:     16384
... MAX_LOCKDEP_CHAINS:      32768
... CHAINHASH_SIZE:          16384
 memory used by lock dependency info: 3743 kB
 per task-struct memory footprint: 1920 bytes
------------------------
| Locking API testsuite:
----------------------------------------------------------------------------
                                 | spin |wlock |rlock |mutex | wsem | rsem |
  --------------------------------------------------------------------------
                     A-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 A-B-B-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-B-C-C-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-C-A-B-C deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-B-C-C-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-C-D-B-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-C-D-B-C-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                    double unlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                  initialize held:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 bad unlock order:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
  --------------------------------------------------------------------------
              recursive read-lock:             |  ok  |             |  ok  |
           recursive read-lock #2:             |  ok  |             |  ok  |
            mixed read-write-lock:             |  ok  |             |  ok  |
            mixed write-read-lock:             |  ok  |             |  ok  |
  --------------------------------------------------------------------------
     hard-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
     soft-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
     hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
     soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
       sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
       sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
         hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
         soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
         hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
         soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/123:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/123:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/132:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/132:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/213:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/213:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/231:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/231:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/312:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/312:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/321:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/321:  ok  |  ok  |  ok  |
      hard-irq read-recursion/123:  ok  |
      soft-irq read-recursion/123:  ok  |
      hard-irq read-recursion/132:  ok  |
      soft-irq read-recursion/132:  ok  |
      hard-irq read-recursion/213:  ok  |
      soft-irq read-recursion/213:  ok  |
      hard-irq read-recursion/231:  ok  |
      soft-irq read-recursion/231:  ok  |
      hard-irq read-recursion/312:  ok  |
      soft-irq read-recursion/312:  ok  |
      hard-irq read-recursion/321:  ok  |
      soft-irq read-recursion/321:  ok  |
-------------------------------------------------------
Good, all 218 testcases passed! |
---------------------------------
Calibrating delay loop (skipped), value calculated using timer frequency.. 3989.94 BogoMIPS (lpj=1994971)
Security Framework initialized
SELinux:  Initializing.
SELinux:  Starting in permissive mode
Mount-cache hash table entries: 512
Initializing cgroup subsys ns
Initializing cgroup subsys cpuacct
Initializing cgroup subsys devices
Initializing cgroup subsys freezer
CPU: L1 I cache: 32K, L1 D cache: 32K
CPU: L2 cache: 2048K
Checking 'hlt' instruction... OK.
lockdep: fixing up alternatives.
SMP alternatives: switching to UP code
ACPI: Core revision 20090521
ftrace: converting mcount calls to 0f 1f 44 00 00
ftrace: allocating 20993 entries in 42 pages
..TIMER: vector=0x30 apic1=0 pin1=0 apic2=-1 pin2=-1
CPU0: Intel QEMU Virtual CPU version 0.9.1 stepping 03
Testing tracer nop: PASSED
Brought up 1 CPUs
Total of 1 processors activated (3989.94 BogoMIPS).
khelper used greatest stack depth: 2752 bytes left
Time:  8:34:26  Date: 06/19/09
NET: Registered protocol family 16
ACPI: bus type pci registered
PCI: PCI BIOS revision 2.10 entry at 0xfb120, last bus=0
PCI: Using configuration type 1 for base access
bio: create slab <bio-0> at 0
ACPI: EC: Look up EC in DSDT
ACPI: Interpreter enabled
ACPI: (supports S0 S5)
ACPI: Using IOAPIC for interrupt routing
ACPI: No dock devices found.
ACPI: PCI Root Bridge [PCI0] (0000:00)
pci 0000:00:01.1: reg 20 io port: [0xc000-0xc00f]
pci 0000:00:01.3: quirk: region b000-b03f claimed by PIIX4 ACPI
pci 0000:00:01.3: quirk: region b100-b10f claimed by PIIX4 SMB
pci 0000:00:02.0: reg 10 32bit mmio: [0xf0000000-0xf1ffffff]
pci 0000:00:02.0: reg 14 32bit mmio: [0xf2000000-0xf2000fff]
pci 0000:00:03.0: reg 10 io port: [0xc100-0xc1ff]
pci 0000:00:03.0: reg 14 32bit mmio: [0xf2001000-0xf20010ff]
pci_bus 0000:00: on NUMA node 0
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
ACPI: PCI Interrupt Link [LNKA] (IRQs 5 *10 11)
ACPI: PCI Interrupt Link [LNKB] (IRQs 5 *10 11)
ACPI: PCI Interrupt Link [LNKC] (IRQs 5 10 *11)
ACPI: PCI Interrupt Link [LNKD] (IRQs 5 10 *11)
SCSI subsystem initialized
libata version 3.00 loaded.
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
PCI: Using ACPI for IRQ routing
NetLabel: Initializing
NetLabel:  domain hash size = 128
NetLabel:  protocols = UNLABELED CIPSOv4
NetLabel:  unlabeled traffic allowed by default
pnp: PnP ACPI init
ACPI: bus type pnp registered
pnp: PnP ACPI: found 7 devices
ACPI: ACPI bus type pnp unregistered
pci_bus 0000:00: resource 0 io:  [0x00-0xffff]
pci_bus 0000:00: resource 1 mem: [0x000000-0xffffffff]
NET: Registered protocol family 2
IP route cache hash table entries: 4096 (order: 2, 16384 bytes)
TCP established hash table entries: 16384 (order: 5, 131072 bytes)
TCP bind hash table entries: 16384 (order: 7, 655360 bytes)
TCP: Hash tables configured (established 16384 bind 16384)
TCP reno registered
NET: Registered protocol family 1
Trying to unpack rootfs image as initramfs...
debug: unmapping init memory dfce1000..dfff0000
microcode: CPU0 sig=0x623, pf=0x0, revision=0x0
Microcode Update Driver: v2.00 <tigran-ppwZ4lME3+KI6QP4U9MhSdBc4/FLrbF6@public.gmane.org>, Peter Oruba
====[ backtrace testing ]===========
Testing a backtrace from process context.
The following trace is a kernel self test and not a bug!
Pid: 1, comm: swapper Not tainted 2.6.30-06725-g1d89b30-dirty #126
Call Trace:
 [<c071abda>] ? printk+0x1d/0x1f
 [<c027d180>] ? backtrace_regression_test+0x0/0xe0
 [<c027d1b5>] backtrace_regression_test+0x35/0xe0
 [<c09bf0b0>] ? crash_notes_memory_init+0x0/0x37
 [<c02e7c0f>] ? __alloc_percpu+0xf/0x20
 [<c09bf0c5>] ? crash_notes_memory_init+0x15/0x37
 [<c020112f>] do_one_initcall+0x2f/0x160
 [<c03358b9>] ? create_proc_entry+0x59/0xa0
 [<c0293752>] ? register_irq_proc+0xb2/0xd0
 [<c02937df>] ? init_irq_proc+0x6f/0x80
 [<c09aa35d>] kernel_init+0x168/0x1eb
 [<c09aa1f5>] ? kernel_init+0x0/0x1eb
 [<c0203d8b>] kernel_thread_helper+0x7/0x10
Testing a backtrace from irq context.
The following trace is a kernel self test and not a bug!
Pid: 4, comm: ksoftirqd/0 Not tainted 2.6.30-06725-g1d89b30-dirty #126
Call Trace:
 [<c0267d59>] ? trace_hardirqs_on_caller+0x69/0x180
 [<c027d16d>] backtrace_test_irq_callback+0xd/0x20
 [<c023f743>] tasklet_action+0x83/0x100
 [<c024054a>] __do_softirq+0xca/0x1f0
 [<c0240480>] ? __do_softirq+0x0/0x1f0
 <IRQ>  [<c024084d>] ? ksoftirqd+0x7d/0x1b0
 [<c02407d0>] ? ksoftirqd+0x0/0x1b0
 [<c02527db>] ? kthread+0x4b/0x80
 [<c0252790>] ? kthread+0x0/0x80
 [<c0203d8b>] ? kernel_thread_helper+0x7/0x10
Testing a saved backtrace.
The following trace is a kernel self test and not a bug!
 [<c020d650>] save_stack_trace+0x30/0x50
 [<c027d241>] backtrace_regression_test+0xc1/0xe0
 [<c020112f>] do_one_initcall+0x2f/0x160
 [<c09aa35d>] kernel_init+0x168/0x1eb
 [<c0203d8b>] kernel_thread_helper+0x7/0x10
 [<ffffffff>] 0xffffffff
====[ end of backtrace testing ]====
audit: initializing netlink socket (disabled)
type=2000 audit(1245400465.957:1): initialized
Testing tracer sched_switch: PASSED
Testing tracer function: 
Switched to high resolution mode on CPU 0
PASSED
Testing dynamic ftrace: PASSED
Testing tracer irqsoff: PASSED
Testing tracer preemptoff: PASSED
Testing tracer preemptirqsoff: PASSED
Testing tracer wakeup: PASSED
Testing tracer wakeup_rt: PASSED
Testing tracer function_graph: PASSED
HugeTLB registered 4 MB page size, pre-allocated 0 pages
khelper used greatest stack depth: 2732 bytes left
VFS: Disk quotas dquot_6.5.2
Dquot-cache hash table entries: 1024 (order 0, 4096 bytes)
msgmni has been set to 948
SELinux:  Registering netfilter hooks
cryptomgr_test used greatest stack depth: 2444 bytes left
alg: No test for stdrng (krng)
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252)
io scheduler noop registered
io scheduler anticipatory registered
io scheduler deadline registered
io scheduler cfq registered (default)
pci 0000:00:00.0: Limiting direct PCI/PCI transfers
pci 0000:00:01.0: PIIX3: Enabling Passive Release
pci 0000:00:01.0: Activating ISA DMA hang workarounds
pci 0000:00:02.0: Boot video device
pci_hotplug: PCI Hot Plug PCI Core version: 0.5
input: Power Button as /class/input/input0
ACPI: Power Button [PWRF]
processor ACPI_CPU:00: registered as cooling_device0
ACPI Warning: \_PR_.CPU1._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU2._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU3._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU4._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU5._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU6._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU7._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU8._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU9._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUA._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUB._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUC._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUD._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUE._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
Non-volatile memory driver v1.3
Linux agpgart interface v0.103
[drm] Initialized drm 1.1.0 20060810
Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
serial8250: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A
Platform driver 'serial8250' needs updating - please use dev_pm_ops
00:06: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A
brd: module loaded
loop: module loaded
Driver 'sd' needs updating - please use bus_type methods
Driver 'sr' needs updating - please use bus_type methods
ata_piix 0000:00:01.1: version 2.13
ata_piix 0000:00:01.1: setting latency timer to 64
scsi0 : ata_piix
scsi1 : ata_piix
ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc000 irq 14
ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc008 irq 15
<6>8139cp: 10/100 PCI Ethernet driver v1.3 (Mar 22, 2004)
ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11
8139cp 0000:00:03.0: PCI INT A -> Link[LNKC] -> GSI 11 (level, high) -> IRQ 11
eth0: RTL-8139C+ at 0xe080e000, 52:54:00:12:34:56, IRQ 11
8139cp 0000:00:03.0: setting latency timer to 64
console [netcon0] enabled
netconsole: network logging started
ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
ehci_hcd: block sizes: qh 128 qtd 96 itd 160 sitd 96
ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
ohci_hcd: block sizes: ed 64 td 64
uhci_hcd: USB Universal Host Controller Interface driver
usbcore: registered new interface driver usblp
usbcore: registered new interface driver libusual
PNP: PS/2 Controller [PNP0303:KBD,PNP0f13:MOU] at 0x60,0x64 irq 1,12
Platform driver 'i8042' needs updating - please use dev_pm_ops
serio: i8042 KBD port at 0x60,0x64 irq 1
serio: i8042 AUX port at 0x60,0x64 irq 12
mice: PS/2 mouse device common for all mice
input: AT Translated Set 2 keyboard as /class/input/input1
rtc_cmos 00:01: rtc core: registered rtc_cmos as rtc0
rtc0: alarms up to one day, 114 bytes nvram
cpuidle: using governor ladder
cpuidle: using governor menu
No iBFT detected.
ata1.00: ATA-7: QEMU HARDDISK, 0.9.1, max UDMA/100
ata1.00: 16777216 sectors, multi 16: LBA48 
ata1.00: configured for MWDMA2
usbcore: registered new interface driver hiddev
usbcore: registered new interface driver usbhid
usbhid: v2.6:USB HID core driver
ata2.00: ATAPI: QEMU DVD-ROM, 0.9.1, max UDMA/100
ata2.00: configured for MWDMA2
scsi 0:0:0:0: Direct-Access     ATA      QEMU HARDDISK    0.9. PQ: 0 ANSI: 5
Advanced Linux Sound Architecture Driver Version 1.0.20.
sd 0:0:0:0: [sda] 16777216 512-byte hardware sectors: (8.58 GB/8.00 GiB)
sd 0:0:0:0: [sda] Write Protect is off
sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA
 sda: sda1 sda2 sda3
sd 0:0:0:0: Attached scsi generic sg0 type 0
scsi 1:0:0:0: CD-ROM            QEMU     QEMU DVD-ROM     0.9. PQ: 0 ANSI: 5
sr0: scsi3-mmc drive: 4x/4x xa/form2 tray
Uniform CD-ROM driver Revision: 3.20
sr 1:0:0:0: Attached scsi CD-ROM sr0
modprobe used greatest stack depth: 2332 bytes left
input: ImExPS/2 Generic Explorer Mouse as /class/input/input2
sr 1:0:0:0: Attached scsi generic sg1 type 5
sd 0:0:0:0: [sda] Attached SCSI disk
ALSA device list:
  No soundcards found.
Netfilter messages via NETLINK v0.30.
nf_conntrack version 0.5.0 (8191 buckets, 32764 max)
ctnetlink v0.93: registering with nfnetlink.
ip_tables: (C) 2000-2006 Netfilter Core Team
TCP cubic registered
Initializing XFRM netlink socket
NET: Registered protocol family 10
ip6_tables: (C) 2000-2006 Netfilter Core Team
IPv6 over IPv4 tunneling driver
NET: Registered protocol family 17
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
Using IPI No-Shortcut mode
PM: Resume from disk failed.
registered taskstats version 1
Running tests on trace events:
Testing event kfree_skb: OK
Testing event block_rq_abort: OK
Testing event block_rq_insert: OK
Testing event block_rq_issue: OK
Testing event block_rq_requeue: OK
Testing event block_rq_complete: OK
Testing event block_bio_bounce: OK
Testing event block_bio_complete: OK
Testing event block_bio_backmerge: OK
Testing event block_bio_frontmerge: OK
Testing event block_bio_queue: OK
Testing event block_getrq: OK
Testing event block_sleeprq: OK
Testing event block_plug: OK
Testing event block_unplug_timer: OK
Testing event block_unplug_io: OK
Testing event block_split: OK
Testing event block_remap: OK
Testing event kmalloc: OK
Testing event kmem_cache_alloc: OK
Testing event kmalloc_node: OK
Testing event kmem_cache_alloc_node: OK
Testing event kfree: OK
Testing event kmem_cache_free: OK
Testing event lock_acquire: OK
Testing event lock_release: OK
Testing event lock_contended: OK
Testing event lock_acquired: OK
Testing event workqueue_insertion: OK
Testing event workqueue_execution: OK
Testing event workqueue_creation: OK
Testing event workqueue_destruction: OK
Testing event irq_handler_entry: OK
Testing event irq_handler_exit: OK
Testing event softirq_entry: OK
Testing event softirq_exit: OK
Testing event sched_kthread_stop: OK
Testing event sched_kthread_stop_ret: OK
Testing event sched_wait_task: OK
Testing event sched_wakeup: OK
Testing event sched_wakeup_new: OK
Testing event sched_switch: OK
Testing event sched_migrate_task: OK
Testing event sched_process_free: OK
Testing event sched_process_exit: OK
Testing event sched_process_wait: OK
Testing event sched_process_fork: OK
Testing event sched_signal_send: OK
Running tests on trace event systems:
Testing event system skb: OK
Testing event system block: OK
Testing event system kmem: OK
Testing event system lockdep: OK
Testing event system workqueue: OK
Testing event system irq: OK
Testing event system sched: OK
Running tests on all trace events:
Testing all events: OK
Running tests again, along with the function tracer
Running tests on trace events:
Testing event kfree_skb: OK
Testing event block_rq_abort: OK
Testing event block_rq_insert: OK
Testing event block_rq_issue: OK
Testing event block_rq_requeue: OK
Testing event block_rq_complete: OK
Testing event block_bio_bounce: OK
Testing event block_bio_complete: OK
Testing event block_bio_backmerge: OK
Testing event block_bio_frontmerge: OK
Testing event block_bio_queue: OK
Testing event block_getrq: OK
Testing event block_sleeprq: OK
Testing event block_plug: OK
Testing event block_unplug_timer: OK
Testing event block_unplug_io: OK
Testing event block_split: OK
Testing event block_remap: OK
Testing event kmalloc: OK
Testing event kmem_cache_alloc: OK
Testing event kmalloc_node: OK
Testing event kmem_cache_alloc_node: OK
Testing event kfree: OK
Testing event kmem_cache_free: OK
Testing event lock_acquire: OK
Testing event lock_release: OK
Testing event lock_contended: OK
Testing event lock_acquired: OK
Testing event workqueue_insertion: OK
Testing event workqueue_execution: OK
Testing event workqueue_creation: OK
Testing event workqueue_destruction: OK
Testing event irq_handler_entry: OK
Testing event irq_handler_exit: OK
Testing event softirq_entry: OK
Testing event softirq_exit: OK
Testing event sched_kthread_stop: OK
Testing event sched_kthread_stop_ret: OK
Testing event sched_wait_task: OK
Testing event sched_wakeup: OK
Testing event sched_wakeup_new: OK
Testing event sched_switch: OK
Testing event sched_migrate_task: OK
Testing event sched_process_free: OK
Testing event sched_process_exit: OK
Testing event sched_process_wait: OK
Testing event sched_process_fork: OK
Testing event sched_signal_send: OK
Running tests on trace event systems:
Testing event system skb: OK
Testing event system block: OK
Testing event system kmem: OK
Testing event system lockdep: OK
Testing event system workqueue: OK
Testing event system irq: OK
Testing event system sched: OK
Running tests on all trace events:
Testing all events: OK
  Magic number: 13:303:574
debug: unmapping init memory c09aa000..c0b53000
Write protecting the kernel text: 5264k
Write protecting the kernel read-only data: 1908k
async/0 used greatest stack depth: 2288 bytes left
async/1 used greatest stack depth: 1932 bytes left
kjournald starting.  Commit interval 5 seconds
EXT3-fs: mounted filesystem with writeback data mode.
type=1404 audit(1245400469.713:2): enforcing=1 old_enforcing=0 auid=4294967295 ses=4294967295
SELinux: 8192 avtab hash slots, 113721 rules.
SELinux: 8192 avtab hash slots, 113721 rules.
SELinux:  8 users, 11 roles, 2611 types, 123 bools, 1 sens, 1024 cats
SELinux:  73 classes, 113721 rules
SELinux:  class kernel_service not defined in policy
SELinux:  permission open in class sock_file not defined in policy
SELinux: the above unknown classes and permissions will be allowed
SELinux:  Completing initialization.
SELinux:  Setting up existing superblocks.
SELinux: initialized (dev sda3, type ext3), uses xattr
SELinux: initialized (dev tmpfs, type tmpfs), uses transition SIDs
SELinux: initialized (dev selinuxfs, type selinuxfs), uses genfs_contexts
SELinux: initialized (dev mqueue, type mqueue), uses transition SIDs
SELinux: initialized (dev hugetlbfs, type hugetlbfs), uses genfs_contexts
SELinux: initialized (dev devpts, type devpts), uses transition SIDs
SELinux: initialized (dev inotifyfs, type inotifyfs), uses genfs_contexts
SELinux: initialized (dev tmpfs, type tmpfs), uses transition SIDs
SELinux: initialized (dev anon_inodefs, type anon_inodefs), uses genfs_contexts
SELinux: initialized (dev pipefs, type pipefs), uses task SIDs
SELinux: initialized (dev debugfs, type debugfs), uses genfs_contexts
SELinux: initialized (dev sockfs, type sockfs), uses task SIDs
SELinux: initialized (dev proc, type proc), uses genfs_contexts
SELinux: initialized (dev bdev, type bdev), uses genfs_contexts
SELinux: initialized (dev rootfs, type rootfs), uses genfs_contexts
SELinux: initialized (dev sysfs, type sysfs), uses genfs_contexts
type=1403 audit(1245400471.093:3): policy loaded auid=4294967295 ses=4294967295
plymouth used greatest stack depth: 1768 bytes left
awk used greatest stack depth: 1620 bytes left
udevd version 127 started
udevadm used greatest stack depth: 1524 bytes left
udevd used greatest stack depth: 1520 bytes left
grep used greatest stack depth: 1504 bytes left
gzip used greatest stack depth: 1480 bytes left
EXT3 FS on sda3, internal journal
kjournald starting.  Commit interval 5 seconds
EXT3 FS on sda1, internal journal
EXT3-fs: mounted filesystem with writeback data mode.
SELinux: initialized (dev sda1, type ext3), uses xattr
SELinux: initialized (dev tmpfs, type tmpfs), uses transition SIDs
SELinux: initialized (dev cgroup, type cgroup), uses genfs_contexts
Adding 522104k swap on /dev/sda2.  Priority:-1 extents:1 across:522104k 
SELinux: initialized (dev binfmt_misc, type binfmt_misc), uses genfs_contexts
ip used greatest stack depth: 1244 bytes left
eth0: link up, 100Mbps, full-duplex, lpa 0x05E1
device eth0 entered promiscuous mode
br0: port 1(eth0) entering forwarding state
Clocksource tsc unstable (delta = -168975068 ns)
eth0: no IPv6 routers present
br0: no IPv6 routers present
br0: port 1(eth0) entering disabled state
sshd used greatest stack depth: 1212 bytes left
eth0: link up, 100Mbps, full-duplex, lpa 0x05E1
br0: port 1(eth0) entering forwarding state
ip used greatest stack depth: 1168 bytes left
eth0: no IPv6 routers present
br0: no IPv6 routers present

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before we set it again -v4
  2009-06-19  8:18                               ` Yinghai Lu
       [not found]                                 ` <4A3B49BA.40100-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2009-06-19  8:43                                 ` Nathan Lynch
  2009-06-19 16:16                                   ` Yinghai Lu
       [not found]                                   ` <m3prd0havh.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
  1 sibling, 2 replies; 102+ messages in thread
From: Nathan Lynch @ 2009-06-19  8:43 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Christoph Lameter, Andrew Morton, mingo, mel, tglx, hpa,
	suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes,
	containers

Yinghai Lu <yinghai@kernel.org> writes:
>> I bisected to the commit containing this change.  Reverting fixes the
>> problem.
>> 
>
> can you use following patch to see what happens to that nodemask?

# dmesg | grep node_states
before clear: node_states [3]: 1
after online check: node_states [3]: 0

Full dmesg below.

Initializing cgroup subsys cpuset
Initializing cgroup subsys cpu
Linux version 2.6.30-06725-g1d89b30-dirty (nathanl@localhost.localdomain) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #126 SMP PREEMPT Fri Jun 19 03:34:03 CDT 2009
KERNEL supported cpus:
  Intel GenuineIntel
  AMD AuthenticAMD
  NSC Geode by NSC
  Cyrix CyrixInstead
  Centaur CentaurHauls
  Transmeta GenuineTMx86
  Transmeta TransmetaCPU
  UMC UMC UMC UMC
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000e8000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000001fff0000 (usable)
 BIOS-e820: 000000001fff0000 - 0000000020000000 (ACPI data)
 BIOS-e820: 00000000fffbd000 - 0000000100000000 (reserved)
debug: ignoring loglevel setting.
DMI 2.4 present.
last_pfn = 0x1fff0 max_arch_pfn = 0x100000
MTRR default type: uncachable
MTRR fixed ranges enabled:
  00000-9FFFF write-back
  A0000-FFFFF uncachable
MTRR variable ranges enabled:
  0 base 0000000000 mask FFE0000000 write-back
  1 disabled
  2 disabled
  3 disabled
  4 disabled
  5 disabled
  6 disabled
  7 disabled
PAT not supported by CPU.
initial memory mapped : 0 - 01400000
init_memory_mapping: 0000000000000000-000000001fff0000
 0000000000 - 001fff0000 page 4k
kernel direct mapping tables up to 1fff0000 @ 7000-8a000
RAMDISK: 1fce1000 - 1ffef7d3
ACPI: RSDP 000fb9d0 00014 (v00 QEMU  )
ACPI: RSDT 1fff0000 0002C (v01 QEMU   QEMURSDT 00000001 QEMU 00000001)
ACPI: FACP 1fff002c 00074 (v01 QEMU   QEMUFACP 00000001 QEMU 00000001)
ACPI: DSDT 1fff0100 0253C (v01   BXPC   BXDSDT 00000001 INTL 20061109)
ACPI: FACS 1fff00c0 00040
ACPI: APIC 1fff2640 000E0 (v01 QEMU   QEMUAPIC 00000001 QEMU 00000001)
ACPI: Local APIC address 0xfee00000
0MB HIGHMEM available.
511MB LOWMEM available.
  mapped low ram: 0 - 1fff0000
  low ram: 0 - 1fff0000
  node 0 low ram: 00000000 - 1fff0000
  node 0 bootmap 00002000 - 00006000
(9 early reservations) ==> bootmem [0000000000 - 001fff0000]
  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
  #1 [0000001000 - 0000002000]    EX TRAMPOLINE ==> [0000001000 - 0000002000]
  #2 [0000006000 - 0000007000]       TRAMPOLINE ==> [0000006000 - 0000007000]
  #3 [0000200000 - 0001149b88]    TEXT DATA BSS ==> [0000200000 - 0001149b88]
  #4 [001fce1000 - 001ffef7d3]          RAMDISK ==> [001fce1000 - 001ffef7d3]
  #5 [000009fc00 - 0000100000]    BIOS reserved ==> [000009fc00 - 0000100000]
  #6 [000114a000 - 000114f065]              BRK ==> [000114a000 - 000114f065]
  #7 [0000007000 - 0000082000]          PGTABLE ==> [0000007000 - 0000082000]
  #8 [0000002000 - 0000006000]          BOOTMAP ==> [0000002000 - 0000006000]
found SMP MP-table at [c00fb8b0] fb8b0
Zone PFN ranges:
  DMA      0x00000000 -> 0x00001000
  Normal   0x00001000 -> 0x0001fff0
  HighMem  0x0001fff0 -> 0x0001fff0
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x0000009f
    0: 0x00000100 -> 0x0001fff0
before clear: node_states [3]: 1
On node 0 totalpages: 130959
free_area_init_node: node 0, pgdat c0976960, node_mem_map c1151000
  DMA zone: 60 pages used for memmap
  DMA zone: 0 pages reserved
  DMA zone: 3939 pages, LIFO batch:0
  Normal zone: 1860 pages used for memmap
  Normal zone: 125100 pages, LIFO batch:31
after online check: node_states [3]: 0
Using APIC driver default
ACPI: PM-Timer IO Port: 0xb008
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] disabled)
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x02] disabled)
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x03] disabled)
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x04] disabled)
ACPI: LAPIC (acpi_id[0x05] lapic_id[0x05] disabled)
ACPI: LAPIC (acpi_id[0x06] lapic_id[0x06] disabled)
ACPI: LAPIC (acpi_id[0x07] lapic_id[0x07] disabled)
ACPI: LAPIC (acpi_id[0x08] lapic_id[0x08] disabled)
ACPI: LAPIC (acpi_id[0x09] lapic_id[0x09] disabled)
ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x0a] disabled)
ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x0b] disabled)
ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x0c] disabled)
ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x0d] disabled)
ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x0e] disabled)
ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x0f] disabled)
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1, version 17, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 high level)
ACPI: IRQ5 used by override.
ACPI: IRQ9 used by override.
ACPI: IRQ10 used by override.
ACPI: IRQ11 used by override.
Enabling APIC mode:  Flat.  Using 1 I/O APICs
Using ACPI (MADT) for SMP configuration information
16 Processors exceeds NR_CPUS limit of 8
SMP: Allowing 8 CPUs, 7 hotplug CPUs
nr_irqs_gsi: 24
PM: Registered nosave memory: 000000000009f000 - 00000000000a0000
PM: Registered nosave memory: 00000000000a0000 - 00000000000e8000
PM: Registered nosave memory: 00000000000e8000 - 0000000000100000
Allocating PCI resources starting at 20000000 (gap: 20000000:dffbd000)
NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:8 nr_node_ids:1
PERCPU: Embedded 335 pages at c18d7000, static data 1347836 bytes
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 129039
Kernel command line: console=ttyS0 debug ignore_loglevel ro root=/dev/sda3 ckpt_debug=255
PID hash table entries: 2048 (order: 11, 8192 bytes)
Dentry cache hash table entries: 65536 (order: 6, 262144 bytes)
Inode-cache hash table entries: 32768 (order: 5, 131072 bytes)
Enabling fast FPU save and restore... done.
Enabling unmasked SIMD FPU exception support... done.
Initializing CPU#0
Initializing HighMem for node 0 (00000000:00000000)
Memory: 485620k/524224k available (5261k kernel code, 38160k reserved, 2559k data, 1700k init, 0k highmem)
virtual kernel memory layout:
    fixmap  : 0xfff37000 - 0xfffff000   ( 800 kB)
    pkmap   : 0xff800000 - 0xffc00000   (4096 kB)
    vmalloc : 0xe07f0000 - 0xff7fe000   ( 496 MB)
    lowmem  : 0xc0000000 - 0xdfff0000   ( 511 MB)
      .init : 0xc09aa000 - 0xc0b53000   (1700 kB)
      .data : 0xc0723427 - 0xc09a337c   (2559 kB)
      .text : 0xc0200000 - 0xc0723427   (5261 kB)
Checking if this processor honours the WP bit even in supervisor mode...Ok.
SLUB: Genslabs=13, HWalign=64, Order=0-3, MinObjects=0, CPUs=8, Nodes=1
NR_IRQS:512
CPU 0 irqstacks, hard=c18d7000 soft=c18d8000
Fast TSC calibration failed
TSC: PIT calibration matches PMTIMER. 1 loops
Detected 1994.971 MHz processor.
Console: colour VGA+ 80x25
console [ttyS0] enabled
Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
... MAX_LOCKDEP_SUBCLASSES:  8
... MAX_LOCK_DEPTH:          48
... MAX_LOCKDEP_KEYS:        8191
... CLASSHASH_SIZE:          4096
... MAX_LOCKDEP_ENTRIES:     16384
... MAX_LOCKDEP_CHAINS:      32768
... CHAINHASH_SIZE:          16384
 memory used by lock dependency info: 3743 kB
 per task-struct memory footprint: 1920 bytes
------------------------
| Locking API testsuite:
----------------------------------------------------------------------------
                                 | spin |wlock |rlock |mutex | wsem | rsem |
  --------------------------------------------------------------------------
                     A-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 A-B-B-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-B-C-C-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-C-A-B-C deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-B-C-C-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-C-D-B-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-C-D-B-C-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                    double unlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                  initialize held:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 bad unlock order:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
  --------------------------------------------------------------------------
              recursive read-lock:             |  ok  |             |  ok  |
           recursive read-lock #2:             |  ok  |             |  ok  |
            mixed read-write-lock:             |  ok  |             |  ok  |
            mixed write-read-lock:             |  ok  |             |  ok  |
  --------------------------------------------------------------------------
     hard-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
     soft-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
     hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
     soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
       sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
       sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
         hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
         soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
         hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
         soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/123:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/123:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/132:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/132:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/213:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/213:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/231:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/231:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/312:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/312:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/321:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/321:  ok  |  ok  |  ok  |
      hard-irq read-recursion/123:  ok  |
      soft-irq read-recursion/123:  ok  |
      hard-irq read-recursion/132:  ok  |
      soft-irq read-recursion/132:  ok  |
      hard-irq read-recursion/213:  ok  |
      soft-irq read-recursion/213:  ok  |
      hard-irq read-recursion/231:  ok  |
      soft-irq read-recursion/231:  ok  |
      hard-irq read-recursion/312:  ok  |
      soft-irq read-recursion/312:  ok  |
      hard-irq read-recursion/321:  ok  |
      soft-irq read-recursion/321:  ok  |
-------------------------------------------------------
Good, all 218 testcases passed! |
---------------------------------
Calibrating delay loop (skipped), value calculated using timer frequency.. 3989.94 BogoMIPS (lpj=1994971)
Security Framework initialized
SELinux:  Initializing.
SELinux:  Starting in permissive mode
Mount-cache hash table entries: 512
Initializing cgroup subsys ns
Initializing cgroup subsys cpuacct
Initializing cgroup subsys devices
Initializing cgroup subsys freezer
CPU: L1 I cache: 32K, L1 D cache: 32K
CPU: L2 cache: 2048K
Checking 'hlt' instruction... OK.
lockdep: fixing up alternatives.
SMP alternatives: switching to UP code
ACPI: Core revision 20090521
ftrace: converting mcount calls to 0f 1f 44 00 00
ftrace: allocating 20993 entries in 42 pages
..TIMER: vector=0x30 apic1=0 pin1=0 apic2=-1 pin2=-1
CPU0: Intel QEMU Virtual CPU version 0.9.1 stepping 03
Testing tracer nop: PASSED
Brought up 1 CPUs
Total of 1 processors activated (3989.94 BogoMIPS).
khelper used greatest stack depth: 2752 bytes left
Time:  8:34:26  Date: 06/19/09
NET: Registered protocol family 16
ACPI: bus type pci registered
PCI: PCI BIOS revision 2.10 entry at 0xfb120, last bus=0
PCI: Using configuration type 1 for base access
bio: create slab <bio-0> at 0
ACPI: EC: Look up EC in DSDT
ACPI: Interpreter enabled
ACPI: (supports S0 S5)
ACPI: Using IOAPIC for interrupt routing
ACPI: No dock devices found.
ACPI: PCI Root Bridge [PCI0] (0000:00)
pci 0000:00:01.1: reg 20 io port: [0xc000-0xc00f]
pci 0000:00:01.3: quirk: region b000-b03f claimed by PIIX4 ACPI
pci 0000:00:01.3: quirk: region b100-b10f claimed by PIIX4 SMB
pci 0000:00:02.0: reg 10 32bit mmio: [0xf0000000-0xf1ffffff]
pci 0000:00:02.0: reg 14 32bit mmio: [0xf2000000-0xf2000fff]
pci 0000:00:03.0: reg 10 io port: [0xc100-0xc1ff]
pci 0000:00:03.0: reg 14 32bit mmio: [0xf2001000-0xf20010ff]
pci_bus 0000:00: on NUMA node 0
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
ACPI: PCI Interrupt Link [LNKA] (IRQs 5 *10 11)
ACPI: PCI Interrupt Link [LNKB] (IRQs 5 *10 11)
ACPI: PCI Interrupt Link [LNKC] (IRQs 5 10 *11)
ACPI: PCI Interrupt Link [LNKD] (IRQs 5 10 *11)
SCSI subsystem initialized
libata version 3.00 loaded.
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
PCI: Using ACPI for IRQ routing
NetLabel: Initializing
NetLabel:  domain hash size = 128
NetLabel:  protocols = UNLABELED CIPSOv4
NetLabel:  unlabeled traffic allowed by default
pnp: PnP ACPI init
ACPI: bus type pnp registered
pnp: PnP ACPI: found 7 devices
ACPI: ACPI bus type pnp unregistered
pci_bus 0000:00: resource 0 io:  [0x00-0xffff]
pci_bus 0000:00: resource 1 mem: [0x000000-0xffffffff]
NET: Registered protocol family 2
IP route cache hash table entries: 4096 (order: 2, 16384 bytes)
TCP established hash table entries: 16384 (order: 5, 131072 bytes)
TCP bind hash table entries: 16384 (order: 7, 655360 bytes)
TCP: Hash tables configured (established 16384 bind 16384)
TCP reno registered
NET: Registered protocol family 1
Trying to unpack rootfs image as initramfs...
debug: unmapping init memory dfce1000..dfff0000
microcode: CPU0 sig=0x623, pf=0x0, revision=0x0
Microcode Update Driver: v2.00 <tigran@aivazian.fsnet.co.uk>, Peter Oruba
====[ backtrace testing ]===========
Testing a backtrace from process context.
The following trace is a kernel self test and not a bug!
Pid: 1, comm: swapper Not tainted 2.6.30-06725-g1d89b30-dirty #126
Call Trace:
 [<c071abda>] ? printk+0x1d/0x1f
 [<c027d180>] ? backtrace_regression_test+0x0/0xe0
 [<c027d1b5>] backtrace_regression_test+0x35/0xe0
 [<c09bf0b0>] ? crash_notes_memory_init+0x0/0x37
 [<c02e7c0f>] ? __alloc_percpu+0xf/0x20
 [<c09bf0c5>] ? crash_notes_memory_init+0x15/0x37
 [<c020112f>] do_one_initcall+0x2f/0x160
 [<c03358b9>] ? create_proc_entry+0x59/0xa0
 [<c0293752>] ? register_irq_proc+0xb2/0xd0
 [<c02937df>] ? init_irq_proc+0x6f/0x80
 [<c09aa35d>] kernel_init+0x168/0x1eb
 [<c09aa1f5>] ? kernel_init+0x0/0x1eb
 [<c0203d8b>] kernel_thread_helper+0x7/0x10
Testing a backtrace from irq context.
The following trace is a kernel self test and not a bug!
Pid: 4, comm: ksoftirqd/0 Not tainted 2.6.30-06725-g1d89b30-dirty #126
Call Trace:
 [<c0267d59>] ? trace_hardirqs_on_caller+0x69/0x180
 [<c027d16d>] backtrace_test_irq_callback+0xd/0x20
 [<c023f743>] tasklet_action+0x83/0x100
 [<c024054a>] __do_softirq+0xca/0x1f0
 [<c0240480>] ? __do_softirq+0x0/0x1f0
 <IRQ>  [<c024084d>] ? ksoftirqd+0x7d/0x1b0
 [<c02407d0>] ? ksoftirqd+0x0/0x1b0
 [<c02527db>] ? kthread+0x4b/0x80
 [<c0252790>] ? kthread+0x0/0x80
 [<c0203d8b>] ? kernel_thread_helper+0x7/0x10
Testing a saved backtrace.
The following trace is a kernel self test and not a bug!
 [<c020d650>] save_stack_trace+0x30/0x50
 [<c027d241>] backtrace_regression_test+0xc1/0xe0
 [<c020112f>] do_one_initcall+0x2f/0x160
 [<c09aa35d>] kernel_init+0x168/0x1eb
 [<c0203d8b>] kernel_thread_helper+0x7/0x10
 [<ffffffff>] 0xffffffff
====[ end of backtrace testing ]====
audit: initializing netlink socket (disabled)
type=2000 audit(1245400465.957:1): initialized
Testing tracer sched_switch: PASSED
Testing tracer function: 
Switched to high resolution mode on CPU 0
PASSED
Testing dynamic ftrace: PASSED
Testing tracer irqsoff: PASSED
Testing tracer preemptoff: PASSED
Testing tracer preemptirqsoff: PASSED
Testing tracer wakeup: PASSED
Testing tracer wakeup_rt: PASSED
Testing tracer function_graph: PASSED
HugeTLB registered 4 MB page size, pre-allocated 0 pages
khelper used greatest stack depth: 2732 bytes left
VFS: Disk quotas dquot_6.5.2
Dquot-cache hash table entries: 1024 (order 0, 4096 bytes)
msgmni has been set to 948
SELinux:  Registering netfilter hooks
cryptomgr_test used greatest stack depth: 2444 bytes left
alg: No test for stdrng (krng)
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252)
io scheduler noop registered
io scheduler anticipatory registered
io scheduler deadline registered
io scheduler cfq registered (default)
pci 0000:00:00.0: Limiting direct PCI/PCI transfers
pci 0000:00:01.0: PIIX3: Enabling Passive Release
pci 0000:00:01.0: Activating ISA DMA hang workarounds
pci 0000:00:02.0: Boot video device
pci_hotplug: PCI Hot Plug PCI Core version: 0.5
input: Power Button as /class/input/input0
ACPI: Power Button [PWRF]
processor ACPI_CPU:00: registered as cooling_device0
ACPI Warning: \_PR_.CPU1._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU2._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU3._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU4._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU5._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU6._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU7._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU8._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPU9._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUA._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUB._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUC._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUD._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
ACPI Warning: \_PR_.CPUE._MAT: Return type mismatch - found Integer, expected Buffer 20090521 nspredef-940
Non-volatile memory driver v1.3
Linux agpgart interface v0.103
[drm] Initialized drm 1.1.0 20060810
Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
serial8250: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A
Platform driver 'serial8250' needs updating - please use dev_pm_ops
00:06: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A
brd: module loaded
loop: module loaded
Driver 'sd' needs updating - please use bus_type methods
Driver 'sr' needs updating - please use bus_type methods
ata_piix 0000:00:01.1: version 2.13
ata_piix 0000:00:01.1: setting latency timer to 64
scsi0 : ata_piix
scsi1 : ata_piix
ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc000 irq 14
ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc008 irq 15
<6>8139cp: 10/100 PCI Ethernet driver v1.3 (Mar 22, 2004)
ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11
8139cp 0000:00:03.0: PCI INT A -> Link[LNKC] -> GSI 11 (level, high) -> IRQ 11
eth0: RTL-8139C+ at 0xe080e000, 52:54:00:12:34:56, IRQ 11
8139cp 0000:00:03.0: setting latency timer to 64
console [netcon0] enabled
netconsole: network logging started
ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
ehci_hcd: block sizes: qh 128 qtd 96 itd 160 sitd 96
ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
ohci_hcd: block sizes: ed 64 td 64
uhci_hcd: USB Universal Host Controller Interface driver
usbcore: registered new interface driver usblp
usbcore: registered new interface driver libusual
PNP: PS/2 Controller [PNP0303:KBD,PNP0f13:MOU] at 0x60,0x64 irq 1,12
Platform driver 'i8042' needs updating - please use dev_pm_ops
serio: i8042 KBD port at 0x60,0x64 irq 1
serio: i8042 AUX port at 0x60,0x64 irq 12
mice: PS/2 mouse device common for all mice
input: AT Translated Set 2 keyboard as /class/input/input1
rtc_cmos 00:01: rtc core: registered rtc_cmos as rtc0
rtc0: alarms up to one day, 114 bytes nvram
cpuidle: using governor ladder
cpuidle: using governor menu
No iBFT detected.
ata1.00: ATA-7: QEMU HARDDISK, 0.9.1, max UDMA/100
ata1.00: 16777216 sectors, multi 16: LBA48 
ata1.00: configured for MWDMA2
usbcore: registered new interface driver hiddev
usbcore: registered new interface driver usbhid
usbhid: v2.6:USB HID core driver
ata2.00: ATAPI: QEMU DVD-ROM, 0.9.1, max UDMA/100
ata2.00: configured for MWDMA2
scsi 0:0:0:0: Direct-Access     ATA      QEMU HARDDISK    0.9. PQ: 0 ANSI: 5
Advanced Linux Sound Architecture Driver Version 1.0.20.
sd 0:0:0:0: [sda] 16777216 512-byte hardware sectors: (8.58 GB/8.00 GiB)
sd 0:0:0:0: [sda] Write Protect is off
sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA
 sda: sda1 sda2 sda3
sd 0:0:0:0: Attached scsi generic sg0 type 0
scsi 1:0:0:0: CD-ROM            QEMU     QEMU DVD-ROM     0.9. PQ: 0 ANSI: 5
sr0: scsi3-mmc drive: 4x/4x xa/form2 tray
Uniform CD-ROM driver Revision: 3.20
sr 1:0:0:0: Attached scsi CD-ROM sr0
modprobe used greatest stack depth: 2332 bytes left
input: ImExPS/2 Generic Explorer Mouse as /class/input/input2
sr 1:0:0:0: Attached scsi generic sg1 type 5
sd 0:0:0:0: [sda] Attached SCSI disk
ALSA device list:
  No soundcards found.
Netfilter messages via NETLINK v0.30.
nf_conntrack version 0.5.0 (8191 buckets, 32764 max)
ctnetlink v0.93: registering with nfnetlink.
ip_tables: (C) 2000-2006 Netfilter Core Team
TCP cubic registered
Initializing XFRM netlink socket
NET: Registered protocol family 10
ip6_tables: (C) 2000-2006 Netfilter Core Team
IPv6 over IPv4 tunneling driver
NET: Registered protocol family 17
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
Using IPI No-Shortcut mode
PM: Resume from disk failed.
registered taskstats version 1
Running tests on trace events:
Testing event kfree_skb: OK
Testing event block_rq_abort: OK
Testing event block_rq_insert: OK
Testing event block_rq_issue: OK
Testing event block_rq_requeue: OK
Testing event block_rq_complete: OK
Testing event block_bio_bounce: OK
Testing event block_bio_complete: OK
Testing event block_bio_backmerge: OK
Testing event block_bio_frontmerge: OK
Testing event block_bio_queue: OK
Testing event block_getrq: OK
Testing event block_sleeprq: OK
Testing event block_plug: OK
Testing event block_unplug_timer: OK
Testing event block_unplug_io: OK
Testing event block_split: OK
Testing event block_remap: OK
Testing event kmalloc: OK
Testing event kmem_cache_alloc: OK
Testing event kmalloc_node: OK
Testing event kmem_cache_alloc_node: OK
Testing event kfree: OK
Testing event kmem_cache_free: OK
Testing event lock_acquire: OK
Testing event lock_release: OK
Testing event lock_contended: OK
Testing event lock_acquired: OK
Testing event workqueue_insertion: OK
Testing event workqueue_execution: OK
Testing event workqueue_creation: OK
Testing event workqueue_destruction: OK
Testing event irq_handler_entry: OK
Testing event irq_handler_exit: OK
Testing event softirq_entry: OK
Testing event softirq_exit: OK
Testing event sched_kthread_stop: OK
Testing event sched_kthread_stop_ret: OK
Testing event sched_wait_task: OK
Testing event sched_wakeup: OK
Testing event sched_wakeup_new: OK
Testing event sched_switch: OK
Testing event sched_migrate_task: OK
Testing event sched_process_free: OK
Testing event sched_process_exit: OK
Testing event sched_process_wait: OK
Testing event sched_process_fork: OK
Testing event sched_signal_send: OK
Running tests on trace event systems:
Testing event system skb: OK
Testing event system block: OK
Testing event system kmem: OK
Testing event system lockdep: OK
Testing event system workqueue: OK
Testing event system irq: OK
Testing event system sched: OK
Running tests on all trace events:
Testing all events: OK
Running tests again, along with the function tracer
Running tests on trace events:
Testing event kfree_skb: OK
Testing event block_rq_abort: OK
Testing event block_rq_insert: OK
Testing event block_rq_issue: OK
Testing event block_rq_requeue: OK
Testing event block_rq_complete: OK
Testing event block_bio_bounce: OK
Testing event block_bio_complete: OK
Testing event block_bio_backmerge: OK
Testing event block_bio_frontmerge: OK
Testing event block_bio_queue: OK
Testing event block_getrq: OK
Testing event block_sleeprq: OK
Testing event block_plug: OK
Testing event block_unplug_timer: OK
Testing event block_unplug_io: OK
Testing event block_split: OK
Testing event block_remap: OK
Testing event kmalloc: OK
Testing event kmem_cache_alloc: OK
Testing event kmalloc_node: OK
Testing event kmem_cache_alloc_node: OK
Testing event kfree: OK
Testing event kmem_cache_free: OK
Testing event lock_acquire: OK
Testing event lock_release: OK
Testing event lock_contended: OK
Testing event lock_acquired: OK
Testing event workqueue_insertion: OK
Testing event workqueue_execution: OK
Testing event workqueue_creation: OK
Testing event workqueue_destruction: OK
Testing event irq_handler_entry: OK
Testing event irq_handler_exit: OK
Testing event softirq_entry: OK
Testing event softirq_exit: OK
Testing event sched_kthread_stop: OK
Testing event sched_kthread_stop_ret: OK
Testing event sched_wait_task: OK
Testing event sched_wakeup: OK
Testing event sched_wakeup_new: OK
Testing event sched_switch: OK
Testing event sched_migrate_task: OK
Testing event sched_process_free: OK
Testing event sched_process_exit: OK
Testing event sched_process_wait: OK
Testing event sched_process_fork: OK
Testing event sched_signal_send: OK
Running tests on trace event systems:
Testing event system skb: OK
Testing event system block: OK
Testing event system kmem: OK
Testing event system lockdep: OK
Testing event system workqueue: OK
Testing event system irq: OK
Testing event system sched: OK
Running tests on all trace events:
Testing all events: OK
  Magic number: 13:303:574
debug: unmapping init memory c09aa000..c0b53000
Write protecting the kernel text: 5264k
Write protecting the kernel read-only data: 1908k
async/0 used greatest stack depth: 2288 bytes left
async/1 used greatest stack depth: 1932 bytes left
kjournald starting.  Commit interval 5 seconds
EXT3-fs: mounted filesystem with writeback data mode.
type=1404 audit(1245400469.713:2): enforcing=1 old_enforcing=0 auid=4294967295 ses=4294967295
SELinux: 8192 avtab hash slots, 113721 rules.
SELinux: 8192 avtab hash slots, 113721 rules.
SELinux:  8 users, 11 roles, 2611 types, 123 bools, 1 sens, 1024 cats
SELinux:  73 classes, 113721 rules
SELinux:  class kernel_service not defined in policy
SELinux:  permission open in class sock_file not defined in policy
SELinux: the above unknown classes and permissions will be allowed
SELinux:  Completing initialization.
SELinux:  Setting up existing superblocks.
SELinux: initialized (dev sda3, type ext3), uses xattr
SELinux: initialized (dev tmpfs, type tmpfs), uses transition SIDs
SELinux: initialized (dev selinuxfs, type selinuxfs), uses genfs_contexts
SELinux: initialized (dev mqueue, type mqueue), uses transition SIDs
SELinux: initialized (dev hugetlbfs, type hugetlbfs), uses genfs_contexts
SELinux: initialized (dev devpts, type devpts), uses transition SIDs
SELinux: initialized (dev inotifyfs, type inotifyfs), uses genfs_contexts
SELinux: initialized (dev tmpfs, type tmpfs), uses transition SIDs
SELinux: initialized (dev anon_inodefs, type anon_inodefs), uses genfs_contexts
SELinux: initialized (dev pipefs, type pipefs), uses task SIDs
SELinux: initialized (dev debugfs, type debugfs), uses genfs_contexts
SELinux: initialized (dev sockfs, type sockfs), uses task SIDs
SELinux: initialized (dev proc, type proc), uses genfs_contexts
SELinux: initialized (dev bdev, type bdev), uses genfs_contexts
SELinux: initialized (dev rootfs, type rootfs), uses genfs_contexts
SELinux: initialized (dev sysfs, type sysfs), uses genfs_contexts
type=1403 audit(1245400471.093:3): policy loaded auid=4294967295 ses=4294967295
plymouth used greatest stack depth: 1768 bytes left
awk used greatest stack depth: 1620 bytes left
udevd version 127 started
udevadm used greatest stack depth: 1524 bytes left
udevd used greatest stack depth: 1520 bytes left
grep used greatest stack depth: 1504 bytes left
gzip used greatest stack depth: 1480 bytes left
EXT3 FS on sda3, internal journal
kjournald starting.  Commit interval 5 seconds
EXT3 FS on sda1, internal journal
EXT3-fs: mounted filesystem with writeback data mode.
SELinux: initialized (dev sda1, type ext3), uses xattr
SELinux: initialized (dev tmpfs, type tmpfs), uses transition SIDs
SELinux: initialized (dev cgroup, type cgroup), uses genfs_contexts
Adding 522104k swap on /dev/sda2.  Priority:-1 extents:1 across:522104k 
SELinux: initialized (dev binfmt_misc, type binfmt_misc), uses genfs_contexts
ip used greatest stack depth: 1244 bytes left
eth0: link up, 100Mbps, full-duplex, lpa 0x05E1
device eth0 entered promiscuous mode
br0: port 1(eth0) entering forwarding state
Clocksource tsc unstable (delta = -168975068 ns)
eth0: no IPv6 routers present
br0: no IPv6 routers present
br0: port 1(eth0) entering disabled state
sshd used greatest stack depth: 1212 bytes left
eth0: link up, 100Mbps, full-duplex, lpa 0x05E1
br0: port 1(eth0) entering forwarding state
ip used greatest stack depth: 1168 bytes left
eth0: no IPv6 routers present
br0: no IPv6 routers present

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
       [not found]                                   ` <m3prd0havh.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
@ 2009-06-19 16:16                                     ` Yinghai Lu
  2009-06-20 23:43                                       ` Yinghai Lu
  1 sibling, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-19 16:16 UTC (permalink / raw)
  To: Nathan Lynch, Andrew Morton, Pekka Enberg
  Cc: steiner-sJ/iWh9BUns, Christoph Lameter,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w, Mel Gorman,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	mingo-X9Un+BFzKDI, tglx-hfZtesqFncYOwBW4kG4KsQ

Nathan Lynch wrote:
> Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> writes:
>>> I bisected to the commit containing this change.  Reverting fixes the
>>> problem.
>>>
>> can you use following patch to see what happens to that nodemask?
> 
> # dmesg | grep node_states
> before clear: node_states [3]: 1
> after online check: node_states [3]: 0
> 

/*
 * Array of node states.
 */
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
        [N_POSSIBLE] = NODE_MASK_ALL,
        [N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
        [N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
        [N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
        [N_CPU] = { { [0] = 1UL } },
#endif  /* NUMA */
};
EXPORT_SYMBOL(node_states);

preset that


and

Zone PFN ranges:
  DMA      0x00000000 -> 0x00001000
  Normal   0x00001000 -> 0x0001fff0
  HighMem  0x0001fff0 -> 0x0001fff0
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x0000009f
    0: 0x00000100 -> 0x0001fff0
before clear: node_states [3]: 1
On node 0 totalpages: 130959
free_area_init_node: node 0, pgdat c0976960, node_mem_map c1151000
  DMA zone: 60 pages used for memmap
  DMA zone: 0 pages reserved
  DMA zone: 3939 pages, LIFO batch:0
  Normal zone: 1860 pages used for memmap
  Normal zone: 125100 pages, LIFO batch:31
after online check: node_states [3]: 0


you don't have high mem, so we should it.

really hate those preset values that assume node 0 has RAM...


YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
  2009-06-19  8:43                                 ` Nathan Lynch
@ 2009-06-19 16:16                                   ` Yinghai Lu
       [not found]                                   ` <m3prd0havh.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
  1 sibling, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-19 16:16 UTC (permalink / raw)
  To: Nathan Lynch, Andrew Morton, Pekka Enberg
  Cc: Christoph Lameter, mingo, mel, tglx, hpa, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes, containers,
	Mel Gorman

Nathan Lynch wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>>> I bisected to the commit containing this change.  Reverting fixes the
>>> problem.
>>>
>> can you use following patch to see what happens to that nodemask?
> 
> # dmesg | grep node_states
> before clear: node_states [3]: 1
> after online check: node_states [3]: 0
> 

/*
 * Array of node states.
 */
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
        [N_POSSIBLE] = NODE_MASK_ALL,
        [N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
        [N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
        [N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
        [N_CPU] = { { [0] = 1UL } },
#endif  /* NUMA */
};
EXPORT_SYMBOL(node_states);

preset that


and

Zone PFN ranges:
  DMA      0x00000000 -> 0x00001000
  Normal   0x00001000 -> 0x0001fff0
  HighMem  0x0001fff0 -> 0x0001fff0
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x0000009f
    0: 0x00000100 -> 0x0001fff0
before clear: node_states [3]: 1
On node 0 totalpages: 130959
free_area_init_node: node 0, pgdat c0976960, node_mem_map c1151000
  DMA zone: 60 pages used for memmap
  DMA zone: 0 pages reserved
  DMA zone: 3939 pages, LIFO batch:0
  Normal zone: 1860 pages used for memmap
  Normal zone: 125100 pages, LIFO batch:31
after online check: node_states [3]: 0


you don't have high mem, so we should it.

really hate those preset values that assume node 0 has RAM...


YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
  2009-06-19  8:43                                 ` Nathan Lynch
@ 2009-06-20 23:43                                       ` Yinghai Lu
       [not found]                                   ` <m3prd0havh.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
  1 sibling, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-20 23:43 UTC (permalink / raw)
  To: Nathan Lynch
  Cc: steiner-sJ/iWh9BUns, Christoph Lameter,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, mingo-X9Un+BFzKDI

please check

[PATCH] x86: only clear node_states for 64bit

Nathan reported that
| commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
| Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
| Date:   Tue Jun 16 15:33:00 2009 -0700
|
|    page-allocator: clear N_HIGH_MEMORY map before we set it again
|    
|    SRAT tables may contains nodes of very small size.  The arch code may
|    decide to not activate such a node.  However, currently the early boot
|    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
|    active although these nodes have no present pages.
|    
|    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too

the cpuset.mems cgroup attribute on an i386 kvm guest

try to fix it by clearing node_states[N_NORMAL_MEMORY] for 64bit only,
and save/restore node_states[N_HIGH_MEMORY] in find_zone_movable_pfn

Reported-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

---
 arch/x86/mm/init_64.c |    2 ++
 mm/page_alloc.c       |   13 +++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -598,6 +598,8 @@ void __init paging_init(void)
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	/* clear the default setting with node 0 */
+	nodes_clear(node_states[N_NORMAL_MEMORY]);
 	free_area_init_nodes(max_zone_pfns);
 }
 
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4038,6 +4038,8 @@ static void __init find_zone_movable_pfn
 	int i, nid;
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
+	/* save the state before borrow the nodemask */
+	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
 
@@ -4065,7 +4067,7 @@ static void __init find_zone_movable_pfn
 
 	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
 	if (!required_kernelcore)
-		return;
+		goto out;
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
 	find_usable_zone_for_movable();
@@ -4164,6 +4166,10 @@ restart:
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+
+out:
+	/* restore the node_state */
+	node_states[N_HIGH_MEMORY] = saved_node_state;
 }
 
 /* Any regular memory on that node ? */
@@ -4248,11 +4254,6 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
-	/*
-	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
-	 * that node_mask, clear it at first
-	 */
-	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
@ 2009-06-20 23:43                                       ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-20 23:43 UTC (permalink / raw)
  To: Nathan Lynch
  Cc: Christoph Lameter, Andrew Morton, mingo, mel, tglx, hpa,
	suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes,
	containers

please check

[PATCH] x86: only clear node_states for 64bit

Nathan reported that
| commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
| Author: Yinghai Lu <yinghai@kernel.org>
| Date:   Tue Jun 16 15:33:00 2009 -0700
|
|    page-allocator: clear N_HIGH_MEMORY map before we set it again
|    
|    SRAT tables may contains nodes of very small size.  The arch code may
|    decide to not activate such a node.  However, currently the early boot
|    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
|    active although these nodes have no present pages.
|    
|    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too

the cpuset.mems cgroup attribute on an i386 kvm guest

try to fix it by clearing node_states[N_NORMAL_MEMORY] for 64bit only,
and save/restore node_states[N_HIGH_MEMORY] in find_zone_movable_pfn

Reported-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/mm/init_64.c |    2 ++
 mm/page_alloc.c       |   13 +++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -598,6 +598,8 @@ void __init paging_init(void)
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	/* clear the default setting with node 0 */
+	nodes_clear(node_states[N_NORMAL_MEMORY]);
 	free_area_init_nodes(max_zone_pfns);
 }
 
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4038,6 +4038,8 @@ static void __init find_zone_movable_pfn
 	int i, nid;
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
+	/* save the state before borrow the nodemask */
+	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
 
@@ -4065,7 +4067,7 @@ static void __init find_zone_movable_pfn
 
 	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
 	if (!required_kernelcore)
-		return;
+		goto out;
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
 	find_usable_zone_for_movable();
@@ -4164,6 +4166,10 @@ restart:
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+
+out:
+	/* restore the node_state */
+	node_states[N_HIGH_MEMORY] = saved_node_state;
 }
 
 /* Any regular memory on that node ? */
@@ -4248,11 +4254,6 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
-	/*
-	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
-	 * that node_mask, clear it at first
-	 */
-	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
       [not found]                                       ` <4A3D7419.8040305-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2009-06-22  4:39                                         ` Nathan Lynch
  0 siblings, 0 replies; 102+ messages in thread
From: Nathan Lynch @ 2009-06-22  4:39 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: steiner-sJ/iWh9BUns, Christoph Lameter,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, tglx-hfZtesqFncYOwBW4kG4KsQ,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, mingo-X9Un+BFzKDI

Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> writes:
> please check
>
> [PATCH] x86: only clear node_states for 64bit
>
> Nathan reported that
> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> | Date:   Tue Jun 16 15:33:00 2009 -0700
> |
> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> |    
> |    SRAT tables may contains nodes of very small size.  The arch code may
> |    decide to not activate such a node.  However, currently the early boot
> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> |    active although these nodes have no present pages.
> |    
> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>
> the cpuset.mems cgroup attribute on an i386 kvm guest
>
> try to fix it by only clear node_states[N_NORMAL_MEMORY] for 64bit only.
> and need to do save/restore for that in find_zone_movable_pfn

This works for my setup, thanks.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4
  2009-06-20 23:43                                       ` Yinghai Lu
  (?)
  (?)
@ 2009-06-22  4:39                                       ` Nathan Lynch
  2009-06-22 15:38                                         ` [PATCH] x86: only clear node_states for 64bit Yinghai Lu
       [not found]                                         ` <m3my807ug3.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
  -1 siblings, 2 replies; 102+ messages in thread
From: Nathan Lynch @ 2009-06-22  4:39 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Christoph Lameter, Andrew Morton, mingo, mel, tglx, hpa,
	suresh.b.siddha, linux-kernel, viro, rusty, steiner, rientjes,
	containers

Yinghai Lu <yinghai@kernel.org> writes:
> please check
>
> [PATCH] x86: only clear node_states for 64bit
>
> Nathan reported that
> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> | Author: Yinghai Lu <yinghai@kernel.org>
> | Date:   Tue Jun 16 15:33:00 2009 -0700
> |
> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> |    
> |    SRAT tables may contains nodes of very small size.  The arch code may
> |    decide to not activate such a node.  However, currently the early boot
> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> |    active although these nodes have no present pages.
> |    
> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>
> the cpuset.mems cgroup attribute on an i386 kvm guest
>
> try to fix it by only clear node_states[N_NORMAL_MEMORY] for 64bit only.
> and need to do save/restore for that in find_zone_movable_pfn

This works for my setup, thanks.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH] x86: only clear node_states for 64bit
       [not found]                                         ` <m3my807ug3.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
@ 2009-06-22 15:38                                           ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-22 15:38 UTC (permalink / raw)
  To: Christoph Lameter, Andrew Morton, mingo-X9Un+BFzKDI,
	tglx-hfZtesqFncYOwBW4kG4KsQ, hpa-YMNOUZJC4hwAvxtiuMwx3w
  Cc: steiner-sJ/iWh9BUns, suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Nathan Lynch,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	rientjes-hpIqsD4AKlfQT0dZR+AlfA

Nathan reported that
| commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
| Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
| Date:   Tue Jun 16 15:33:00 2009 -0700
|
|    page-allocator: clear N_HIGH_MEMORY map before we set it again
|    
|    SRAT tables may contains nodes of very small size.  The arch code may
|    decide to not activate such a node.  However, currently the early boot
|    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
|    active although these nodes have no present pages.
|    
|    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too

the cpuset.mems cgroup attribute on an i386 kvm guest

fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
and need to do save/restore for that in find_zone_movable_pfn

Reported-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Tested-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

---
 arch/x86/mm/init_64.c |    2 ++
 mm/page_alloc.c       |   13 +++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -598,6 +598,8 @@ void __init paging_init(void)
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	/* clear the default setting with node 0 */
+	nodes_clear(node_states[N_NORMAL_MEMORY]);
 	free_area_init_nodes(max_zone_pfns);
 }
 
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4037,6 +4037,8 @@ static void __init find_zone_movable_pfn
 	int i, nid;
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
+	/* save the state before borrow the nodemask */
+	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
 
@@ -4064,7 +4066,7 @@ static void __init find_zone_movable_pfn
 
 	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
 	if (!required_kernelcore)
-		return;
+		goto out;
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
 	find_usable_zone_for_movable();
@@ -4163,6 +4165,10 @@ restart:
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+
+out:
+	/* restore the node_state */
+	node_states[N_HIGH_MEMORY] = saved_node_state;
 }
 
 /* Any regular memory on that node ? */
@@ -4247,11 +4253,6 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
-	/*
-	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
-	 * that node_mask, clear it at first
-	 */
-	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [PATCH] x86: only clear node_states for 64bit
  2009-06-22  4:39                                       ` Nathan Lynch
@ 2009-06-22 15:38                                         ` Yinghai Lu
       [not found]                                           ` <4A3FA58A.3010909-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2009-06-26 20:54                                           ` Andrew Morton
       [not found]                                         ` <m3my807ug3.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
  1 sibling, 2 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-22 15:38 UTC (permalink / raw)
  To: Christoph Lameter, Andrew Morton, mingo, tglx, hpa
  Cc: Nathan Lynch, mel, suresh.b.siddha, linux-kernel, viro, rusty,
	steiner, rientjes, containers

Nathan reported that
| commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
| Author: Yinghai Lu <yinghai@kernel.org>
| Date:   Tue Jun 16 15:33:00 2009 -0700
|
|    page-allocator: clear N_HIGH_MEMORY map before we set it again
|    
|    SRAT tables may contains nodes of very small size.  The arch code may
|    decide to not activate such a node.  However, currently the early boot
|    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
|    active although these nodes have no present pages.
|    
|    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too

the cpuset.mems cgroup attribute on an i386 kvm guest

fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
and need to do save/restore for that in find_zone_movable_pfn

Reported-by: Nathan Lynch <ntl@pobox.com>
Tested-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/mm/init_64.c |    2 ++
 mm/page_alloc.c       |   13 +++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -598,6 +598,8 @@ void __init paging_init(void)
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	/* clear the default setting with node 0 */
+	nodes_clear(node_states[N_NORMAL_MEMORY]);
 	free_area_init_nodes(max_zone_pfns);
 }
 
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -4037,6 +4037,8 @@ static void __init find_zone_movable_pfn
 	int i, nid;
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
+	/* save the state before borrow the nodemask */
+	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
 
@@ -4064,7 +4066,7 @@ static void __init find_zone_movable_pfn
 
 	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
 	if (!required_kernelcore)
-		return;
+		goto out;
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
 	find_usable_zone_for_movable();
@@ -4163,6 +4165,10 @@ restart:
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+
+out:
+	/* restore the node_state */
+	node_states[N_HIGH_MEMORY] = saved_node_state;
 }
 
 /* Any regular memory on that node ? */
@@ -4247,11 +4253,6 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
-	/*
-	 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
-	 * that node_mask, clear it at first
-	 */
-	nodes_clear(node_states[N_HIGH_MEMORY]);
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
       [not found]                                           ` <4A3FA58A.3010909-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2009-06-26 20:54                                             ` Andrew Morton
  0 siblings, 0 replies; 102+ messages in thread
From: Andrew Morton @ 2009-06-26 20:54 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: steiner-sJ/iWh9BUns, cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, ntl-e+AXbWqSrlAAvxtiuMwx3w,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	mingo-X9Un+BFzKDI, tglx-hfZtesqFncYOwBW4kG4KsQ

On Mon, 22 Jun 2009 08:38:50 -0700
Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:

> Nathan reported that
> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> | Date:   Tue Jun 16 15:33:00 2009 -0700
> |
> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> |    
> |    SRAT tables may contains nodes of very small size.  The arch code may
> |    decide to not activate such a node.  However, currently the early boot
> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> |    active although these nodes have no present pages.
> |    
> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> 
> the cpuset.mems cgroup attribute on an i386 kvm guest
> 
> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
> and need to do save/restore for that in find_zone_movable_pfn
> 

There appear to be some words omitted from this changelog - it doesn't
make sense.

I think that perhaps a line got deleted before "the cpuset.mems cgroup
...".  That was the line which actually describes the bug which we're
fixing.  Or perhaps it was a single word?  "zeroes".


I did this:

Nathan reported that
: 
: | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
: | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
: | Date:   Tue Jun 16 15:33:00 2009 -0700
: |
: |    page-allocator: clear N_HIGH_MEMORY map before we set it again
: |
: |    SRAT tables may contains nodes of very small size.  The arch code may
: |    decide to not activate such a node.  However, currently the early boot
: |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
: |    active although these nodes have no present pages.
: |
: |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
: 
: unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
: an i386 kvm guest
: 
: Fix this by only clearing node_states[N_NORMAL_MEMORY] for 64bit only. 
: and need to do save/restore for that in find_zone_movable_pfn

Please check whether that is correct.  If not, how should it be changed?

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
  2009-06-22 15:38                                         ` [PATCH] x86: only clear node_states for 64bit Yinghai Lu
       [not found]                                           ` <4A3FA58A.3010909-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2009-06-26 20:54                                           ` Andrew Morton
  2009-06-26 21:09                                             ` Yinghai Lu
       [not found]                                             ` <20090626135428.d8f88a70.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
  1 sibling, 2 replies; 102+ messages in thread
From: Andrew Morton @ 2009-06-26 20:54 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: cl, mingo, tglx, hpa, ntl, mel, suresh.b.siddha, linux-kernel,
	viro, rusty, steiner, rientjes, containers

On Mon, 22 Jun 2009 08:38:50 -0700
Yinghai Lu <yinghai@kernel.org> wrote:

> Nathan reported that
> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> | Author: Yinghai Lu <yinghai@kernel.org>
> | Date:   Tue Jun 16 15:33:00 2009 -0700
> |
> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> |    
> |    SRAT tables may contains nodes of very small size.  The arch code may
> |    decide to not activate such a node.  However, currently the early boot
> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> |    active although these nodes have no present pages.
> |    
> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> 
> the cpuset.mems cgroup attribute on an i386 kvm guest
> 
> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
> and need to do save/restore for that in find_zone_movable_pfn
> 

There appear to be some words omitted from this changelog - it doesn't
make sense.

I think that perhaps a line got deleted before "the cpuset.mems cgroup
...".  That was the line which actually describes the bug which we're
fixing.  Or perhaps it was a single word?  "zeroes".


I did this:

Nathan reported that
: 
: | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
: | Author: Yinghai Lu <yinghai@kernel.org>
: | Date:   Tue Jun 16 15:33:00 2009 -0700
: |
: |    page-allocator: clear N_HIGH_MEMORY map before we set it again
: |
: |    SRAT tables may contains nodes of very small size.  The arch code may
: |    decide to not activate such a node.  However, currently the early boot
: |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
: |    active although these nodes have no present pages.
: |
: |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
: 
: unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
: an i386 kvm guest
: 
: Fix this by only clearing node_states[N_NORMAL_MEMORY] for 64bit only. 
: and need to do save/restore for that in find_zone_movable_pfn

Please check whether that is correct.  If not, how should it be changed?


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
       [not found]                                             ` <20090626135428.d8f88a70.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
@ 2009-06-26 21:09                                               ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-26 21:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: steiner-sJ/iWh9BUns, cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, ntl-e+AXbWqSrlAAvxtiuMwx3w,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	mingo-X9Un+BFzKDI, tglx-hfZtesqFncYOwBW4kG4KsQ

Andrew Morton wrote:
> On Mon, 22 Jun 2009 08:38:50 -0700
> Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> 
>> Nathan reported that
>> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>> | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>> | Date:   Tue Jun 16 15:33:00 2009 -0700
>> |
>> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>> |    
>> |    SRAT tables may contains nodes of very small size.  The arch code may
>> |    decide to not activate such a node.  However, currently the early boot
>> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>> |    active although these nodes have no present pages.
>> |    
>> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>
>> the cpuset.mems cgroup attribute on an i386 kvm guest
>>
>> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
>> and need to do save/restore for that in find_zone_movable_pfn
>>
> 
> There appear to be some words omitted from this changelog - it doesn't
> make sense.
> 
> I think that perhaps a line got deleted before "the cpuset.mems cgroup
> ...".  That was the line which actualy describes the bug which we're
> fixing.  Or perhaps it was a single word?  "zeroes".
> 
> 
> I did this:
> 
> Nathan reported that
> : 
> : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> : | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> : | Date:   Tue Jun 16 15:33:00 2009 -0700
> : |
> : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> : |
> : |    SRAT tables may contains nodes of very small size.  The arch code may
> : |    decide to not activate such a node.  However, currently the early boot
> : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> : |    active although these nodes have no present pages.
> : |
> : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> : 

"
> : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
> : an i386 kvm guest
"
==> 

32bit assumes the NORMAL_MEMORY and HIGH_MEMORY bits are always set for node 0,

and some code only checks whether HIGH_MEMORY is set to know whether NORMAL_MEMORY is there.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
  2009-06-26 20:54                                           ` Andrew Morton
@ 2009-06-26 21:09                                             ` Yinghai Lu
       [not found]                                               ` <4A4538FE.2090101-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
       [not found]                                             ` <20090626135428.d8f88a70.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
  1 sibling, 1 reply; 102+ messages in thread
From: Yinghai Lu @ 2009-06-26 21:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: cl, mingo, tglx, hpa, ntl, mel, suresh.b.siddha, linux-kernel,
	viro, rusty, steiner, rientjes, containers

Andrew Morton wrote:
> On Mon, 22 Jun 2009 08:38:50 -0700
> Yinghai Lu <yinghai@kernel.org> wrote:
> 
>> Nathan reported that
>> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>> | Author: Yinghai Lu <yinghai@kernel.org>
>> | Date:   Tue Jun 16 15:33:00 2009 -0700
>> |
>> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>> |    
>> |    SRAT tables may contains nodes of very small size.  The arch code may
>> |    decide to not activate such a node.  However, currently the early boot
>> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>> |    active although these nodes have no present pages.
>> |    
>> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>
>> the cpuset.mems cgroup attribute on an i386 kvm guest
>>
>> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
>> and need to do save/restore for that in find_zone_movable_pfn
>>
> 
> There appear to be some words omitted from this changelog - it doesn't
> make sense.
> 
> I think that perhaps a line got deleted before "the cpuset.mems cgroup
> ...".  That was the line which actualy describes the bug which we're
> fixing.  Or perhaps it was a single word?  "zeroes".
> 
> 
> I did this:
> 
> Nathan reported that
> : 
> : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> : | Author: Yinghai Lu <yinghai@kernel.org>
> : | Date:   Tue Jun 16 15:33:00 2009 -0700
> : |
> : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> : |
> : |    SRAT tables may contains nodes of very small size.  The arch code may
> : |    decide to not activate such a node.  However, currently the early boot
> : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> : |    active although these nodes have no present pages.
> : |
> : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> : 

"
> : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
> : an i386 kvm guest
"
==> 

32bit assumes the NORMAL_MEMORY bit and HIGH_MEMORY bit are always set for Node0.

and some code only checks whether HIGH_MEMORY is set to know whether NORMAL_MEMORY is set.

YH


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
  2009-06-26 21:09                                             ` Yinghai Lu
@ 2009-06-27 17:17                                                   ` Ingo Molnar
  0 siblings, 0 replies; 102+ messages in thread
From: Ingo Molnar @ 2009-06-27 17:17 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: steiner-sJ/iWh9BUns, cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, ntl-e+AXbWqSrlAAvxtiuMwx3w,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, tglx-hfZtesqFncYOwBW4kG4KsQ


* Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:

> Andrew Morton wrote:
> > On Mon, 22 Jun 2009 08:38:50 -0700
> > Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> > 
> >> Nathan reported that
> >> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> >> | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> >> | Date:   Tue Jun 16 15:33:00 2009 -0700
> >> |
> >> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> >> |    
> >> |    SRAT tables may contains nodes of very small size.  The arch code may
> >> |    decide to not activate such a node.  However, currently the early boot
> >> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> >> |    active although these nodes have no present pages.
> >> |    
> >> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> >>
> >> the cpuset.mems cgroup attribute on an i386 kvm guest
> >>
> >> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
> >> and need to do save/restore for that in find_zone_movable_pfn
> >>
> > 
> > There appear to be some words omitted from this changelog - it doesn't
> > make sense.
> > 
> > I think that perhaps a line got deleted before "the cpuset.mems cgroup
> > ...".  That was the line which actualy describes the bug which we're
> > fixing.  Or perhaps it was a single word?  "zeroes".
> > 
> > 
> > I did this:
> > 
> > Nathan reported that
> > : 
> > : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> > : | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > : | Date:   Tue Jun 16 15:33:00 2009 -0700
> > : |
> > : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> > : |
> > : |    SRAT tables may contains nodes of very small size.  The arch code may
> > : |    decide to not activate such a node.  However, currently the early boot
> > : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> > : |    active although these nodes have no present pages.
> > : |
> > : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> > : 
> 
> "
> > : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
> > : an i386 kvm guest
> "
> ==> 
> 
> 32bit assume NORMAL_MEMORY bit and HIGH_MEMORY bit are set for 
> Node0 always.

Where in the code is this assumption?

> and some code only check if HIGH_MEMORY is there to know if 
> NORMAL_MEMORY is there.

Which code is that exactly?

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
@ 2009-06-27 17:17                                                   ` Ingo Molnar
  0 siblings, 0 replies; 102+ messages in thread
From: Ingo Molnar @ 2009-06-27 17:17 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, cl, tglx, hpa, ntl, mel, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes, containers


* Yinghai Lu <yinghai@kernel.org> wrote:

> Andrew Morton wrote:
> > On Mon, 22 Jun 2009 08:38:50 -0700
> > Yinghai Lu <yinghai@kernel.org> wrote:
> > 
> >> Nathan reported that
> >> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> >> | Author: Yinghai Lu <yinghai@kernel.org>
> >> | Date:   Tue Jun 16 15:33:00 2009 -0700
> >> |
> >> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> >> |    
> >> |    SRAT tables may contains nodes of very small size.  The arch code may
> >> |    decide to not activate such a node.  However, currently the early boot
> >> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> >> |    active although these nodes have no present pages.
> >> |    
> >> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> >>
> >> the cpuset.mems cgroup attribute on an i386 kvm guest
> >>
> >> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
> >> and need to do save/restore for that in find_zone_movable_pfn
> >>
> > 
> > There appear to be some words omitted from this changelog - it doesn't
> > make sense.
> > 
> > I think that perhaps a line got deleted before "the cpuset.mems cgroup
> > ...".  That was the line which actualy describes the bug which we're
> > fixing.  Or perhaps it was a single word?  "zeroes".
> > 
> > 
> > I did this:
> > 
> > Nathan reported that
> > : 
> > : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
> > : | Author: Yinghai Lu <yinghai@kernel.org>
> > : | Date:   Tue Jun 16 15:33:00 2009 -0700
> > : |
> > : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
> > : |
> > : |    SRAT tables may contains nodes of very small size.  The arch code may
> > : |    decide to not activate such a node.  However, currently the early boot
> > : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
> > : |    active although these nodes have no present pages.
> > : |
> > : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
> > : 
> 
> "
> > : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
> > : an i386 kvm guest
> "
> ==> 
> 
> 32bit assume NORMAL_MEMORY bit and HIGH_MEMORY bit are set for 
> Node0 always.

Where in the code is this assumption?

> and some code only check if HIGH_MEMORY is there to know if 
> NORMAL_MEMORY is there.

Which code is that exactly?

	Ingo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
  2009-06-27 17:17                                                   ` Ingo Molnar
@ 2009-06-27 20:40                                                       ` Yinghai Lu
  -1 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-27 20:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: steiner-sJ/iWh9BUns, cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, ntl-e+AXbWqSrlAAvxtiuMwx3w,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, tglx-hfZtesqFncYOwBW4kG4KsQ

Ingo Molnar wrote:
> * Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> 
>> Andrew Morton wrote:
>>> On Mon, 22 Jun 2009 08:38:50 -0700
>>> Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>>>
>>>> Nathan reported that
>>>> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>>> | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>>>> | Date:   Tue Jun 16 15:33:00 2009 -0700
>>>> |
>>>> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>>> |    
>>>> |    SRAT tables may contains nodes of very small size.  The arch code may
>>>> |    decide to not activate such a node.  However, currently the early boot
>>>> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>>> |    active although these nodes have no present pages.
>>>> |    
>>>> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>>>
>>>> the cpuset.mems cgroup attribute on an i386 kvm guest
>>>>
>>>> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
>>>> and need to do save/restore for that in find_zone_movable_pfn
>>>>
>>> There appear to be some words omitted from this changelog - it doesn't
>>> make sense.
>>>
>>> I think that perhaps a line got deleted before "the cpuset.mems cgroup
>>> ...".  That was the line which actualy describes the bug which we're
>>> fixing.  Or perhaps it was a single word?  "zeroes".
>>>
>>>
>>> I did this:
>>>
>>> Nathan reported that
>>> : 
>>> : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>> : | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>>> : | Date:   Tue Jun 16 15:33:00 2009 -0700
>>> : |
>>> : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>> : |
>>> : |    SRAT tables may contains nodes of very small size.  The arch code may
>>> : |    decide to not activate such a node.  However, currently the early boot
>>> : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>> : |    active although these nodes have no present pages.
>>> : |
>>> : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>> : 
>> "
>>> : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
>>> : an i386 kvm guest
>> "
>> ==> 
>>
>> 32bit assume NORMAL_MEMORY bit and HIGH_MEMORY bit are set for 
>> Node0 always.
> 
> Where in the code is this assumption?

in mm/page_alloc.c
/*
 * Array of node states.
 */
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
        [N_POSSIBLE] = NODE_MASK_ALL,
        [N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
        [N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
        [N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
        [N_CPU] = { { [0] = 1UL } },
#endif  /* NUMA */
};
EXPORT_SYMBOL(node_states);

for x86 64bit, we clear POSSIBLE and ONLINE in arch/x86/mm/numa_64.c::initmem_init
and this patch clears NORMAL in arch/x86/mm/init_64.c::paging_init

for x86 32bit: ONLINE gets cleared in get_memcfg_from_srat()
and NORMAL and HIGH_MEMORY are not cleared
before we try to set them again in mm/page_alloc.c::free_area_init_nodes

> 
>> and some code only check if HIGH_MEMORY is there to know if 
>> NORMAL_MEMORY is there.
> 
> Which code is that exactly?
> 
with grep:
arch/x86/mm/init_64.c:	nodes_clear(node_states[N_NORMAL_MEMORY]);
drivers/base/node.c:	return print_nodes_state(N_NORMAL_MEMORY, buf);
include/linux/nodemask.h:	N_NORMAL_MEMORY,	/* The node has regular memory */
include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
mm/memcontrol.c:	if (!node_state(node, N_NORMAL_MEMORY))
mm/page_alloc.c:	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
mm/page_alloc.c:			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY)

Documentation/cgroups/cpusets.txt:automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
Documentation/memory-hotplug.txt:status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be)
arch/ia64/kernel/uncached.c:		if (!node_state(nid, N_HIGH_MEMORY))
drivers/base/node.c:	return print_nodes_state(N_HIGH_MEMORY, buf);
include/linux/cpuset.h:#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
include/linux/nodemask.h:	N_HIGH_MEMORY,		/* The node has regular or high memory */
include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
kernel/cpuset.c: * found any online mems, return node_states[N_HIGH_MEMORY].
kernel/cpuset.c: * of node_states[N_HIGH_MEMORY].
kernel/cpuset.c:					node_states[N_HIGH_MEMORY]))
kernel/cpuset.c:					node_states[N_HIGH_MEMORY]);
kernel/cpuset.c:		*pmask = node_states[N_HIGH_MEMORY];
kernel/cpuset.c:	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
kernel/cpuset.c:	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
kernel/cpuset.c:				node_states[N_HIGH_MEMORY]))
kernel/cpuset.c:		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
kernel/cpuset.c:						node_states[N_HIGH_MEMORY]);
kernel/cpuset.c: * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
kernel/cpuset.c: * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
kernel/cpuset.c:		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
kernel/cpuset.c:	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
kernel/cpuset.c: * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
mm/memcontrol.c:		for_each_node_state(node, N_HIGH_MEMORY) {
mm/memory_hotplug.c:		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
mm/mempolicy.c:	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
mm/mempolicy.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
mm/mempolicy.c:		if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
mm/mempolicy.c:			nodes = node_states[N_HIGH_MEMORY];
mm/mempolicy.c:			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
mm/mempolicy.c:	for_each_node_state(n, N_HIGH_MEMORY)
mm/migrate.c:			if (!node_state(node, N_HIGH_MEMORY))
mm/oom_kill.c:	nodemask_t nodes = node_states[N_HIGH_MEMORY];
mm/page-writeback.c:	for_each_node_state(node, N_HIGH_MEMORY) {
mm/page_alloc.c:	[N_HIGH_MEMORY] = { { [0] = 1UL } },
mm/page_alloc.c: * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
mm/page_alloc.c:					&node_states[N_HIGH_MEMORY];
mm/page_alloc.c:	for_each_node_state(n, N_HIGH_MEMORY) {
mm/page_alloc.c:				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
mm/page_alloc.c: * Populate N_HIGH_MEMORY for calculating usable_nodes.
mm/page_alloc.c:			node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
mm/page_alloc.c:	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
mm/page_alloc.c:	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
mm/page_alloc.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
mm/page_alloc.c:	node_states[N_HIGH_MEMORY] = saved_node_state;
mm/page_alloc.c:			node_set_state(nid, N_HIGH_MEMORY);
mm/vmalloc.c:		for_each_node_state(nr, N_HIGH_MEMORY)
mm/vmscan.c:		for_each_node_state(nid, N_HIGH_MEMORY) {
mm/vmscan.c:	for_each_node_state(nid, N_HIGH_MEMORY)
mm/vmstat.c:	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))

for 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY

for 32bit, there are more references to N_HIGH_MEMORY...

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
@ 2009-06-27 20:40                                                       ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-27 20:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, cl, tglx, hpa, ntl, mel, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes, containers

Ingo Molnar wrote:
> * Yinghai Lu <yinghai@kernel.org> wrote:
> 
>> Andrew Morton wrote:
>>> On Mon, 22 Jun 2009 08:38:50 -0700
>>> Yinghai Lu <yinghai@kernel.org> wrote:
>>>
>>>> Nathan reported that
>>>> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>>> | Author: Yinghai Lu <yinghai@kernel.org>
>>>> | Date:   Tue Jun 16 15:33:00 2009 -0700
>>>> |
>>>> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>>> |    
>>>> |    SRAT tables may contains nodes of very small size.  The arch code may
>>>> |    decide to not activate such a node.  However, currently the early boot
>>>> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>>> |    active although these nodes have no present pages.
>>>> |    
>>>> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>>>
>>>> the cpuset.mems cgroup attribute on an i386 kvm guest
>>>>
>>>> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
>>>> and need to do save/restore for that in find_zone_movable_pfn
>>>>
>>> There appear to be some words omitted from this changelog - it doesn't
>>> make sense.
>>>
>>> I think that perhaps a line got deleted before "the cpuset.mems cgroup
>>> ...".  That was the line which actualy describes the bug which we're
>>> fixing.  Or perhaps it was a single word?  "zeroes".
>>>
>>>
>>> I did this:
>>>
>>> Nathan reported that
>>> : 
>>> : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>> : | Author: Yinghai Lu <yinghai@kernel.org>
>>> : | Date:   Tue Jun 16 15:33:00 2009 -0700
>>> : |
>>> : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>> : |
>>> : |    SRAT tables may contains nodes of very small size.  The arch code may
>>> : |    decide to not activate such a node.  However, currently the early boot
>>> : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>> : |    active although these nodes have no present pages.
>>> : |
>>> : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>> : 
>> "
>>> : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
>>> : an i386 kvm guest
>> "
>> ==> 
>>
>> 32bit assume NORMAL_MEMORY bit and HIGH_MEMORY bit are set for 
>> Node0 always.
> 
> Where in the code is this assumption?

in mm/page_alloc.c
/*
 * Array of node states.
 */
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
        [N_POSSIBLE] = NODE_MASK_ALL,
        [N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
        [N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
        [N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
        [N_CPU] = { { [0] = 1UL } },
#endif  /* NUMA */
};
EXPORT_SYMBOL(node_states);

for x86 64bit, we clear POSSIBLE and ONLINE in arch/x86/mm/numa_64.c::initmem_init
and this patch clears NORMAL in arch/x86/mm/init_64.c::paging_init

for x86 32bit: ONLINE gets cleared in get_memcfg_from_srat()
and NORMAL and HIGH_MEMORY are not cleared
before we try to set them again in mm/page_alloc.c::free_area_init_nodes

> 
>> and some code only check if HIGH_MEMORY is there to know if 
>> NORMAL_MEMORY is there.
> 
> Which code is that exactly?
> 
with grep:
arch/x86/mm/init_64.c:	nodes_clear(node_states[N_NORMAL_MEMORY]);
drivers/base/node.c:	return print_nodes_state(N_NORMAL_MEMORY, buf);
include/linux/nodemask.h:	N_NORMAL_MEMORY,	/* The node has regular memory */
include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
mm/memcontrol.c:	if (!node_state(node, N_NORMAL_MEMORY))
mm/page_alloc.c:	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
mm/page_alloc.c:			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY)

Documentation/cgroups/cpusets.txt:automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
Documentation/memory-hotplug.txt:status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be)
arch/ia64/kernel/uncached.c:		if (!node_state(nid, N_HIGH_MEMORY))
drivers/base/node.c:	return print_nodes_state(N_HIGH_MEMORY, buf);
include/linux/cpuset.h:#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
include/linux/nodemask.h:	N_HIGH_MEMORY,		/* The node has regular or high memory */
include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
kernel/cpuset.c: * found any online mems, return node_states[N_HIGH_MEMORY].
kernel/cpuset.c: * of node_states[N_HIGH_MEMORY].
kernel/cpuset.c:					node_states[N_HIGH_MEMORY]))
kernel/cpuset.c:					node_states[N_HIGH_MEMORY]);
kernel/cpuset.c:		*pmask = node_states[N_HIGH_MEMORY];
kernel/cpuset.c:	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
kernel/cpuset.c:	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
kernel/cpuset.c:				node_states[N_HIGH_MEMORY]))
kernel/cpuset.c:		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
kernel/cpuset.c:						node_states[N_HIGH_MEMORY]);
kernel/cpuset.c: * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
kernel/cpuset.c: * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
kernel/cpuset.c:		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
kernel/cpuset.c:	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
kernel/cpuset.c: * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
mm/memcontrol.c:		for_each_node_state(node, N_HIGH_MEMORY) {
mm/memory_hotplug.c:		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
mm/mempolicy.c:	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
mm/mempolicy.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
mm/mempolicy.c:		if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
mm/mempolicy.c:			nodes = node_states[N_HIGH_MEMORY];
mm/mempolicy.c:			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
mm/mempolicy.c:	for_each_node_state(n, N_HIGH_MEMORY)
mm/migrate.c:			if (!node_state(node, N_HIGH_MEMORY))
mm/oom_kill.c:	nodemask_t nodes = node_states[N_HIGH_MEMORY];
mm/page-writeback.c:	for_each_node_state(node, N_HIGH_MEMORY) {
mm/page_alloc.c:	[N_HIGH_MEMORY] = { { [0] = 1UL } },
mm/page_alloc.c: * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
mm/page_alloc.c:					&node_states[N_HIGH_MEMORY];
mm/page_alloc.c:	for_each_node_state(n, N_HIGH_MEMORY) {
mm/page_alloc.c:				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
mm/page_alloc.c: * Populate N_HIGH_MEMORY for calculating usable_nodes.
mm/page_alloc.c:			node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
mm/page_alloc.c:	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
mm/page_alloc.c:	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
mm/page_alloc.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
mm/page_alloc.c:	node_states[N_HIGH_MEMORY] = saved_node_state;
mm/page_alloc.c:			node_set_state(nid, N_HIGH_MEMORY);
mm/vmalloc.c:		for_each_node_state(nr, N_HIGH_MEMORY)
mm/vmscan.c:		for_each_node_state(nid, N_HIGH_MEMORY) {
mm/vmscan.c:	for_each_node_state(nid, N_HIGH_MEMORY)
mm/vmstat.c:	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))

for 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY

for 32bit, there are more references to N_HIGH_MEMORY...

YH


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
  2009-06-27 20:40                                                       ` Yinghai Lu
@ 2009-06-29  7:39                                                           ` Yinghai Lu
  -1 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-29  7:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: steiner-sJ/iWh9BUns, cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	suresh.b.siddha-ral2JQCrhuEAvxtiuMwx3w,
	mel-wPRd99KPJ+uzQB+pC5nmwQ,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	rusty-8n+1lVoiYb80n/F98K4Iww,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, ntl-e+AXbWqSrlAAvxtiuMwx3w,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, rientjes-hpIqsD4AKlfQT0dZR+AlfA,
	Andrew Morton, tglx-hfZtesqFncYOwBW4kG4KsQ

Yinghai Lu wrote:
> Ingo Molnar wrote:
>> * Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>>
>>> Andrew Morton wrote:
>>>> On Mon, 22 Jun 2009 08:38:50 -0700
>>>> Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>>>>
>>>>> Nathan reported that
>>>>> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>>>> | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>>>>> | Date:   Tue Jun 16 15:33:00 2009 -0700
>>>>> |
>>>>> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>>>> |    
>>>>> |    SRAT tables may contains nodes of very small size.  The arch code may
>>>>> |    decide to not activate such a node.  However, currently the early boot
>>>>> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>>>> |    active although these nodes have no present pages.
>>>>> |    
>>>>> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>>>>
>>>>> the cpuset.mems cgroup attribute on an i386 kvm guest
>>>>>
>>>>> fix it by only clearing node_states[N_NORMAL_MEMORY] for 64bit only.
>>>>> and need to do save/restore for that in find_zone_movable_pfn
>>>>>
>>>> There appear to be some words omitted from this changelog - it doesn't
>>>> make sense.
>>>>
>>>> I think that perhaps a line got deleted before "the cpuset.mems cgroup
>>>> ...".  That was the line which actualy describes the bug which we're
>>>> fixing.  Or perhaps it was a single word?  "zeroes".
>>>>
>>>>
>>>> I did this:
>>>>
>>>> Nathan reported that
>>>> : 
>>>> : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>>> : | Author: Yinghai Lu <yinghai-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>>>> : | Date:   Tue Jun 16 15:33:00 2009 -0700
>>>> : |
>>>> : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>>> : |
>>>> : |    SRAT tables may contains nodes of very small size.  The arch code may
>>>> : |    decide to not activate such a node.  However, currently the early boot
>>>> : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>>> : |    active although these nodes have no present pages.
>>>> : |
>>>> : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>>> : 
>>> "
>>>> : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
>>>> : an i386 kvm guest
>>> "
>>> ==> 
>>>
>>> 32bit assume NORMAL_MEMORY bit and HIGH_MEMORY bit are set for 
>>> Node0 always.
>> Where in the code is this assumption?
> 
> in mm/page_alloc.c
> /*
>  * Array of node states.
>  */
> nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
>         [N_POSSIBLE] = NODE_MASK_ALL,
>         [N_ONLINE] = { { [0] = 1UL } },
> #ifndef CONFIG_NUMA
>         [N_NORMAL_MEMORY] = { { [0] = 1UL } },
> #ifdef CONFIG_HIGHMEM
>         [N_HIGH_MEMORY] = { { [0] = 1UL } },
> #endif
>         [N_CPU] = { { [0] = 1UL } },
> #endif  /* NUMA */
> };
> EXPORT_SYMBOL(node_states);
> 
> for x86 64bit, we clear POSSIBLE and ONLINE in arch/x86/mm/numa_64.c::initmem_init
> and this patch clear NORMAL in arch/x86/mm/init_64.c::paging_init
> 
> for x86 32bit: ONLINE get cleared in get_memcfg_from_srat()
> and NORMAL and HIGH_MEMORY are not cleared
> before try to set new in mm/page_alloc.c::free_area_init_nodes
> 
>>> and some code only check if HIGH_MEMORY is there to know if 
>>> NORMAL_MEMORY is there.
>> Which code is that exactly?
>>
> with grep:
> arch/x86/mm/init_64.c:	nodes_clear(node_states[N_NORMAL_MEMORY]);
> drivers/base/node.c:	return print_nodes_state(N_NORMAL_MEMORY, buf);
> include/linux/nodemask.h:	N_NORMAL_MEMORY,	/* The node has regular memory */
> include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
> mm/memcontrol.c:	if (!node_state(node, N_NORMAL_MEMORY))
> mm/page_alloc.c:	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
> mm/page_alloc.c:			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY)
> 
> Documentation/cgroups/cpusets.txt:automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
> Documentation/memory-hotplug.txt:status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be)
> arch/ia64/kernel/uncached.c:		if (!node_state(nid, N_HIGH_MEMORY))
> drivers/base/node.c:	return print_nodes_state(N_HIGH_MEMORY, buf);
> include/linux/cpuset.h:#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
> include/linux/nodemask.h:	N_HIGH_MEMORY,		/* The node has regular or high memory */
> include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
> kernel/cpuset.c: * found any online mems, return node_states[N_HIGH_MEMORY].
> kernel/cpuset.c: * of node_states[N_HIGH_MEMORY].
> kernel/cpuset.c:					node_states[N_HIGH_MEMORY]))
> kernel/cpuset.c:					node_states[N_HIGH_MEMORY]);
> kernel/cpuset.c:		*pmask = node_states[N_HIGH_MEMORY];
> kernel/cpuset.c:	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
> kernel/cpuset.c:	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
> kernel/cpuset.c:				node_states[N_HIGH_MEMORY]))
> kernel/cpuset.c:		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
> kernel/cpuset.c:						node_states[N_HIGH_MEMORY]);
> kernel/cpuset.c: * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
> kernel/cpuset.c: * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
> kernel/cpuset.c:		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
> kernel/cpuset.c:	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
> kernel/cpuset.c: * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
> mm/memcontrol.c:		for_each_node_state(node, N_HIGH_MEMORY) {
> mm/memory_hotplug.c:		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
> mm/mempolicy.c:	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
> mm/mempolicy.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
> mm/mempolicy.c:		if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
> mm/mempolicy.c:			nodes = node_states[N_HIGH_MEMORY];
> mm/mempolicy.c:			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
> mm/mempolicy.c:	for_each_node_state(n, N_HIGH_MEMORY)
> mm/migrate.c:			if (!node_state(node, N_HIGH_MEMORY))
> mm/oom_kill.c:	nodemask_t nodes = node_states[N_HIGH_MEMORY];
> mm/page-writeback.c:	for_each_node_state(node, N_HIGH_MEMORY) {
> mm/page_alloc.c:	[N_HIGH_MEMORY] = { { [0] = 1UL } },
> mm/page_alloc.c: * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
> mm/page_alloc.c:					&node_states[N_HIGH_MEMORY];
> mm/page_alloc.c:	for_each_node_state(n, N_HIGH_MEMORY) {
> mm/page_alloc.c:				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
> mm/page_alloc.c: * Populate N_HIGH_MEMORY for calculating usable_nodes.
> mm/page_alloc.c:			node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
> mm/page_alloc.c:	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
> mm/page_alloc.c:	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
> mm/page_alloc.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
> mm/page_alloc.c:	node_states[N_HIGH_MEMORY] = saved_node_state;
> mm/page_alloc.c:			node_set_state(nid, N_HIGH_MEMORY);
> mm/vmalloc.c:		for_each_node_state(nr, N_HIGH_MEMORY)
> mm/vmscan.c:		for_each_node_state(nid, N_HIGH_MEMORY) {
> mm/vmscan.c:	for_each_node_state(nid, N_HIGH_MEMORY)
> mm/vmstat.c:	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
> 
> for 64bit N_HIGH_MEMORY == NORMAL_MEMORY
> 
> for 32bit, there are more reference to N_HIGH_MEMORY...
> 

 - Why is this patch good/desired?

fix the breakage of the cpuset.mems cgroup attribute on an i386 kvm guest

 - What did prior code do, and why was that wrong?

it cleared node_states[N_HIGH_MEMORY] for both 32bit and 64bit.
actually we only need to clear that for 64bit, to get it right for some strange
cases like a small range in one node.

 - What were the bad effects. (crash, right?)

cpuset.mems can not be used ...

 - What does this patch do to achieve that good status?


fix the problem with cpuset.mems and only keep the clearing for x86 64bit.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [PATCH] x86: only clear node_states for 64bit
@ 2009-06-29  7:39                                                           ` Yinghai Lu
  0 siblings, 0 replies; 102+ messages in thread
From: Yinghai Lu @ 2009-06-29  7:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, cl, tglx, hpa, ntl, mel, suresh.b.siddha,
	linux-kernel, viro, rusty, steiner, rientjes, containers

Yinghai Lu wrote:
> Ingo Molnar wrote:
>> * Yinghai Lu <yinghai@kernel.org> wrote:
>>
>>> Andrew Morton wrote:
>>>> On Mon, 22 Jun 2009 08:38:50 -0700
>>>> Yinghai Lu <yinghai@kernel.org> wrote:
>>>>
>>>>> Nathan reported that
>>>>> | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>>>> | Author: Yinghai Lu <yinghai@kernel.org>
>>>>> | Date:   Tue Jun 16 15:33:00 2009 -0700
>>>>> |
>>>>> |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>>>> |    
>>>>> |    SRAT tables may contains nodes of very small size.  The arch code may
>>>>> |    decide to not activate such a node.  However, currently the early boot
>>>>> |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>>>> |    active although these nodes have no present pages.
>>>>> |    
>>>>> |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>>>>
>>>>> the cpuset.mems cgroup attribute on an i386 kvm guest
>>>>>
>>>>> fix it by clearing node_states[N_NORMAL_MEMORY] for 64bit only.
>>>>> we also need to do save/restore for that in find_zone_movable_pfn
>>>>>
>>>> There appear to be some words omitted from this changelog - it doesn't
>>>> make sense.
>>>>
>>>> I think that perhaps a line got deleted before "the cpuset.mems cgroup
>>>> ...".  That was the line which actually describes the bug which we're
>>>> fixing.  Or perhaps it was a single word?  "zeroes".
>>>>
>>>>
>>>> I did this:
>>>>
>>>> Nathan reported that
>>>> : 
>>>> : | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74
>>>> : | Author: Yinghai Lu <yinghai@kernel.org>
>>>> : | Date:   Tue Jun 16 15:33:00 2009 -0700
>>>> : |
>>>> : |    page-allocator: clear N_HIGH_MEMORY map before we set it again
>>>> : |
>>>> : |    SRAT tables may contains nodes of very small size.  The arch code may
>>>> : |    decide to not activate such a node.  However, currently the early boot
>>>> : |    code sets N_HIGH_MEMORY for such nodes.  These nodes therefore seem to be
>>>> : |    active although these nodes have no present pages.
>>>> : |
>>>> : |    For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too
>>>> : 
>>> "
>>>> : unintentionally and incorrectly clears the cpuset.mems cgroup attribute on
>>>> : an i386 kvm guest
>>> "
>>> ==> 
>>>
>>> 32bit assumes the NORMAL_MEMORY bit and HIGH_MEMORY bit are always set 
>>> for Node0.
>> Where in the code is this assumption?
> 
> in mm/page_alloc.c
> /*
>  * Array of node states.
>  */
> nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
>         [N_POSSIBLE] = NODE_MASK_ALL,
>         [N_ONLINE] = { { [0] = 1UL } },
> #ifndef CONFIG_NUMA
>         [N_NORMAL_MEMORY] = { { [0] = 1UL } },
> #ifdef CONFIG_HIGHMEM
>         [N_HIGH_MEMORY] = { { [0] = 1UL } },
> #endif
>         [N_CPU] = { { [0] = 1UL } },
> #endif  /* NUMA */
> };
> EXPORT_SYMBOL(node_states);
> 
> for x86 64bit, we clear POSSIBLE and ONLINE in arch/x86/mm/numa_64.c::initmem_init
> and this patch clears NORMAL in arch/x86/mm/init_64.c::paging_init
> 
> for x86 32bit: ONLINE gets cleared in get_memcfg_from_srat()
> and NORMAL and HIGH_MEMORY are not cleared
> before we try to set them again in mm/page_alloc.c::free_area_init_nodes
> 
>>> and some code only checks if HIGH_MEMORY is there to know if 
>>> NORMAL_MEMORY is there.
>> Which code is that exactly?
>>
> with grep:
> arch/x86/mm/init_64.c:	nodes_clear(node_states[N_NORMAL_MEMORY]);
> drivers/base/node.c:	return print_nodes_state(N_NORMAL_MEMORY, buf);
> include/linux/nodemask.h:	N_NORMAL_MEMORY,	/* The node has regular memory */
> include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
> mm/memcontrol.c:	if (!node_state(node, N_NORMAL_MEMORY))
> mm/page_alloc.c:	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
> mm/page_alloc.c:			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:		for_each_node_state(node, N_NORMAL_MEMORY) {
> mm/slub.c:	for_each_node_state(node, N_NORMAL_MEMORY)
> 
> Documentation/cgroups/cpusets.txt:automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
> Documentation/memory-hotplug.txt:status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be)
> arch/ia64/kernel/uncached.c:		if (!node_state(nid, N_HIGH_MEMORY))
> drivers/base/node.c:	return print_nodes_state(N_HIGH_MEMORY, buf);
> include/linux/cpuset.h:#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
> include/linux/nodemask.h:	N_HIGH_MEMORY,		/* The node has regular or high memory */
> include/linux/nodemask.h:	N_HIGH_MEMORY = N_NORMAL_MEMORY,
> kernel/cpuset.c: * found any online mems, return node_states[N_HIGH_MEMORY].
> kernel/cpuset.c: * of node_states[N_HIGH_MEMORY].
> kernel/cpuset.c:					node_states[N_HIGH_MEMORY]))
> kernel/cpuset.c:					node_states[N_HIGH_MEMORY]);
> kernel/cpuset.c:		*pmask = node_states[N_HIGH_MEMORY];
> kernel/cpuset.c:	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
> kernel/cpuset.c:	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
> kernel/cpuset.c:				node_states[N_HIGH_MEMORY]))
> kernel/cpuset.c:		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
> kernel/cpuset.c:						node_states[N_HIGH_MEMORY]);
> kernel/cpuset.c: * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
> kernel/cpuset.c: * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
> kernel/cpuset.c:		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
> kernel/cpuset.c:	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
> kernel/cpuset.c: * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
> mm/memcontrol.c:		for_each_node_state(node, N_HIGH_MEMORY) {
> mm/memory_hotplug.c:		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
> mm/mempolicy.c:	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
> mm/mempolicy.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
> mm/mempolicy.c:		if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
> mm/mempolicy.c:			nodes = node_states[N_HIGH_MEMORY];
> mm/mempolicy.c:			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
> mm/mempolicy.c:	for_each_node_state(n, N_HIGH_MEMORY)
> mm/migrate.c:			if (!node_state(node, N_HIGH_MEMORY))
> mm/oom_kill.c:	nodemask_t nodes = node_states[N_HIGH_MEMORY];
> mm/page-writeback.c:	for_each_node_state(node, N_HIGH_MEMORY) {
> mm/page_alloc.c:	[N_HIGH_MEMORY] = { { [0] = 1UL } },
> mm/page_alloc.c: * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
> mm/page_alloc.c:					&node_states[N_HIGH_MEMORY];
> mm/page_alloc.c:	for_each_node_state(n, N_HIGH_MEMORY) {
> mm/page_alloc.c:				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
> mm/page_alloc.c: * Populate N_HIGH_MEMORY for calculating usable_nodes.
> mm/page_alloc.c:			node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
> mm/page_alloc.c:	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
> mm/page_alloc.c:	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
> mm/page_alloc.c:	for_each_node_state(nid, N_HIGH_MEMORY) {
> mm/page_alloc.c:	node_states[N_HIGH_MEMORY] = saved_node_state;
> mm/page_alloc.c:			node_set_state(nid, N_HIGH_MEMORY);
> mm/vmalloc.c:		for_each_node_state(nr, N_HIGH_MEMORY)
> mm/vmscan.c:		for_each_node_state(nid, N_HIGH_MEMORY) {
> mm/vmscan.c:	for_each_node_state(nid, N_HIGH_MEMORY)
> mm/vmstat.c:	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
> 
> for 64bit, N_HIGH_MEMORY == N_NORMAL_MEMORY
> 
> for 32bit, there are more references to N_HIGH_MEMORY...
> 

 - Why is this patch good/desired?

fix the breakage of the cpuset.mems cgroup attribute on an i386 kvm guest

 - What did prior code do, and why was that wrong?

it cleared node_states[N_HIGH_MEMORY] for both 32bit and 64bit.
actually we only need to clear that for 64bit, to make it right for some strange
cases like a small range in one node.

 - What were the bad effects? (crash, right?)

cpuset.mems cannot be used ...

 - What does this patch do to achieve that good status?


it fixes the problem with cpuset.mems by keeping the clearing only for x86 64bit.

YH

^ permalink raw reply	[flat|nested] 102+ messages in thread

end of thread, other threads:[~2009-06-29  7:42 UTC | newest]

Thread overview: 102+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-09  6:45 [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Yinghai Lu
2009-05-09  6:48 ` [PATCH 2/3] x86: add numa_move_cpus_to_node Yinghai Lu
2009-05-09  7:05   ` Justin P. Mattock
2009-05-12  1:27   ` Christoph Lameter
2009-05-11 21:53     ` Yinghai Lu
2009-05-12 20:59       ` Christoph Lameter
2009-05-12 17:16         ` Yinghai Lu
2009-05-12 21:21           ` Christoph Lameter
2009-05-13  5:39             ` Yinghai Lu
2009-05-14 19:34               ` Christoph Lameter
2009-05-14 20:58                 ` Yinghai Lu
2009-05-09  6:50 ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Yinghai Lu
2009-05-11 17:53   ` Jack Steiner
2009-05-11 19:15     ` Yinghai Lu
2009-05-11 19:36       ` Yinghai Lu
2009-05-11 19:27     ` David Rientjes
2009-05-11 21:12       ` H. Peter Anvin
2009-05-11 21:26         ` Alan Cox
2009-05-11 22:25         ` David Rientjes
2009-05-12 15:06           ` Jack Steiner
2009-05-12 15:10             ` Yinghai Lu
2009-05-12 16:16               ` Jack Steiner
2009-05-12 16:40                 ` Yinghai Lu
2009-05-12 18:03                   ` Jack Steiner
2009-05-12 21:31                     ` Yinghai Lu
2009-05-12 21:58                       ` Jack Steiner
2009-05-12 23:13                         ` Yinghai Lu
2009-05-12 23:26                           ` Yinghai Lu
2009-05-12 15:43             ` Andi Kleen
2009-05-13  1:34             ` [PATCH] x86: fix system without memory on node0 Yinghai Lu
2009-05-13  8:00               ` Andi Kleen
2009-05-13 15:58                 ` Yinghai Lu
2009-05-13 13:35               ` Ingo Molnar
2009-05-13 16:52               ` Jack Steiner
2009-05-13 17:43                 ` Yinghai Lu
2009-05-13 18:08                 ` Yinghai Lu
2009-05-12  7:15         ` [PATCH 3/3] x86: fix node_possible_map logic -v2 Andi Kleen
2009-05-11 21:33       ` Jack Steiner
2009-05-11 22:56         ` David Rientjes
2009-05-11 23:00           ` Yinghai Lu
2009-05-12  7:09       ` Andi Kleen
2009-05-12  1:02 ` [PATCH 1/3] x86: remove MEMORY_HOTPLUG_RESERVE related code Christoph Lameter
2009-05-12 11:16 ` Mel Gorman
2009-05-13  5:29   ` Yinghai Lu
2009-05-13  9:55     ` Mel Gorman
2009-05-13  6:13   ` [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2 Yinghai Lu
2009-05-13 14:59     ` Mel Gorman
2009-05-14 16:38       ` [PATCH 1/5] " Yinghai Lu
2009-05-14 16:40         ` [PATCH 2/5] x86: add numa_move_cpus_to_node Yinghai Lu
2009-05-14 16:41         ` [PATCH 3/5] x86: fix node_possible_map logic -v2 Yinghai Lu
2009-05-18  7:40           ` [tip:x86/mm] x86, mm: Fix node_possible_map logic tip-bot for Yinghai Lu
2009-05-14 16:42         ` [PATCH 4/5] x86: fix system without memory on node0 -v2 Yinghai Lu
2009-05-18  7:40           ` [tip:x86/mm] x86: fix system without memory on node0 tip-bot for Yinghai Lu
2009-05-14 16:43         ` [PATCH 5/5] mm: clear N_HIGH_MEMORY map before se set it again -v2 Yinghai Lu
2009-05-14 16:54           ` Andrew Morton
2009-05-14 17:05             ` Yinghai Lu
2009-05-14 17:25               ` Andrew Morton
2009-05-14 17:34                 ` Yinghai Lu
2009-05-14 19:44                   ` Christoph Lameter
2009-06-04  5:16                   ` [RESEND PATCH] " Yinghai Lu
2009-06-04 16:38                     ` Christoph Lameter
2009-06-04 16:48                       ` Yinghai Lu
2009-06-04 17:11                         ` Christoph Lameter
2009-06-04 17:26                           ` [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4 Yinghai Lu
2009-06-19  6:42                             ` Nathan Lynch
2009-06-19  8:18                               ` Yinghai Lu
     [not found]                                 ` <4A3B49BA.40100-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2009-06-19  8:43                                   ` Nathan Lynch
2009-06-19  8:43                                 ` Nathan Lynch
2009-06-19 16:16                                   ` Yinghai Lu
     [not found]                                   ` <m3prd0havh.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2009-06-19 16:16                                     ` Yinghai Lu
2009-06-20 23:43                                     ` Yinghai Lu
2009-06-20 23:43                                       ` Yinghai Lu
     [not found]                                       ` <4A3D7419.8040305-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2009-06-22  4:39                                         ` Nathan Lynch
2009-06-22  4:39                                       ` Nathan Lynch
2009-06-22 15:38                                         ` [PATCH] x86: only clear node_states for 64bit Yinghai Lu
     [not found]                                           ` <4A3FA58A.3010909-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2009-06-26 20:54                                             ` Andrew Morton
2009-06-26 20:54                                           ` Andrew Morton
2009-06-26 21:09                                             ` Yinghai Lu
     [not found]                                               ` <4A4538FE.2090101-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2009-06-27 17:17                                                 ` Ingo Molnar
2009-06-27 17:17                                                   ` Ingo Molnar
     [not found]                                                   ` <20090627171714.GD21595-X9Un+BFzKDI@public.gmane.org>
2009-06-27 20:40                                                     ` Yinghai Lu
2009-06-27 20:40                                                       ` Yinghai Lu
     [not found]                                                       ` <4A4683B2.106-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2009-06-29  7:39                                                         ` Yinghai Lu
2009-06-29  7:39                                                           ` Yinghai Lu
     [not found]                                             ` <20090626135428.d8f88a70.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2009-06-26 21:09                                               ` Yinghai Lu
     [not found]                                         ` <m3my807ug3.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2009-06-22 15:38                                           ` Yinghai Lu
     [not found]                               ` <m3bpokiv0u.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2009-06-19  8:18                                 ` [PATCH] mm: clear N_HIGH_MEMORY map before se set it again -v4 Yinghai Lu
     [not found]                             ` <4A2803D1.4070001-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2009-06-19  6:42                               ` Nathan Lynch
2009-05-18  7:39         ` [tip:x86/mm] mm, x86: remove MEMORY_HOTPLUG_RESERVE related code tip-bot for Yinghai Lu
     [not found] ` <20090511095022.GA23121@elte.hu>
     [not found]   ` <20090511163158.c4e4d334.akpm@linux-foundation.org>
     [not found]     ` <20090512090704.GC18004@elte.hu>
     [not found]       ` <4A0A6700.3070100@kernel.org>
     [not found]         ` <20090513133635.GB7384@elte.hu>
     [not found]           ` <4A0AFA6E.5050200@kernel.org>
     [not found]             ` <20090515173521.GA29647@elte.hu>
2009-05-15 21:38               ` tip: patches in git for irq and numa Yinghai Lu
2009-05-18  7:29                 ` Ingo Molnar
2009-05-18 13:50                   ` Peter Zijlstra
2009-05-18 13:56                     ` Ingo Molnar
2009-05-18 15:03                     ` Yinghai Lu
2009-05-18 15:09                       ` Ingo Molnar
2009-05-18 15:11                       ` Peter Zijlstra
2009-05-18 17:23                         ` Yinghai Lu
2009-05-19  9:37                           ` Ingo Molnar
2009-05-19 10:31                             ` Peter Zijlstra
2009-05-19 12:26                               ` Ingo Molnar
2009-05-19  9:39                           ` [tip:irq/numa] x86, io-apic: Don't mark pin_programmed early tip-bot for Yinghai Lu
2009-05-19 12:30                           ` tip-bot for Yinghai Lu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.