From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1760170AbZEMPAK (ORCPT ); Wed, 13 May 2009 11:00:10 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1754906AbZEMO7y (ORCPT ); Wed, 13 May 2009 10:59:54 -0400 Received: from gir.skynet.ie ([193.1.99.77]:53282 "EHLO gir.skynet.ie" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753016AbZEMO7x (ORCPT ); Wed, 13 May 2009 10:59:53 -0400 Date: Wed, 13 May 2009 15:59:50 +0100 From: Mel Gorman To: Yinghai Lu Cc: Ingo Molnar , Thomas Gleixner , "H. Peter Anvin" , Andrew Morton , Christoph Lameter , Suresh Siddha , "linux-kernel@vger.kernel.org" , Al Viro , Rusty Russell Subject: Re: [PATCH] x86: remove MEMORY_HOTPLUG_RESERVE related code -v2 Message-ID: <20090513145950.GB28097@csn.ul.ie> References: <4A05269D.8000701@kernel.org> <20090512111623.GG25923@csn.ul.ie> <4A0A64FB.4080504@kernel.org> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15 Content-Disposition: inline In-Reply-To: <4A0A64FB.4080504@kernel.org> User-Agent: Mutt/1.5.17+20080114 (2008-01-14) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Tue, May 12, 2009 at 11:13:15PM -0700, Yinghai Lu wrote: > > after > | commit b263295dbffd33b0fbff670720fa178c30e3392a > | Author: Christoph Lameter > | Date: Wed Jan 30 13:30:47 2008 +0100 > | > | x86: 64-bit, make sparsemem vmemmap the only memory model > > we don't have MEMORY_HOTPLUG_RESERVE anymore. > > Historically, x86-64 had an architecture-specific method for memory hotplug > whereby it scanned the SRAT for physical memory ranges that could be > potentially used for memory hot-add later. By reserving those ranges > without physical memory, the memmap would be allocated and left dormant > until needed. This depended on the DISCONTIG memory model which has been > removed so the code implementing HOTPLUG_RESERVE is now dead. > > This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE > > Changelog updated by Mel. > > v2: updated changelog, and remove hotadd= in doc > > [ Impact: remove dead code ] > > Signed-off-by: Yinghai Lu > Reviewed-by: Christoph Lameter > Cc: Mel Gorman Patch looks good and successfully boot-tested on a small number of machines. Nice work. Reviewed-by: Mel Gorman > > --- > Documentation/x86/x86_64/boot-options.txt | 5 -- > arch/x86/include/asm/numa_64.h | 3 - > arch/x86/mm/numa_64.c | 5 -- > arch/x86/mm/srat_64.c | 63 +++++---------------------- > include/linux/mm.h | 2 > mm/page_alloc.c | 69 ------------------------------ > 6 files changed, 12 insertions(+), 135 deletions(-) > > Index: linux-2.6/arch/x86/include/asm/numa_64.h > =================================================================== > --- linux-2.6.orig/arch/x86/include/asm/numa_64.h > +++ linux-2.6/arch/x86/include/asm/numa_64.h > @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct boo > extern void numa_init_array(void); > extern int numa_off; > > -extern void srat_reserve_add_area(int nodeid); > -extern int hotadd_percent; > - > extern s16 apicid_to_node[MAX_LOCAL_APIC]; > > extern unsigned long numa_free_all_bootmem(void); > Index: linux-2.6/arch/x86/mm/numa_64.c > =================================================================== > --- linux-2.6.orig/arch/x86/mm/numa_64.c > +++ linux-2.6/arch/x86/mm/numa_64.c > @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodei > reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, > bootmap_pages< > -#ifdef CONFIG_ACPI_NUMA > - srat_reserve_add_area(nodeid); > -#endif > node_set_online(nodeid); > } > > @@ -593,8 +590,6 @@ static __init int numa_setup(char *opt) > #ifdef CONFIG_ACPI_NUMA > if (!strncmp(opt, "noacpi", 6)) > acpi_numa = -1; > - if (!strncmp(opt, "hotadd=", 7)) > - hotadd_percent = simple_strtoul(opt+7, NULL, 10); > #endif > return 0; > } > Index: linux-2.6/arch/x86/mm/srat_64.c > =================================================================== > --- linux-2.6.orig/arch/x86/mm/srat_64.c > +++ linux-2.6/arch/x86/mm/srat_64.c > @@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdat > static nodemask_t cpu_nodes_parsed __initdata; > static struct bootnode nodes[MAX_NUMNODES] __initdata; > static struct bootnode nodes_add[MAX_NUMNODES]; > -static int found_add_area __initdata; > -int hotadd_percent __initdata = 0; > > static int num_node_memblks __initdata; > static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; > @@ -66,9 +64,6 @@ static __init void cutoff_node(int i, un > { > struct bootnode *nd = &nodes[i]; > > - if (found_add_area) > - return; > - > if (nd->start < start) { > nd->start = start; > if (nd->end < nd->start) > @@ -86,7 +81,6 @@ static __init void bad_srat(void) > int i; > printk(KERN_ERR "SRAT: SRAT not used.\n"); > acpi_numa = -1; > - found_add_area = 0; > for (i = 0; i < MAX_LOCAL_APIC; i++) > apicid_to_node[i] = NUMA_NO_NODE; > for (i = 0; i < MAX_NUMNODES; i++) > @@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct > pxm, apic_id, node); > } > > -static int update_end_of_memory(unsigned long end) {return -1;} > -static int hotadd_enough_memory(struct bootnode *nd) {return 1;} > #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE > static inline int save_add_info(void) {return 1;} > #else > static inline int save_add_info(void) {return 0;} > #endif > /* > - * Update nodes_add and decide if to include add are in the zone. > - * Both SPARSE and RESERVE need nodes_add information. > - * This code supports one contiguous hot add area per node. > + * Update nodes_add[] > + * This code supports one contiguous hot add area per node > */ > -static int __init > -reserve_hotadd(int node, unsigned long start, unsigned long end) > +static void __init > +update_nodes_add(int node, unsigned long start, unsigned long end) > { > unsigned long s_pfn = start >> PAGE_SHIFT; > unsigned long e_pfn = end >> PAGE_SHIFT; > - int ret = 0, changed = 0; > + int changed = 0; > struct bootnode *nd = &nodes_add[node]; > > /* I had some trouble with strange memory hotadd regions breaking > @@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long s > mistakes */ > if ((signed long)(end - start) < NODE_MIN_SIZE) { > printk(KERN_ERR "SRAT: Hotplug area too small\n"); > - return -1; > + return; > } > > /* This check might be a bit too strict, but I'm keeping it for now. */ > @@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long s > printk(KERN_ERR > "SRAT: Hotplug area %lu -> %lu has existing memory\n", > s_pfn, e_pfn); > - return -1; > - } > - > - if (!hotadd_enough_memory(&nodes_add[node])) { > - printk(KERN_ERR "SRAT: Hotplug area too large\n"); > - return -1; > + return; > } > > /* Looks good */ > @@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long s > printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); > } > > - ret = update_end_of_memory(nd->end); > - > if (changed) > - printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); > - return ret; > + printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", > + nd->start, nd->end); > } > > /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ > @@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct ac > start, end); > e820_register_active_regions(node, start >> PAGE_SHIFT, > end >> PAGE_SHIFT); > - push_node_boundaries(node, nd->start >> PAGE_SHIFT, > - nd->end >> PAGE_SHIFT); > > - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && > - (reserve_hotadd(node, start, end) < 0)) { > - /* Ignore hotadd region. Undo damage */ > - printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); > + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { > + update_nodes_add(node, start, end); > + /* restore nodes[node] */ > *nd = oldnode; > if ((nd->start | nd->end) == 0) > node_clear(node, nodes_parsed); > @@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, > } > #endif /* CONFIG_NUMA_EMU */ > > -void __init srat_reserve_add_area(int nodeid) > -{ > - if (found_add_area && nodes_add[nodeid].end) { > - u64 total_mb; > - > - printk(KERN_INFO "SRAT: Reserving hot-add memory space " > - "for node %d at %Lx-%Lx\n", > - nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); > - total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) > - >> PAGE_SHIFT; > - total_mb *= sizeof(struct page); > - total_mb >>= 20; > - printk(KERN_INFO "SRAT: This will cost you %Lu MB of " > - "pre-allocated memory.\n", (unsigned long long)total_mb); > - reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, > - nodes_add[nodeid].end - nodes_add[nodeid].start, > - BOOTMEM_DEFAULT); > - } > -} > - > int __node_distance(int a, int b) > { > int index; > Index: linux-2.6/include/linux/mm.h > =================================================================== > --- linux-2.6.orig/include/linux/mm.h > +++ linux-2.6/include/linux/mm.h > @@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned in > unsigned long end_pfn); > extern void remove_active_range(unsigned int nid, unsigned long start_pfn, > unsigned long end_pfn); > -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, > - unsigned long end_pfn); > extern void remove_all_active_ranges(void); > extern unsigned long absent_pages_in_range(unsigned long start_pfn, > unsigned long end_pfn); > Index: linux-2.6/mm/page_alloc.c > =================================================================== > --- linux-2.6.orig/mm/page_alloc.c > +++ linux-2.6/mm/page_alloc.c > @@ -150,10 +150,6 @@ static unsigned long __meminitdata dma_r > static int __meminitdata nr_nodemap_entries; > static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; > static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; > -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE > - static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES]; > - static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES]; > -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ > static unsigned long __initdata required_kernelcore; > static unsigned long __initdata required_movablecore; > static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; > @@ -3121,64 +3117,6 @@ void __init sparse_memory_present_with_a > } > > /** > - * push_node_boundaries - Push node boundaries to at least the requested boundary > - * @nid: The nid of the node to push the boundary for > - * @start_pfn: The start pfn of the node > - * @end_pfn: The end pfn of the node > - * > - * In reserve-based hot-add, mem_map is allocated that is unused until hotadd > - * time. Specifically, on x86_64, SRAT will report ranges that can potentially > - * be hotplugged even though no physical memory exists. This function allows > - * an arch to push out the node boundaries so mem_map is allocated that can > - * be used later. > - */ > -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE > -void __init push_node_boundaries(unsigned int nid, > - unsigned long start_pfn, unsigned long end_pfn) > -{ > - mminit_dprintk(MMINIT_TRACE, "zoneboundary", > - "Entering push_node_boundaries(%u, %lu, %lu)\n", > - nid, start_pfn, end_pfn); > - > - /* Initialise the boundary for this node if necessary */ > - if (node_boundary_end_pfn[nid] == 0) > - node_boundary_start_pfn[nid] = -1UL; > - > - /* Update the boundaries */ > - if (node_boundary_start_pfn[nid] > start_pfn) > - node_boundary_start_pfn[nid] = start_pfn; > - if (node_boundary_end_pfn[nid] < end_pfn) > - node_boundary_end_pfn[nid] = end_pfn; > -} > - > -/* If necessary, push the node boundary out for reserve hotadd */ > -static void __meminit account_node_boundary(unsigned int nid, > - unsigned long *start_pfn, unsigned long *end_pfn) > -{ > - mminit_dprintk(MMINIT_TRACE, "zoneboundary", > - "Entering account_node_boundary(%u, %lu, %lu)\n", > - nid, *start_pfn, *end_pfn); > - > - /* Return if boundary information has not been provided */ > - if (node_boundary_end_pfn[nid] == 0) > - return; > - > - /* Check the boundaries and update if necessary */ > - if (node_boundary_start_pfn[nid] < *start_pfn) > - *start_pfn = node_boundary_start_pfn[nid]; > - if (node_boundary_end_pfn[nid] > *end_pfn) > - *end_pfn = node_boundary_end_pfn[nid]; > -} > -#else > -void __init push_node_boundaries(unsigned int nid, > - unsigned long start_pfn, unsigned long end_pfn) {} > - > -static void __meminit account_node_boundary(unsigned int nid, > - unsigned long *start_pfn, unsigned long *end_pfn) {} > -#endif > - > - > -/** > * get_pfn_range_for_nid - Return the start and end page frames for a node > * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. > * @start_pfn: Passed by reference. On return, it will have the node start_pfn. > @@ -3203,9 +3141,6 @@ void __meminit get_pfn_range_for_nid(uns > > if (*start_pfn == -1UL) > *start_pfn = 0; > - > - /* Push the node boundaries out if requested */ > - account_node_boundary(nid, start_pfn, end_pfn); > } > > /* > @@ -3810,10 +3745,6 @@ void __init remove_all_active_ranges(voi > { > memset(early_node_map, 0, sizeof(early_node_map)); > nr_nodemap_entries = 0; > -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE > - memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); > - memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); > -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ > } > > /* Compare two active node_active_regions */ > Index: linux-2.6/Documentation/x86/x86_64/boot-options.txt > =================================================================== > --- linux-2.6.orig/Documentation/x86/x86_64/boot-options.txt > +++ linux-2.6/Documentation/x86/x86_64/boot-options.txt > @@ -150,11 +150,6 @@ NUMA > Otherwise, the remaining system RAM is allocated to an > additional node. > > - numa=hotadd=percent > - Only allow hotadd memory to preallocate page structures upto > - percent of already available memory. > - numa=hotadd=0 will disable hotadd memory. > - > ACPI > > acpi=off Don't enable ACPI > -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab