From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932356Ab1DGCHe (ORCPT ); Wed, 6 Apr 2011 22:07:34 -0400 Received: from hera.kernel.org ([140.211.167.34]:50978 "EHLO hera.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932129Ab1DGCHa (ORCPT ); Wed, 6 Apr 2011 22:07:30 -0400 Date: Thu, 7 Apr 2011 02:07:18 GMT From: tip-bot for Tejun Heo Cc: linux-kernel@vger.kernel.org, hpa@zytor.com, mingo@redhat.com, yinghai@kernel.org, tj@kernel.org, tglx@linutronix.de, rientjes@google.com Reply-To: mingo@redhat.com, hpa@zytor.com, linux-kernel@vger.kernel.org, yinghai@kernel.org, tj@kernel.org, tglx@linutronix.de, rientjes@google.com In-Reply-To: <1301955840-7246-10-git-send-email-tj@kernel.org> References: <1301955840-7246-10-git-send-email-tj@kernel.org> To: linux-tip-commits@vger.kernel.org Subject: [tip:x86/numa] x86-32, numa: Move lowmem address space reservation to init_alloc_remap() Message-ID: Git-Commit-ID: 0e9f93c1c04c8ab10cc564df54a7ad0f83c67796 X-Mailer: tip-git-log-daemon Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.2.3 (hera.kernel.org [127.0.0.1]); Thu, 07 Apr 2011 02:07:19 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Commit-ID: 0e9f93c1c04c8ab10cc564df54a7ad0f83c67796 Gitweb: http://git.kernel.org/tip/0e9f93c1c04c8ab10cc564df54a7ad0f83c67796 Author: Tejun Heo AuthorDate: Tue, 5 Apr 2011 00:23:55 +0200 Committer: H. Peter Anvin CommitDate: Wed, 6 Apr 2011 17:57:27 -0700 x86-32, numa: Move lowmem address space reservation to init_alloc_remap() Remap alloc init is done in the following stages. 1. init_alloc_remap() calculates how much memory is necessary for each node and reserves node local memory. 2. 
initmem_init() collects how much each node needs and reserves a single contiguous lowmem area which can contain them all. 3. init_remap_allocator() initializes allocator parameters from the determined lowmem address and per-node offsets. 4. Actual remap happens. There is no reason for the lowmem remap area to be reserved as a single contiguous area in one go. The per-node areas don't interact with each other and the memblock allocator will put them side-by-side anyway. This patch breaks up the single lowmem address reservation, puts the per-node lowmem address reservation into init_alloc_remap(), and initializes allocator parameters directly in the function as all the addresses are determined there. This merges steps 2 and 3 into 1. While at it, remove the now largely irrelevant comments in init_alloc_remap(). This change causes the following behavior changes. * Remap lowmem areas are allocated in smaller per-node chunks. * Remap lowmem area reservation failure makes future remap allocations fail instead of panicking. * Remap allocator initialization is less verbose. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1301955840-7246-10-git-send-email-tj@kernel.org Acked-by: Yinghai Lu Cc: David Rientjes Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/numa_32.c | 82 +++++++++++++++---------------------------------- 1 files changed, 25 insertions(+), 57 deletions(-) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index c127543..12bb34c 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -108,9 +108,6 @@ static unsigned long node_remap_size[MAX_NUMNODES]; static void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -static unsigned long kva_start_pfn; -static unsigned long kva_pages; - int __cpuinit numa_cpu_node(int cpu) { return apic->x86_32_numa_cpu_node(cpu); @@ -266,7 +263,8 @@ void resume_map_numa_kva(pgd_t *pgd_base) static __init unsigned long init_alloc_remap(int nid, unsigned long offset) { unsigned long size; - u64 node_pa; + u64 node_pa, remap_pa; + void *remap_va; /* * The acpi/srat node info can show hot-add memroy zones where @@ -287,6 +285,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset) size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); size = ALIGN(size, LARGE_PAGE_BYTES); + /* allocate node memory and the lowmem remap area */ node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT, (u64)node_end_pfn[nid] << PAGE_SHIFT, size, LARGE_PAGE_BYTES); @@ -295,45 +294,35 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset) size, nid); return 0; } + memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); + + remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, + max_low_pfn << PAGE_SHIFT, + size, LARGE_PAGE_BYTES); + if (remap_pa == MEMBLOCK_ERROR) { + pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", + size, nid); + memblock_x86_free_range(node_pa, node_pa + size); + return 0; + } + memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); + remap_va = phys_to_virt(remap_pa); + /* initialize remap allocator parameters */ + node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; 
node_remap_size[nid] = size >> PAGE_SHIFT; node_remap_offset[nid] = offset; - printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n", - size >> PAGE_SHIFT, nid, node_pa >> PAGE_SHIFT); - /* - * prevent kva address below max_low_pfn want it on system - * with less memory later. - * layout will be: KVA address , KVA RAM - * - * we are supposed to only record the one less then - * max_low_pfn but we could have some hole in high memory, - * and it will only check page_is_ram(pfn) && - * !page_is_reserved_early(pfn) to decide to use it as free. - * So memblock_x86_reserve_range here, hope we don't run out - * of that array - */ - memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); + node_remap_start_vaddr[nid] = remap_va; + node_remap_end_vaddr[nid] = remap_va + size; + node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE); - node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; + printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", + nid, node_pa, node_pa + size, remap_va, remap_va + size); return size >> PAGE_SHIFT; } -static void init_remap_allocator(int nid) -{ - node_remap_start_vaddr[nid] = pfn_to_kaddr( - kva_start_pfn + node_remap_offset[nid]); - node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + - (node_remap_size[nid] * PAGE_SIZE); - node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + - ALIGN(sizeof(pg_data_t), PAGE_SIZE); - - printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, - (ulong) node_remap_start_vaddr[nid], - (ulong) node_remap_end_vaddr[nid]); -} - void __init initmem_init(void) { unsigned long reserve_pages = 0; @@ -352,25 +341,7 @@ void __init initmem_init(void) for_each_online_node(nid) reserve_pages += init_alloc_remap(nid, reserve_pages); - kva_pages = roundup(reserve_pages, PTRS_PER_PTE); - printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", - reserve_pages); - - kva_start_pfn = memblock_find_in_range(min_low_pfn << 
PAGE_SHIFT, - max_low_pfn << PAGE_SHIFT, - kva_pages << PAGE_SHIFT, - PTRS_PER_PTE << PAGE_SHIFT) >> PAGE_SHIFT; - if (kva_start_pfn == MEMBLOCK_ERROR) - panic("Can not get kva space\n"); - - printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", - kva_start_pfn, max_low_pfn); - printk(KERN_INFO "max_pfn = %lx\n", max_pfn); - - /* avoid clash with initrd */ - memblock_x86_reserve_range(kva_start_pfn< max_low_pfn) @@ -390,11 +361,8 @@ void __init initmem_init(void) printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", (ulong) pfn_to_kaddr(max_low_pfn)); - for_each_online_node(nid) { - init_remap_allocator(nid); - + for_each_online_node(nid) allocate_pgdat(nid); - } remap_numa_kva(); printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",