From: tip-bot for Yinghai Lu <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, hpa@zytor.com, mingo@kernel.org,
konrad.wilk@oracle.com, yinghai@kernel.org, penberg@kernel.org,
tangchen@cn.fujitsu.com, jacob.shin@amd.com, tglx@linutronix.de,
hpa@linux.intel.com
Subject: [tip:x86/mm] x86, mm: Make init_mem_mapping be able to be called several times
Date: Fri, 14 Jun 2013 14:33:51 -0700 [thread overview]
Message-ID: <tip-ae4ffbb606770c7918e627e36c84b627250b1dbb@git.kernel.org> (raw)
In-Reply-To: <1371128589-8953-22-git-send-email-tangchen@cn.fujitsu.com>
Commit-ID: ae4ffbb606770c7918e627e36c84b627250b1dbb
Gitweb: http://git.kernel.org/tip/ae4ffbb606770c7918e627e36c84b627250b1dbb
Author: Yinghai Lu <yinghai@kernel.org>
AuthorDate: Thu, 13 Jun 2013 21:03:08 +0800
Committer: H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Fri, 14 Jun 2013 14:05:43 -0700
x86, mm: Make init_mem_mapping be able to be called several times
Prepare to put page tables on local nodes.
Move the init_mem_mapping() call into early_initmem_init().
Rework alloc_low_pages() to allocate page tables in the following order:
BRK, local node, low range
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1371128589-8953-22-git-send-email-tangchen@cn.fujitsu.com
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Jacob Shin <jacob.shin@amd.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Tang Chen <tangchen@cn.fujitsu.com>
Tested-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
arch/x86/include/asm/pgtable.h | 2 +-
arch/x86/kernel/setup.c | 1 -
arch/x86/mm/init.c | 100 ++++++++++++++++++++++++++---------------
arch/x86/mm/numa.c | 24 ++++++++++
4 files changed, 88 insertions(+), 39 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1e67223..868687c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -621,7 +621,7 @@ static inline int pgd_none(pgd_t pgd)
#ifndef __ASSEMBLY__
extern int direct_gbpages;
-void init_mem_mapping(void);
+void init_mem_mapping(unsigned long begin, unsigned long end);
void early_alloc_pgt_buf(void);
/* local pte updates need not use xchg for locking */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fd0d5be..9ccbd60 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1132,7 +1132,6 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
early_acpi_boot_init();
early_initmem_init();
- init_mem_mapping();
memblock.current_limit = get_max_mapped();
early_trap_pf_init();
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 5f38e72..9ff71ff 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -24,7 +24,10 @@ static unsigned long __initdata pgt_buf_start;
static unsigned long __initdata pgt_buf_end;
static unsigned long __initdata pgt_buf_top;
-static unsigned long min_pfn_mapped;
+static unsigned long low_min_pfn_mapped;
+static unsigned long low_max_pfn_mapped;
+static unsigned long local_min_pfn_mapped;
+static unsigned long local_max_pfn_mapped;
static bool __initdata can_use_brk_pgt = true;
@@ -52,10 +55,17 @@ __ref void *alloc_low_pages(unsigned int num)
if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
unsigned long ret;
- if (min_pfn_mapped >= max_pfn_mapped)
- panic("alloc_low_page: ran out of memory");
- ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
- max_pfn_mapped << PAGE_SHIFT,
+ if (local_min_pfn_mapped >= local_max_pfn_mapped) {
+ if (low_min_pfn_mapped >= low_max_pfn_mapped)
+ panic("alloc_low_page: ran out of memory");
+ ret = memblock_find_in_range(
+ low_min_pfn_mapped << PAGE_SHIFT,
+ low_max_pfn_mapped << PAGE_SHIFT,
+ PAGE_SIZE * num , PAGE_SIZE);
+ } else
+ ret = memblock_find_in_range(
+ local_min_pfn_mapped << PAGE_SHIFT,
+ local_max_pfn_mapped << PAGE_SHIFT,
PAGE_SIZE * num , PAGE_SIZE);
if (!ret)
panic("alloc_low_page: can not alloc memory");
@@ -412,67 +422,88 @@ static unsigned long __init get_new_step_size(unsigned long step_size)
return step_size;
}
-void __init init_mem_mapping(void)
+void __init init_mem_mapping(unsigned long begin, unsigned long end)
{
- unsigned long end, real_end, start, last_start;
+ unsigned long real_end, start, last_start;
unsigned long step_size;
unsigned long addr;
unsigned long mapped_ram_size = 0;
unsigned long new_mapped_ram_size;
+ bool is_low = false;
+
+ if (!begin) {
+ probe_page_size_mask();
+ /* the ISA range is always mapped regardless of memory holes */
+ init_memory_mapping(0, ISA_END_ADDRESS);
+ begin = ISA_END_ADDRESS;
+ is_low = true;
+ }
- probe_page_size_mask();
-
-#ifdef CONFIG_X86_64
- end = max_pfn << PAGE_SHIFT;
-#else
- end = max_low_pfn << PAGE_SHIFT;
-#endif
-
- /* the ISA range is always mapped regardless of memory holes */
- init_memory_mapping(0, ISA_END_ADDRESS);
+ if (begin >= end)
+ return;
/* xen has big range in reserved near end of ram, skip it at first.*/
- addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);
+ addr = memblock_find_in_range(begin, end, PMD_SIZE, PMD_SIZE);
real_end = addr + PMD_SIZE;
/* step_size need to be small so pgt_buf from BRK could cover it */
step_size = PMD_SIZE;
- max_pfn_mapped = 0; /* will get exact value next */
- min_pfn_mapped = real_end >> PAGE_SHIFT;
+ local_max_pfn_mapped = begin >> PAGE_SHIFT;
+ local_min_pfn_mapped = real_end >> PAGE_SHIFT;
last_start = start = real_end;
/*
- * We start from the top (end of memory) and go to the bottom.
- * The memblock_find_in_range() gets us a block of RAM from the
- * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages
- * for page table.
+ * alloc_low_pages() will allocate pagetable pages in the following
+ * order:
+ * BRK, local node, low range
+ *
+ * That means it will first use up all the BRK memory, then try to get
+ * us a block of RAM from [local_min_pfn_mapped, local_max_pfn_mapped)
+ * used as new pagetable pages. If no memory on the local node has
+ * been mapped, it will allocate memory from
+ * [low_min_pfn_mapped, low_max_pfn_mapped).
*/
- while (last_start > ISA_END_ADDRESS) {
+ while (last_start > begin) {
if (last_start > step_size) {
start = round_down(last_start - 1, step_size);
- if (start < ISA_END_ADDRESS)
- start = ISA_END_ADDRESS;
+ if (start < begin)
+ start = begin;
} else
- start = ISA_END_ADDRESS;
+ start = begin;
new_mapped_ram_size = init_range_memory_mapping(start,
last_start);
+ if ((last_start >> PAGE_SHIFT) > local_max_pfn_mapped)
+ local_max_pfn_mapped = last_start >> PAGE_SHIFT;
+ local_min_pfn_mapped = start >> PAGE_SHIFT;
last_start = start;
- min_pfn_mapped = last_start >> PAGE_SHIFT;
/* only increase step_size after big range get mapped */
if (new_mapped_ram_size > mapped_ram_size)
step_size = get_new_step_size(step_size);
mapped_ram_size += new_mapped_ram_size;
}
- if (real_end < end)
+ if (real_end < end) {
init_range_memory_mapping(real_end, end);
+ if ((end >> PAGE_SHIFT) > local_max_pfn_mapped)
+ local_max_pfn_mapped = end >> PAGE_SHIFT;
+ }
+ if (is_low) {
+ low_min_pfn_mapped = local_min_pfn_mapped;
+ low_max_pfn_mapped = local_max_pfn_mapped;
+ }
+}
+
+#ifndef CONFIG_NUMA
+void __init early_initmem_init(void)
+{
#ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- /* can we preseve max_low_pfn ?*/
+ init_mem_mapping(0, max_pfn << PAGE_SHIFT);
+ if (max_pfn > max_low_pfn)
max_low_pfn = max_pfn;
}
#else
+ init_mem_mapping(0, max_low_pfn << PAGE_SHIFT);
early_ioremap_page_table_range_init();
#endif
@@ -481,11 +512,6 @@ void __init init_mem_mapping(void)
early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
}
-
-#ifndef CONFIG_NUMA
-void __init early_initmem_init(void)
-{
-}
#endif
/*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 7d76936..9b18ee8 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -17,8 +17,10 @@
#include <asm/dma.h>
#include <asm/acpi.h>
#include <asm/amd_nb.h>
+#include <asm/tlbflush.h>
#include "numa_internal.h"
+#include "mm_internal.h"
int __initdata numa_off;
nodemask_t numa_nodes_parsed __initdata;
@@ -665,9 +667,31 @@ static void __init early_x86_numa_init(void)
numa_init(dummy_numa_init);
}
+#ifdef CONFIG_X86_64
+static void __init early_x86_numa_init_mapping(void)
+{
+ init_mem_mapping(0, max_pfn << PAGE_SHIFT);
+ if (max_pfn > max_low_pfn)
+ max_low_pfn = max_pfn;
+}
+#else
+static void __init early_x86_numa_init_mapping(void)
+{
+ init_mem_mapping(0, max_low_pfn << PAGE_SHIFT);
+ early_ioremap_page_table_range_init();
+}
+#endif
+
void __init early_initmem_init(void)
{
early_x86_numa_init();
+
+ early_x86_numa_init_mapping();
+
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+
+ early_memtest(0, max_pfn_mapped<<PAGE_SHIFT);
}
void __init x86_numa_init(void)
next prev parent reply other threads:[~2013-06-14 21:34 UTC|newest]
Thread overview: 87+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-13 13:02 [Part1 PATCH v5 00/22] x86, ACPI, numa: Parse numa info earlier Tang Chen
2013-06-13 13:02 ` [Part1 PATCH v5 01/22] x86: Change get_ramdisk_{image|size}() to global Tang Chen
2013-06-14 21:30 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:02 ` [Part1 PATCH v5 02/22] x86, microcode: Use common get_ramdisk_{image|size}() Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] x86, microcode: Use common get_ramdisk_{image|size}( ) tip-bot for Yinghai Lu
2013-06-13 13:02 ` [Part1 PATCH v5 03/22] x86, ACPI, mm: Kill max_low_pfn_mapped Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-17 21:04 ` [Part1 PATCH v5 03/22] " Tejun Heo
2013-06-17 21:13 ` Yinghai Lu
2013-06-17 23:08 ` Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 04/22] x86, ACPI: Search buffer above 4GB in a second try for acpi initrd table override Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-17 21:06 ` [Part1 PATCH v5 04/22] " Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 05/22] x86, ACPI: Increase acpi initrd override tables number limit Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:02 ` [Part1 PATCH v5 06/22] x86, ACPI: Split acpi_initrd_override() into find/copy two steps Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] x86, ACPI: Split acpi_initrd_override() into find/ copy " tip-bot for Yinghai Lu
2013-06-13 13:02 ` [Part1 PATCH v5 07/22] x86, ACPI: Store override acpi tables phys addr in cpio files info array Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-17 23:38 ` [Part1 PATCH v5 07/22] " Tejun Heo
2013-06-17 23:40 ` Yinghai Lu
2013-06-17 23:52 ` Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 08/22] x86, ACPI: Make acpi_initrd_override_find work with 32bit flat mode Tang Chen
2013-06-14 21:31 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 0:07 ` [Part1 PATCH v5 08/22] " Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 09/22] x86, ACPI: Find acpi tables in initrd early from head_32.S/head64.c Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 0:33 ` [Part1 PATCH v5 09/22] " Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 10/22] x86, mm, numa: Move two functions calling on successful path later Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 0:53 ` [Part1 PATCH v5 10/22] " Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 11/22] x86, mm, numa: Call numa_meminfo_cover_memory() checking early Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 1:05 ` [Part1 PATCH v5 11/22] " Tejun Heo
2013-06-13 13:02 ` [Part1 PATCH v5 12/22] x86, mm, numa: Move node_map_pfn_alignment() to x86 Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 1:08 ` [Part1 PATCH v5 12/22] " Tejun Heo
2013-06-13 13:03 ` [Part1 PATCH v5 13/22] x86, mm, numa: Use numa_meminfo to check node_map_pfn alignment Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 1:40 ` [Part1 PATCH v5 13/22] " Tejun Heo
2013-06-13 13:03 ` [Part1 PATCH v5 14/22] x86, mm, numa: Set memblock nid later Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 1:45 ` [Part1 PATCH v5 14/22] " Tejun Heo
2013-06-13 13:03 ` [Part1 PATCH v5 15/22] x86, mm, numa: Move node_possible_map setting later Tang Chen
2013-06-14 21:32 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:03 ` [Part1 PATCH v5 16/22] x86, mm, numa: Move numa emulation handling down Tang Chen
2013-06-14 21:33 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 1:58 ` [Part1 PATCH v5 16/22] " Tejun Heo
2013-06-18 6:22 ` Yinghai Lu
2013-06-18 7:13 ` Yinghai Lu
2013-06-19 21:25 ` Yinghai Lu
2013-06-13 13:03 ` [Part1 PATCH v5 17/22] x86, ACPI, numa, ia64: split SLIT handling out Tang Chen
2013-06-14 21:33 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:03 ` [Part1 PATCH v5 18/22] x86, mm, numa: Add early_initmem_init() stub Tang Chen
2013-06-14 21:33 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:03 ` [Part1 PATCH v5 19/22] x86, mm: Parse numa info earlier Tang Chen
2013-06-14 21:33 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:03 ` [Part1 PATCH v5 20/22] x86, mm: Add comments for step_size shift Tang Chen
2013-06-14 21:33 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-13 13:03 ` [Part1 PATCH v5 21/22] x86, mm: Make init_mem_mapping be able to be called several times Tang Chen
2013-06-13 18:35 ` Konrad Rzeszutek Wilk
2013-06-13 22:47 ` Yinghai Lu
2013-06-14 5:08 ` Tang Chen
2013-06-14 21:33 ` tip-bot for Yinghai Lu [this message]
2013-06-13 13:03 ` [Part1 PATCH v5 22/22] x86, mm, numa: Put pagetable on local node ram for 64bit Tang Chen
2013-06-14 21:34 ` [tip:x86/mm] " tip-bot for Yinghai Lu
2013-06-18 2:03 ` [Part1 PATCH v5 00/22] x86, ACPI, numa: Parse numa info earlier Tejun Heo
2013-06-18 5:47 ` Tang Chen
2013-06-18 17:21 ` Tejun Heo
2013-06-20 5:52 ` Tang Chen
2013-06-20 6:17 ` Tejun Heo
2013-06-21 9:19 ` Tang Chen
2013-06-21 18:25 ` Tejun Heo
2013-06-24 3:51 ` Tang Chen
2013-06-24 7:26 ` Tang Chen
2013-06-24 19:59 ` Tejun Heo
2013-06-18 17:10 ` Vasilis Liaskovitis
2013-06-18 20:19 ` Yinghai Lu
2013-06-19 10:05 ` Vasilis Liaskovitis
2013-06-20 18:42 ` Yinghai Lu
2013-06-24 9:40 ` Gu Zheng
2013-06-21 5:19 ` H. Peter Anvin
2013-06-21 6:06 ` Tang Chen
2013-06-21 6:10 ` H. Peter Anvin
2013-06-21 6:20 ` Tang Chen
2013-06-21 6:26 ` Tejun Heo
2013-06-21 20:18 ` Yinghai Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-ae4ffbb606770c7918e627e36c84b627250b1dbb@git.kernel.org \
--to=tipbot@zytor.com \
--cc=hpa@linux.intel.com \
--cc=hpa@zytor.com \
--cc=jacob.shin@amd.com \
--cc=konrad.wilk@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=penberg@kernel.org \
--cc=tangchen@cn.fujitsu.com \
--cc=tglx@linutronix.de \
--cc=yinghai@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).