All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Yinghai Lu <yinghai@kernel.org>
Subject: [PATCH 2/4] x86: add find_e820_area_node
Date: Tue, 23 Mar 2010 01:39:45 -0700	[thread overview]
Message-ID: <1269333587-1866-3-git-send-email-yinghai@kernel.org> (raw)
In-Reply-To: <1269333587-1866-1-git-send-email-yinghai@kernel.org>

David Miller pointed out that early_res has a problem finding node data on the correct node
when we have
node0: [0, 2g), [4g, 6g), [10g, 14g)
node1: [6g, 10g), [14g, 18g)
i.e. the cross-node case.

The problem exists in the x86 bits even before we started using early_res as the bootmem replacement.
After early_res replaces bootmem, alloc_bootmem_node can still get a range on the correct node.

This patch fixes the problem for the stage before bootmem, or the early_res replacement for bootmem, is in use.

For now the only user is x86 64-bit NUMA, to find node data.

The point is to use early_node_map via find_e820_area_node().

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 arch/x86/include/asm/e820.h |    1 +
 arch/x86/kernel/e820.c      |   15 +++++++++++++++
 arch/x86/mm/numa_64.c       |    4 ++--
 include/linux/mm.h          |    2 ++
 mm/page_alloc.c             |   37 +++++++++++++++++++++++--------------
 5 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec8a52d..41553af 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -116,6 +116,7 @@ extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 #include <linux/early_res.h>
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440..05ee724 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -787,6 +787,21 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 	return -1ULL;
 }
 
+u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
+{
+	u64 addr;
+	/*
+	 * need to call this function after e820_register_active_regions
+	 * so early_node_map[] is set
+	 */
+	addr = find_memory_core_early(nid, size, align, start, end);
+	if (addr != -1ULL)
+		return addr;
+
+	/* fallback, should already have start end in the node range */
+	return find_e820_area(start, end, size, align);
+}
+
 /*
  * pre allocated 4k and reserved it in e820
  */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8948f47..ffc5ad5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -174,7 +174,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
 	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
@@ -184,7 +184,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
 	else
 		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e70f21b..5c2d17e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1160,6 +1160,8 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit);
 void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
 				 u64 goal, u64 limit);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d03c946..eef3757 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3408,12 +3408,11 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+#ifdef CONFIG_HAVE_EARLY_RES
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit)
 {
 	int i;
-	void *ptr;
 
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
@@ -3430,20 +3429,30 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 		if (addr == -1ULL)
 			continue;
 
-#if 0
-		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-				nid,
-				ei_start, ei_last, goal, limit, size,
-				align, addr);
+		return addr;
+	}
+
+	return -1ULL;
+}
 #endif
 
-		ptr = phys_to_virt(addr);
-		memset(ptr, 0, size);
-		reserve_early_without_check(addr, addr + size, "BOOTMEM");
-		return ptr;
-	}
+#ifdef CONFIG_NO_BOOTMEM
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit)
+{
+	void *ptr;
 
-	return NULL;
+	u64 addr;
+
+	addr = find_memory_core_early(nid, size, align, goal, limit);
+
+	if (addr == -1ULL)
+		return NULL;
+
+	ptr = phys_to_virt(addr);
+	memset(ptr, 0, size);
+	reserve_early_without_check(addr, addr + size, "BOOTMEM");
+	return ptr;
 }
 #endif
 
-- 
1.6.4.2


WARNING: multiple messages have this Message-ID (diff)
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Yinghai Lu <yinghai@kernel.org>
Subject: [PATCH 2/4] x86: add find_e820_area_node
Date: Tue, 23 Mar 2010 01:39:45 -0700	[thread overview]
Message-ID: <1269333587-1866-3-git-send-email-yinghai@kernel.org> (raw)
In-Reply-To: <1269333587-1866-1-git-send-email-yinghai@kernel.org>

David Miller pointed out that early_res has a problem finding node data on the correct node
when we have
node0: [0, 2g), [4g, 6g), [10g, 14g)
node1: [6g, 10g), [14g, 18g)
i.e. the cross-node case.

The problem exists in the x86 bits even before we started using early_res as the bootmem replacement.
After early_res replaces bootmem, alloc_bootmem_node can still get a range on the correct node.

This patch fixes the problem for the stage before bootmem, or the early_res replacement for bootmem, is in use.

For now the only user is x86 64-bit NUMA, to find node data.

The point is to use early_node_map via find_e820_area_node().

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 arch/x86/include/asm/e820.h |    1 +
 arch/x86/kernel/e820.c      |   15 +++++++++++++++
 arch/x86/mm/numa_64.c       |    4 ++--
 include/linux/mm.h          |    2 ++
 mm/page_alloc.c             |   37 +++++++++++++++++++++++--------------
 5 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec8a52d..41553af 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -116,6 +116,7 @@ extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 #include <linux/early_res.h>
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440..05ee724 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -787,6 +787,21 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 	return -1ULL;
 }
 
+u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
+{
+	u64 addr;
+	/*
+	 * need to call this function after e820_register_active_regions
+	 * so early_node_map[] is set
+	 */
+	addr = find_memory_core_early(nid, size, align, start, end);
+	if (addr != -1ULL)
+		return addr;
+
+	/* fallback, should already have start end in the node range */
+	return find_e820_area(start, end, size, align);
+}
+
 /*
  * pre allocated 4k and reserved it in e820
  */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8948f47..ffc5ad5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -174,7 +174,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
 	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
@@ -184,7 +184,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
 	else
 		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e70f21b..5c2d17e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1160,6 +1160,8 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit);
 void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
 				 u64 goal, u64 limit);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d03c946..eef3757 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3408,12 +3408,11 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+#ifdef CONFIG_HAVE_EARLY_RES
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit)
 {
 	int i;
-	void *ptr;
 
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
@@ -3430,20 +3429,30 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 		if (addr == -1ULL)
 			continue;
 
-#if 0
-		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-				nid,
-				ei_start, ei_last, goal, limit, size,
-				align, addr);
+		return addr;
+	}
+
+	return -1ULL;
+}
 #endif
 
-		ptr = phys_to_virt(addr);
-		memset(ptr, 0, size);
-		reserve_early_without_check(addr, addr + size, "BOOTMEM");
-		return ptr;
-	}
+#ifdef CONFIG_NO_BOOTMEM
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit)
+{
+	void *ptr;
 
-	return NULL;
+	u64 addr;
+
+	addr = find_memory_core_early(nid, size, align, goal, limit);
+
+	if (addr == -1ULL)
+		return NULL;
+
+	ptr = phys_to_virt(addr);
+	memset(ptr, 0, size);
+	reserve_early_without_check(addr, addr + size, "BOOTMEM");
+	return ptr;
 }
 #endif
 
-- 
1.6.4.2

  parent reply	other threads:[~2010-03-23  8:43 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-23  8:39 [PATCH 00/04] use lmb with x86 Yinghai Lu
2010-03-23  8:39 ` Yinghai Lu
2010-03-23  8:39 ` [PATCH 1/4] x86: do not free zero sized per cpu areas Yinghai Lu
2010-03-23  8:39   ` Yinghai Lu
2010-03-23  8:39   ` Yinghai Lu
2010-03-23  8:39 ` Yinghai Lu [this message]
2010-03-23  8:39   ` [PATCH 2/4] x86: add find_e820_area_node Yinghai Lu
2010-03-23  8:39 ` [PATCH 3/4] x86: add sanitize_e820_map Yinghai Lu
2010-03-23  8:39   ` Yinghai Lu
2010-03-23  8:39 ` [RFC PATCH -v2 4/4] x86: use lmb to replace early_res Yinghai Lu
2010-03-23  8:39   ` Yinghai Lu
2010-03-23  9:14   ` Ingo Molnar
2010-03-23 10:36   ` [RFC PATCH -v3 1/2] lmb: seperate region array from lmb_region struct Yinghai Lu
2010-03-23 10:36     ` Yinghai Lu
2010-03-23 10:42     ` Ingo Molnar
2010-03-23 13:18       ` Paul Mundt
2010-03-23 17:17         ` Yinghai Lu
2010-03-23 18:13           ` Paul Mundt
2010-03-24  4:45       ` Benjamin Herrenschmidt
2010-03-24  5:36         ` [RFC PATCH v4 " Yinghai Lu
2010-03-24  5:37         ` [RFC PATCH -v4 2/2] x86: use lmb to replace early_res Yinghai Lu
2010-03-24  5:46         ` [RFC PATCH -v3 1/2] lmb: seperate region array from lmb_region struct Yinghai Lu
2010-03-24  7:41           ` Benjamin Herrenschmidt
2010-03-23 15:07     ` Thomas Gleixner
2010-03-23 17:38       ` Yinghai Lu
2010-03-23 18:08         ` Ingo Molnar
2010-03-23 10:37   ` [RFC PATCH -v3 2/2] x86: use lmb to replace early_res Yinghai Lu
2010-03-23 10:37     ` Yinghai Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1269333587-1866-3-git-send-email-yinghai@kernel.org \
    --to=yinghai@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=benh@kernel.crashing.org \
    --cc=davem@davemloft.net \
    --cc=hpa@zytor.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.