All of lore.kernel.org
 help / color / mirror / Atom feed
* + mm-fix-goal-calculating-with-usemap.patch added to -mm tree
@ 2012-06-26 23:46 akpm
       [not found] ` <CAE9FiQUeQG6nr_k54ixEA4pvRT00e4bWoMJ+m0NO=FPEnBDB8Q@mail.gmail.com>
  0 siblings, 1 reply; 5+ messages in thread
From: akpm @ 2012-06-26 23:46 UTC (permalink / raw)
  To: mm-commits; +Cc: yinghai, hannes, tj


The patch titled
     Subject: mm: fix goal calculating with usemap
has been added to the -mm tree.  Its filename is
     mm-fix-goal-calculating-with-usemap.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Yinghai Lu <yinghai@kernel.org>
Subject: mm: fix goal calculating with usemap

PAGE_SECTION_MASK should be used with pfn instead of pa.

Also restore the old behavior: limit the allocation to the same section at
first.  This requires exposing ___alloc_bootmem_node_nopanic(), which takes
a limit.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/bootmem.h |    5 +++++
 mm/bootmem.c            |    2 +-
 mm/nobootmem.c          |    2 +-
 mm/sparse.c             |   22 ++++++++++++++++------
 4 files changed, 23 insertions(+), 8 deletions(-)

diff -puN include/linux/bootmem.h~mm-fix-goal-calculating-with-usemap include/linux/bootmem.h
--- a/include/linux/bootmem.h~mm-fix-goal-calculating-with-usemap
+++ a/include/linux/bootmem.h
@@ -91,6 +91,11 @@ extern void *__alloc_bootmem_node_nopani
 				  unsigned long size,
 				  unsigned long align,
 				  unsigned long goal);
+void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal,
+				  unsigned long limit);
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal);
diff -puN mm/bootmem.c~mm-fix-goal-calculating-with-usemap mm/bootmem.c
--- a/mm/bootmem.c~mm-fix-goal-calculating-with-usemap
+++ a/mm/bootmem.c
@@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned l
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
 {
diff -puN mm/nobootmem.c~mm-fix-goal-calculating-with-usemap mm/nobootmem.c
--- a/mm/nobootmem.c~mm-fix-goal-calculating-with-usemap
+++ a/mm/nobootmem.c
@@ -274,7 +274,7 @@ void * __init __alloc_bootmem(unsigned l
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 						   unsigned long size,
 						   unsigned long align,
 						   unsigned long goal,
diff -puN mm/sparse.c~mm-fix-goal-calculating-with-usemap mm/sparse.c
--- a/mm/sparse.c~mm-fix-goal-calculating-with-usemap
+++ a/mm/sparse.c
@@ -275,8 +275,9 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	pg_data_t *host_pgdat;
-	unsigned long goal;
+	unsigned long goal, limit;
+	unsigned long *p;
+	int nid;
 	/*
 	 * A page may contain usemaps for other sections preventing the
 	 * page being freed and making a section unremovable while
@@ -287,10 +288,19 @@ sparse_early_usemaps_alloc_pgdat_section
 	 * from the same section as the pgdat where possible to avoid
 	 * this problem.
 	 */
-	goal = __pa(pgdat) & PAGE_SECTION_MASK;
-	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
-	return __alloc_bootmem_node_nopanic(host_pgdat, size,
-					    SMP_CACHE_BYTES, goal);
+	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+	limit = goal + (1UL << PA_SECTION_SHIFT);
+	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
+
+again:
+	p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					  SMP_CACHE_BYTES, goal, limit);
+	if (!p && limit) {
+		limit = 0;
+		goto again;
+	}
+
+	return p;
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
_
Subject: mm: fix goal calculating with usemap

Patches currently in -mm which might be from yinghai@kernel.org are

linux-next.patch
mm-fix-goal-calculating-with-usemap.patch
mm-memblockc-memblock_double_array-cosmetic-cleanups.patch


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 1/2] mm: sparse: fix section usemap placement calculation
       [not found]       ` <20120710221559.GH1779@cmpxchg.org>
@ 2012-07-10 22:17           ` Johannes Weiner
  2012-07-10 22:18           ` Johannes Weiner
  1 sibling, 0 replies; 5+ messages in thread
From: Johannes Weiner @ 2012-07-10 22:17 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: akpm, mm-commits, tj, linux-mm, linux-kernel

From: Yinghai Lu <yinghai@kernel.org>

238305b "mm: remove sparsemem allocation details from the bootmem
allocator" introduced a bug in the allocation goal calculation that
put section usemaps not in the same section as the node descriptors,
creating unnecessary hotplug dependencies between them:

[    0.000000] node 0 must be removed before remove section 16399
[    0.000000] node 1 must be removed before remove section 16399
[    0.000000] node 2 must be removed before remove section 16399
[    0.000000] node 3 must be removed before remove section 16399
[    0.000000] node 4 must be removed before remove section 16399
[    0.000000] node 5 must be removed before remove section 16399
[    0.000000] node 6 must be removed before remove section 16399

The reason is that it applies PAGE_SECTION_MASK to the physical
address of the node descriptor when finding a suitable place to put
the usemap, when this mask is actually intended to be used with PFNs.
Because the PFN mask is wider, the target address will point beyond
the wanted section holding the node descriptor and the node must be
offlined before the section holding the usemap can go.

Fix this by extending the mask to address width before use.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 mm/sparse.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/mm/sparse.c b/mm/sparse.c
index 6a4bf91..e861397 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -287,7 +287,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * from the same section as the pgdat where possible to avoid
 	 * this problem.
 	 */
-	goal = __pa(pgdat) & PAGE_SECTION_MASK;
+	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
 	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
 	return __alloc_bootmem_node_nopanic(host_pgdat, size,
 					    SMP_CACHE_BYTES, goal);
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [patch 1/2] mm: sparse: fix section usemap placement calculation
@ 2012-07-10 22:17           ` Johannes Weiner
  0 siblings, 0 replies; 5+ messages in thread
From: Johannes Weiner @ 2012-07-10 22:17 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: akpm, mm-commits, tj, linux-mm, linux-kernel

From: Yinghai Lu <yinghai@kernel.org>

238305b "mm: remove sparsemem allocation details from the bootmem
allocator" introduced a bug in the allocation goal calculation that
put section usemaps not in the same section as the node descriptors,
creating unnecessary hotplug dependencies between them:

[    0.000000] node 0 must be removed before remove section 16399
[    0.000000] node 1 must be removed before remove section 16399
[    0.000000] node 2 must be removed before remove section 16399
[    0.000000] node 3 must be removed before remove section 16399
[    0.000000] node 4 must be removed before remove section 16399
[    0.000000] node 5 must be removed before remove section 16399
[    0.000000] node 6 must be removed before remove section 16399

The reason is that it applies PAGE_SECTION_MASK to the physical
address of the node descriptor when finding a suitable place to put
the usemap, when this mask is actually intended to be used with PFNs.
Because the PFN mask is wider, the target address will point beyond
the wanted section holding the node descriptor and the node must be
offlined before the section holding the usemap can go.

Fix this by extending the mask to address width before use.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 mm/sparse.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/mm/sparse.c b/mm/sparse.c
index 6a4bf91..e861397 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -287,7 +287,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * from the same section as the pgdat where possible to avoid
 	 * this problem.
 	 */
-	goal = __pa(pgdat) & PAGE_SECTION_MASK;
+	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
 	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
 	return __alloc_bootmem_node_nopanic(host_pgdat, size,
 					    SMP_CACHE_BYTES, goal);
-- 
1.7.7.6

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [patch 2/2] mm: sparse: fix usemap allocation above node descriptor section
       [not found]       ` <20120710221559.GH1779@cmpxchg.org>
@ 2012-07-10 22:18           ` Johannes Weiner
  2012-07-10 22:18           ` Johannes Weiner
  1 sibling, 0 replies; 5+ messages in thread
From: Johannes Weiner @ 2012-07-10 22:18 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: akpm, mm-commits, tj, linux-mm, linux-kernel

From: Yinghai Lu <yinghai@kernel.org>

After f5bf18f "bootmem/sparsemem: remove limit constraint in
alloc_bootmem_section", usemap allocations may easily be placed
outside the optimal section that holds the node descriptor, even if
there is space available in that section.  This results in unnecessary
hotplug dependencies that need to have the node unplugged before the
section holding the usemap.

The reason is that the bootmem allocator doesn't guarantee a linear
search starting from the passed allocation goal but may start out at a
much higher address absent an upper limit.

Fix this by trying the allocation with the limit at the section end,
then retry without if that fails.  This keeps the fix from f5bf18f of
not panicking if the allocation does not fit in the section, but still
makes sure to try to stay within the section at first.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@kernel.org [3.3, 3.4]
---
 include/linux/bootmem.h |    5 +++++
 mm/bootmem.c            |    2 +-
 mm/nobootmem.c          |    2 +-
 mm/sparse.c             |   18 +++++++++++++-----
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 324fe08..6d6795d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -91,6 +91,11 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				  unsigned long size,
 				  unsigned long align,
 				  unsigned long goal);
+void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal,
+				  unsigned long limit);
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ec4fcb7..7309663 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
 {
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 6773ba5..4055730 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -282,7 +282,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 						   unsigned long size,
 						   unsigned long align,
 						   unsigned long goal,
diff --git a/mm/sparse.c b/mm/sparse.c
index e861397..c7bb952 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -275,8 +275,9 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	pg_data_t *host_pgdat;
-	unsigned long goal;
+	unsigned long goal, limit;
+	unsigned long *p;
+	int nid;
 	/*
 	 * A page may contain usemaps for other sections preventing the
 	 * page being freed and making a section unremovable while
@@ -288,9 +289,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * this problem.
 	 */
 	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
-	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
-	return __alloc_bootmem_node_nopanic(host_pgdat, size,
-					    SMP_CACHE_BYTES, goal);
+	limit = goal + (1UL << PA_SECTION_SHIFT);
+	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
+again:
+	p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					  SMP_CACHE_BYTES, goal, limit);
+	if (!p && limit) {
+		limit = 0;
+		goto again;
+	}
+	return p;
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [patch 2/2] mm: sparse: fix usemap allocation above node descriptor section
@ 2012-07-10 22:18           ` Johannes Weiner
  0 siblings, 0 replies; 5+ messages in thread
From: Johannes Weiner @ 2012-07-10 22:18 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: akpm, mm-commits, tj, linux-mm, linux-kernel

From: Yinghai Lu <yinghai@kernel.org>

After f5bf18f "bootmem/sparsemem: remove limit constraint in
alloc_bootmem_section", usemap allocations may easily be placed
outside the optimal section that holds the node descriptor, even if
there is space available in that section.  This results in unnecessary
hotplug dependencies that need to have the node unplugged before the
section holding the usemap.

The reason is that the bootmem allocator doesn't guarantee a linear
search starting from the passed allocation goal but may start out at a
much higher address absent an upper limit.

Fix this by trying the allocation with the limit at the section end,
then retry without if that fails.  This keeps the fix from f5bf18f of
not panicking if the allocation does not fit in the section, but still
makes sure to try to stay within the section at first.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@kernel.org [3.3, 3.4]
---
 include/linux/bootmem.h |    5 +++++
 mm/bootmem.c            |    2 +-
 mm/nobootmem.c          |    2 +-
 mm/sparse.c             |   18 +++++++++++++-----
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 324fe08..6d6795d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -91,6 +91,11 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				  unsigned long size,
 				  unsigned long align,
 				  unsigned long goal);
+void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal,
+				  unsigned long limit);
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ec4fcb7..7309663 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
 {
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 6773ba5..4055730 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -282,7 +282,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 						   unsigned long size,
 						   unsigned long align,
 						   unsigned long goal,
diff --git a/mm/sparse.c b/mm/sparse.c
index e861397..c7bb952 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -275,8 +275,9 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	pg_data_t *host_pgdat;
-	unsigned long goal;
+	unsigned long goal, limit;
+	unsigned long *p;
+	int nid;
 	/*
 	 * A page may contain usemaps for other sections preventing the
 	 * page being freed and making a section unremovable while
@@ -288,9 +289,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * this problem.
 	 */
 	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
-	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
-	return __alloc_bootmem_node_nopanic(host_pgdat, size,
-					    SMP_CACHE_BYTES, goal);
+	limit = goal + (1UL << PA_SECTION_SHIFT);
+	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
+again:
+	p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					  SMP_CACHE_BYTES, goal, limit);
+	if (!p && limit) {
+		limit = 0;
+		goto again;
+	}
+	return p;
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
-- 
1.7.7.6

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-07-10 22:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-26 23:46 + mm-fix-goal-calculating-with-usemap.patch added to -mm tree akpm
     [not found] ` <CAE9FiQUeQG6nr_k54ixEA4pvRT00e4bWoMJ+m0NO=FPEnBDB8Q@mail.gmail.com>
     [not found]   ` <CAE9FiQX_ovuiGHShf72kLOe4WJybZiyWiGaQ9KUnc1jm3cvdHw@mail.gmail.com>
     [not found]     ` <20120710212005.GG1779@cmpxchg.org>
     [not found]       ` <20120710221559.GH1779@cmpxchg.org>
2012-07-10 22:17         ` [patch 1/2] mm: sparse: fix section usemap placement calculation Johannes Weiner
2012-07-10 22:17           ` Johannes Weiner
2012-07-10 22:18         ` [patch 2/2] mm: sparse: fix usemap allocation above node descriptor section Johannes Weiner
2012-07-10 22:18           ` Johannes Weiner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.