All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
@ 2012-01-12 15:07 Peter Zijlstra
  2012-01-12 17:37 ` Christoph Lameter
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Peter Zijlstra @ 2012-01-12 15:07 UTC (permalink / raw)
  To: Mel Gorman, Christoph Lameter, Andi Kleen
  Cc: Lee Schermerhorn, Andrew Morton, linux-mm

Since the NUMA_INTERLEAVE_HIT statistic is useless on its own; it wants
to be compared to either a total of interleave allocations or to a miss
count, remove it.

Fixing it would be possible, but since we've gone years without these
statistics I figure we can continue that way.

This cleans up some of the weird MPOL_INTERLEAVE allocation exceptions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 drivers/base/node.c    |    2 +-
 include/linux/mmzone.h |    1 -
 mm/mempolicy.c         |   66 +++++++++++++++--------------------------------
 3 files changed, 22 insertions(+), 47 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5693ece..942cdbc 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -172,7 +172,7 @@ static ssize_t node_read_numastat(struct sys_device * dev,
 		       node_page_state(dev->id, NUMA_HIT),
 		       node_page_state(dev->id, NUMA_MISS),
 		       node_page_state(dev->id, NUMA_FOREIGN),
-		       node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
+		       0,
 		       node_page_state(dev->id, NUMA_LOCAL),
 		       node_page_state(dev->id, NUMA_OTHER));
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3ac040f..3a3be81 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -111,7 +111,6 @@ enum zone_stat_item {
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
 	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
-	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
 #endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c3fdbcb..2c48c45 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1530,11 +1530,29 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
 	return NULL;
 }
 
+/* Do dynamic interleaving for a process */
+static unsigned interleave_nodes(struct mempolicy *policy)
+{
+	unsigned nid, next;
+	struct task_struct *me = current;
+
+	nid = me->il_next;
+	next = next_node(nid, policy->v.nodes);
+	if (next >= MAX_NUMNODES)
+		next = first_node(policy->v.nodes);
+	if (next < MAX_NUMNODES)
+		me->il_next = next;
+	return nid;
+}
+
 /* Return a zonelist indicated by gfp for node representing a mempolicy */
 static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 	int nd)
 {
 	switch (policy->mode) {
+	case MPOL_INTERLEAVE:
+		nd = interleave_nodes(policy);
+		break;
 	case MPOL_PREFERRED:
 		if (!(policy->flags & MPOL_F_LOCAL))
 			nd = policy->v.preferred_node;
@@ -1556,21 +1574,6 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 	return node_zonelist(nd, gfp);
 }
 
-/* Do dynamic interleaving for a process */
-static unsigned interleave_nodes(struct mempolicy *policy)
-{
-	unsigned nid, next;
-	struct task_struct *me = current;
-
-	nid = me->il_next;
-	next = next_node(nid, policy->v.nodes);
-	if (next >= MAX_NUMNODES)
-		next = first_node(policy->v.nodes);
-	if (next < MAX_NUMNODES)
-		me->il_next = next;
-	return nid;
-}
-
 /*
  * Depending on the memory policy provide a node from which to allocate the
  * next slab entry.
@@ -1801,21 +1804,6 @@ out:
 	return ret;
 }
 
-/* Allocate a page in interleaved policy.
-   Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
-					unsigned nid)
-{
-	struct zonelist *zl;
-	struct page *page;
-
-	zl = node_zonelist(nid, gfp);
-	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
-		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
-	return page;
-}
-
 /**
  * 	alloc_pages_vma	- Allocate a page for a VMA.
  *
@@ -1848,15 +1836,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 	struct page *page;
 
 	get_mems_allowed();
-	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
-		unsigned nid;
-
-		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
-		mpol_cond_put(pol);
-		page = alloc_page_interleave(gfp, order, nid);
-		put_mems_allowed();
-		return page;
-	}
 	zl = policy_zonelist(gfp, pol, node);
 	if (unlikely(mpol_needs_cond_ref(pol))) {
 		/*
@@ -1909,12 +1888,9 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
 	 */
-	if (pol->mode == MPOL_INTERLEAVE)
-		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
-	else
-		page = __alloc_pages_nodemask(gfp, order,
-				policy_zonelist(gfp, pol, numa_node_id()),
-				policy_nodemask(gfp, pol));
+	page = __alloc_pages_nodemask(gfp, order,
+			policy_zonelist(gfp, pol, numa_node_id()),
+			policy_nodemask(gfp, pol));
 	put_mems_allowed();
 	return page;
 }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 15:07 [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT Peter Zijlstra
@ 2012-01-12 17:37 ` Christoph Lameter
  2012-01-12 18:26 ` Andi Kleen
  2012-05-18 10:22 ` [tip:sched/numa] mm/mpol: " tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 13+ messages in thread
From: Christoph Lameter @ 2012-01-12 17:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mel Gorman, Andi Kleen, Lee Schermerhorn, Andrew Morton, linux-mm

On Thu, 12 Jan 2012, Peter Zijlstra wrote:

> Since the NUMA_INTERLEAVE_HIT statistic is useless on its own; it wants
> to be compared to either a total of interleave allocations or to a miss
> count, remove it.
>
> Fixing it would be possible, but since we've gone years without these
> statistics I figure we can continue that way.

Never found any use for it.

Acked-by: Christoph Lameter <cl@linux.com>

> This cleans up some of the weird MPOL_INTERLEAVE allocation exceptions.

What others are there? Exceptions in terms of special casing in various
functions?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 15:07 [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT Peter Zijlstra
  2012-01-12 17:37 ` Christoph Lameter
@ 2012-01-12 18:26 ` Andi Kleen
  2012-01-12 19:02   ` KOSAKI Motohiro
  2012-01-12 20:13   ` Peter Zijlstra
  2012-05-18 10:22 ` [tip:sched/numa] mm/mpol: " tip-bot for Peter Zijlstra
  2 siblings, 2 replies; 13+ messages in thread
From: Andi Kleen @ 2012-01-12 18:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mel Gorman, Christoph Lameter, Andi Kleen, Lee Schermerhorn,
	Andrew Morton, linux-mm

On Thu, Jan 12, 2012 at 04:07:00PM +0100, Peter Zijlstra wrote:
> Since the NUMA_INTERLEAVE_HIT statistic is useless on its own; it wants
> to be compared to either a total of interleave allocations or to a miss
> count, remove it.

Nack!

This would break the numactl testsuite.

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 18:26 ` Andi Kleen
@ 2012-01-12 19:02   ` KOSAKI Motohiro
  2012-01-12 19:10     ` Andi Kleen
  2012-01-12 20:13   ` Peter Zijlstra
  1 sibling, 1 reply; 13+ messages in thread
From: KOSAKI Motohiro @ 2012-01-12 19:02 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Mel Gorman, Christoph Lameter, Lee Schermerhorn,
	Andrew Morton, linux-mm

(1/12/12 1:26 PM), Andi Kleen wrote:
> On Thu, Jan 12, 2012 at 04:07:00PM +0100, Peter Zijlstra wrote:
>> Since the NUMA_INTERLEAVE_HIT statistic is useless on its own; it wants
>> to be compared to either a total of interleave allocations or to a miss
>> count, remove it.
>
> Nack!
>
> This would break the numactl testsuite.

This seems a slightly strange reason to me. Removals of almost useless/deprecated features have broken the LTP testsuite, but end users never complained, because they never use test cases for development. So, may I clarify your intention? Would using Documentation/feature-removal-schedule.txt solve your worry?

Personally, I haven't observed NUMA_INTERLEAVE_HIT being used in a production environment. But I also haven't
felt that this feature is a code maintenance bottleneck. So, I'd just like to ask.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 19:02   ` KOSAKI Motohiro
@ 2012-01-12 19:10     ` Andi Kleen
  0 siblings, 0 replies; 13+ messages in thread
From: Andi Kleen @ 2012-01-12 19:10 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Andi Kleen, Peter Zijlstra, Mel Gorman, Christoph Lameter,
	Lee Schermerhorn, Andrew Morton, linux-mm

> This seems slightly strange reason to me. Almost useless/deprecated feature 
> removement broke ltp testsuite. But endusers never complained. Because they 

Don't know about that, but it sounds like a regression that should
have been reverted. Testing is important.

> never use testcases for development. 

It's a feature for developers. I originally added it for debugging
this code.

> So, May I clarify your intention? To 
> use Documention/feature-removal-schedule.txt solve your worry?

I just want it to stay so that the test suite keeps working.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 18:26 ` Andi Kleen
  2012-01-12 19:02   ` KOSAKI Motohiro
@ 2012-01-12 20:13   ` Peter Zijlstra
  2012-01-12 21:07     ` Andi Kleen
  1 sibling, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2012-01-12 20:13 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Mel Gorman, Christoph Lameter, Lee Schermerhorn, Andrew Morton, linux-mm

On Thu, 2012-01-12 at 19:26 +0100, Andi Kleen wrote:
> This would break the numactl testsuite.
> 
How so? The userspace output will still contain the field, we'll simply
always print 0.

But if you want I can provide a patch for numactl.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 20:13   ` Peter Zijlstra
@ 2012-01-12 21:07     ` Andi Kleen
  2012-01-12 21:40       ` Andrew Morton
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2012-01-12 21:07 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Mel Gorman, Christoph Lameter, Lee Schermerhorn,
	Andrew Morton, linux-mm

On Thu, Jan 12, 2012 at 09:13:47PM +0100, Peter Zijlstra wrote:
> On Thu, 2012-01-12 at 19:26 +0100, Andi Kleen wrote:
> > This would break the numactl testsuite.
> > 
> How so? The userspace output will still contain the field, we'll simply
> always print 0.

Then the interleave test in the test suite will fail.

> 
> But if you want I can provide a patch for numactl.

Disable the test? That would be bad too.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 21:07     ` Andi Kleen
@ 2012-01-12 21:40       ` Andrew Morton
  2012-01-12 22:29         ` Andi Kleen
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Morton @ 2012-01-12 21:40 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Mel Gorman, Christoph Lameter, Lee Schermerhorn,
	linux-mm

On Thu, 12 Jan 2012 22:07:43 +0100
Andi Kleen <andi@firstfloor.org> wrote:

> On Thu, Jan 12, 2012 at 09:13:47PM +0100, Peter Zijlstra wrote:
> > On Thu, 2012-01-12 at 19:26 +0100, Andi Kleen wrote:
> > > This would break the numactl testsuite.
> > > 
> > How so? The userspace output will still contain the field, we'll simply
> > always print 0.
> 
> Then the interleave test in the test suite will fail
> 
> > 
> > But if you want I can provide a patch for numactl.
> 
> Disable the test? That would be bad too.
> 

My googling and codesearch attempts didn't reveal any users of
NUMA_INTERLEAVE_HIT.  But then, it didn't find the usage in the numactl
suite either.

It would be good if we could find some way to remove this code (and any
other code!).  If that causes a bit of pain for users of the test suite
(presumably a small number of technically able people) then that seems
acceptable to me - we end up with a better kernel.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 21:40       ` Andrew Morton
@ 2012-01-12 22:29         ` Andi Kleen
  2012-01-13 15:28           ` Christoph Lameter
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2012-01-12 22:29 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, Peter Zijlstra, Mel Gorman, Christoph Lameter,
	Lee Schermerhorn, linux-mm

On Thu, Jan 12, 2012 at 01:40:45PM -0800, Andrew Morton wrote:
> On Thu, 12 Jan 2012 22:07:43 +0100
> Andi Kleen <andi@firstfloor.org> wrote:
> 
> > On Thu, Jan 12, 2012 at 09:13:47PM +0100, Peter Zijlstra wrote:
> > > On Thu, 2012-01-12 at 19:26 +0100, Andi Kleen wrote:
> > > > This would break the numactl testsuite.
> > > > 
> > > How so? The userspace output will still contain the field, we'll simply
> > > always print 0.
> > 
> > Then the interleave test in the test suite will fail
> > 
> > > 
> > > But if you want I can provide a patch for numactl.
> > 
> > Disable the test? That would be bad too.
> > 
> 
> My googling and codesearch attempts didn't reveal any users of
> NUMA_INTERLEAVE_HIT.  But then, it didn't find the usage in the numactl

Obviously you have to search for "interleave_hit"; the uppercase variant is
just a kernel-internal define.

> suite either.

test/regress

> 
> It would be good if we could find some way to remove this code (and any
> other code!).  If that causes a bit of pain for users of the test suite
> (presumably a small number of technically able people) then that seems
> acceptable to me - we end up with a better kernel.

The problem is that then there will be nothing left that actually
tests interleaving. The numactl has caught kernel regressions in the past.

I don't think disabling useful regression tests is a good idea.
On the contrary, the kernel needs far more of them, not fewer.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 22:29         ` Andi Kleen
@ 2012-01-13 15:28           ` Christoph Lameter
  2012-01-13 18:39             ` Andi Kleen
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Lameter @ 2012-01-13 15:28 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, Peter Zijlstra, Mel Gorman, Lee Schermerhorn, linux-mm

On Thu, 12 Jan 2012, Andi Kleen wrote:

> The problem is that then there will be nothing left that actually
> tests interleaving. The numactl has caught kernel regressions in the past.

How about adding a CONFIG_NUMA_DEBUG option and have it only available
then? I think there is no general use case.

> I don't think disabling useful regression tests is a good idea.
> In contrary the kernel needs far more of them, not less.

True. Some more debugging code for the NUMA features would be appreciated
but that does not need to be enabled by default. Lately I have become a
bit concerned about the number of statistics we are adding. The
per_cpu_pageset structure should not get too large.



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-13 15:28           ` Christoph Lameter
@ 2012-01-13 18:39             ` Andi Kleen
  2012-01-13 19:28               ` Christoph Lameter
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2012-01-13 18:39 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andi Kleen, Andrew Morton, Peter Zijlstra, Mel Gorman,
	Lee Schermerhorn, linux-mm

On Fri, Jan 13, 2012 at 09:28:20AM -0600, Christoph Lameter wrote:
> On Thu, 12 Jan 2012, Andi Kleen wrote:
> 
> > The problem is that then there will be nothing left that actually
> > tests interleaving. The numactl has caught kernel regressions in the past.
> 
> How about adding a CONFIG_NUMA_DEBUG option and have it only available
> then? I think there is no general use case.

For a few lines of code? And making it harder to test?

> > I don't think disabling useful regression tests is a good idea.
> > In contrary the kernel needs far more of them, not less.
> 
> True. Some more debugging code for the NUMA features would be appreciated
> but that does not need to be enabled by default. Lately I have become a
> bit concerned about the number of statistics we are adding. The
> per_cpu_pageset structure should not get too large.

I don't think the single counter is a problem.

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT
  2012-01-13 18:39             ` Andi Kleen
@ 2012-01-13 19:28               ` Christoph Lameter
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Lameter @ 2012-01-13 19:28 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, Peter Zijlstra, Mel Gorman, Lee Schermerhorn, linux-mm

On Fri, 13 Jan 2012, Andi Kleen wrote:

> On Fri, Jan 13, 2012 at 09:28:20AM -0600, Christoph Lameter wrote:
> > On Thu, 12 Jan 2012, Andi Kleen wrote:
> >
> > > The problem is that then there will be nothing left that actually
> > > tests interleaving. The numactl has caught kernel regressions in the past.
> >
> > How about adding a CONFIG_NUMA_DEBUG option and have it only available
> > then? I think there is no general use case.
>
> For a few lines of code? And making it harder to test?

For now yes. We can then add more debugging stuff. Right now there is no
framework for that.

> > > I don't think disabling useful regression tests is a good idea.
> > > In contrary the kernel needs far more of them, not less.
> >
> > True. Some more debugging code for the NUMA features would be appreciated
> > but that does not need to be enabled by default. Lately I have become a
> > bit concerned about the number of statistics we are adding. The
> > per_cpu_pageset structure should not get too large.
>
> I don't think the single counter is a problem.

I never said that .... There are multiple counters that may not be
too useful in that structure. Not just the one that's useless.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [tip:sched/numa] mm/mpol: Remove NUMA_INTERLEAVE_HIT
  2012-01-12 15:07 [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT Peter Zijlstra
  2012-01-12 17:37 ` Christoph Lameter
  2012-01-12 18:26 ` Andi Kleen
@ 2012-05-18 10:22 ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2012-05-18 10:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, torvalds, a.p.zijlstra, peterz, cl, riel, akpm, aarcange,
	suresh.b.siddha, tglx, hpa, linux-kernel, pjt, bharata.rao,
	Lee.Schermerhorn, danms

Commit-ID:  e975d6ac08f3447d7e44851d37f7791ace6da73b
Gitweb:     http://git.kernel.org/tip/e975d6ac08f3447d7e44851d37f7791ace6da73b
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 12 Jan 2012 16:07:00 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 17 May 2012 14:06:12 +0200

mm/mpol: Remove NUMA_INTERLEAVE_HIT

Since the NUMA_INTERLEAVE_HIT statistic is useless on its own; it wants
to be compared to either a total of interleave allocations or to a miss
count, remove it.

Fixing it would be possible, but since we've gone years without these
statistics I figure we can continue that way.

Also NUMA_HIT fully includes NUMA_INTERLEAVE_HIT so users might
switch to using that.

This cleans up some of the weird MPOL_INTERLEAVE allocation exceptions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Paul Turner <pjt@google.com>
Cc: Dan Smith <danms@us.ibm.com>
Cc: Bharata B Rao <bharata.rao@gmail.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1326380820.2442.186.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/base/node.c    |    2 +-
 include/linux/mmzone.h |    1 -
 mm/mempolicy.c         |   68 +++++++++++++++---------------------------------
 3 files changed, 22 insertions(+), 49 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 90aa2a1..e7c61f3 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -169,7 +169,7 @@ static ssize_t node_read_numastat(struct device *dev,
 		       node_page_state(dev->id, NUMA_HIT),
 		       node_page_state(dev->id, NUMA_MISS),
 		       node_page_state(dev->id, NUMA_FOREIGN),
-		       node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
+		       0UL,
 		       node_page_state(dev->id, NUMA_LOCAL),
 		       node_page_state(dev->id, NUMA_OTHER));
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dff7115..0440e51 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -111,7 +111,6 @@ enum zone_stat_item {
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
 	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
-	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
 #endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cdb3b9d..82d209b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1489,11 +1489,29 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
 	return NULL;
 }
 
+/* Do dynamic interleaving for a process */
+static unsigned interleave_nodes(struct mempolicy *policy)
+{
+	unsigned nid, next;
+	struct task_struct *me = current;
+
+	nid = me->il_next;
+	next = next_node(nid, policy->v.nodes);
+	if (next >= MAX_NUMNODES)
+		next = first_node(policy->v.nodes);
+	if (next < MAX_NUMNODES)
+		me->il_next = next;
+	return nid;
+}
+
 /* Return a zonelist indicated by gfp for node representing a mempolicy */
 static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 	int nd)
 {
 	switch (policy->mode) {
+	case MPOL_INTERLEAVE:
+		nd = interleave_nodes(policy);
+		break;
 	case MPOL_PREFERRED:
 		if (!(policy->flags & MPOL_F_LOCAL))
 			nd = policy->v.preferred_node;
@@ -1515,21 +1533,6 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 	return node_zonelist(nd, gfp);
 }
 
-/* Do dynamic interleaving for a process */
-static unsigned interleave_nodes(struct mempolicy *policy)
-{
-	unsigned nid, next;
-	struct task_struct *me = current;
-
-	nid = me->il_next;
-	next = next_node(nid, policy->v.nodes);
-	if (next >= MAX_NUMNODES)
-		next = first_node(policy->v.nodes);
-	if (next < MAX_NUMNODES)
-		me->il_next = next;
-	return nid;
-}
-
 /*
  * Depending on the memory policy provide a node from which to allocate the
  * next slab entry.
@@ -1760,21 +1763,6 @@ out:
 	return ret;
 }
 
-/* Allocate a page in interleaved policy.
-   Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
-					unsigned nid)
-{
-	struct zonelist *zl;
-	struct page *page;
-
-	zl = node_zonelist(nid, gfp);
-	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
-		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
-	return page;
-}
-
 /**
  * 	alloc_pages_vma	- Allocate a page for a VMA.
  *
@@ -1811,17 +1799,6 @@ retry_cpuset:
 	pol = get_vma_policy(current, vma, addr);
 	cpuset_mems_cookie = read_mems_allowed_begin();
 
-	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
-		unsigned nid;
-
-		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
-		mpol_cond_put(pol);
-		page = alloc_page_interleave(gfp, order, nid);
-		if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-			goto retry_cpuset;
-
-		return page;
-	}
 	zl = policy_zonelist(gfp, pol, node);
 	if (unlikely(mpol_needs_cond_ref(pol))) {
 		/*
@@ -1879,12 +1856,9 @@ retry_cpuset:
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
 	 */
-	if (pol->mode == MPOL_INTERLEAVE)
-		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
-	else
-		page = __alloc_pages_nodemask(gfp, order,
-				policy_zonelist(gfp, pol, numa_node_id()),
-				policy_nodemask(gfp, pol));
+	page = __alloc_pages_nodemask(gfp, order,
+			policy_zonelist(gfp, pol, numa_node_id()),
+			policy_nodemask(gfp, pol));
 
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-05-18 10:22 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-12 15:07 [RFC][PATCH] mm: Remove NUMA_INTERLEAVE_HIT Peter Zijlstra
2012-01-12 17:37 ` Christoph Lameter
2012-01-12 18:26 ` Andi Kleen
2012-01-12 19:02   ` KOSAKI Motohiro
2012-01-12 19:10     ` Andi Kleen
2012-01-12 20:13   ` Peter Zijlstra
2012-01-12 21:07     ` Andi Kleen
2012-01-12 21:40       ` Andrew Morton
2012-01-12 22:29         ` Andi Kleen
2012-01-13 15:28           ` Christoph Lameter
2012-01-13 18:39             ` Andi Kleen
2012-01-13 19:28               ` Christoph Lameter
2012-05-18 10:22 ` [tip:sched/numa] mm/mpol: " tip-bot for Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.