All of lore.kernel.org
* [PATCH] fix vmscan to take care of nodemask
@ 2009-03-23  1:03 ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 18+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-03-23  1:03 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, kosaki.motohiro, balbir, mel, riel, akpm

From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

try_to_free_pages() scans the zonelist but doesn't take care of the nodemask that is
passed to alloc_pages_nodemask(). This makes try_to_free_pages() less effective.
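
As a toy model of the difference (userspace C, not kernel code; the node
count and the bind mask below are invented), honoring the nodemask simply
filters the zonelist walk by the caller's allowed nodes, with a NULL mask
meaning "scan all nodes":

/* toy_nodemask_walk.c -- build with: cc -o toy_walk toy_nodemask_walk.c */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct zone { int nid; const char *name; };

/* A NULL mask allows every node, mirroring sc->nodemask == NULL. */
static bool node_allowed(const bool *mask, int nid)
{
	return mask == NULL || mask[nid];
}

int main(void)
{
	struct zone zonelist[] = {
		{ 0, "Node0/Normal" }, { 1, "Node1/Normal" },
		{ 2, "Node2/Normal" }, { 3, "Node3/Normal" },
	};
	bool bind_mask[4] = { false, true, false, false };

	/* the patched walk, analogous to for_each_zone_zonelist_nodemask() */
	for (size_t i = 0; i < sizeof(zonelist) / sizeof(zonelist[0]); i++)
		if (node_allowed(bind_mask, zonelist[i].nid))
			printf("reclaim would scan %s\n", zonelist[i].name);
	return 0;
}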

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 fs/buffer.c          |    2 +-
 include/linux/swap.h |    2 +-
 mm/page_alloc.c      |    3 ++-
 mm/vmscan.c          |   14 ++++++++++++--
 4 files changed, 16 insertions(+), 5 deletions(-)

Index: mmotm-2.6.29-Mar21/mm/vmscan.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/mm/vmscan.c
+++ mmotm-2.6.29-Mar21/mm/vmscan.c
@@ -79,6 +79,9 @@ struct scan_control {
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
 
+	/* Nodemask */
+	nodemask_t	*nodemask;
+
 	/* Pluggable isolate pages callback */
 	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 			unsigned long *scanned, int order, int mode,
@@ -1544,7 +1547,9 @@ static void shrink_zones(int priority, s
 	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+	/* Note: sc->nodemask==NULL means scan all node */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+					sc->nodemask) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1689,7 +1694,7 @@ out:
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-								gfp_t gfp_mask)
+				gfp_t gfp_mask, nodemask_t *nodemask)
 {
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
@@ -1700,6 +1705,7 @@ unsigned long try_to_free_pages(struct z
 		.order = order,
 		.mem_cgroup = NULL,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = nodemask,
 	};
 
 	return do_try_to_free_pages(zonelist, &sc);
@@ -1720,6 +1726,7 @@ unsigned long try_to_free_mem_cgroup_pag
 		.order = 0,
 		.mem_cgroup = mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
+		.nodemask = NULL,
 	};
 	struct zonelist *zonelist;
 
@@ -1769,6 +1776,7 @@ static unsigned long balance_pgdat(pg_da
 		.order = order,
 		.mem_cgroup = NULL,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = NULL,
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
@@ -2112,6 +2120,7 @@ unsigned long shrink_all_memory(unsigned
 		.may_unmap = 0,
 		.may_writepage = 1,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = NULL,
 	};
 
 	current->reclaim_state = &reclaim_state;
@@ -2298,6 +2307,7 @@ static int __zone_reclaim(struct zone *z
 		.swappiness = vm_swappiness,
 		.order = order,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = NULL,
 	};
 	unsigned long slab_reclaimable;
 
Index: mmotm-2.6.29-Mar21/include/linux/swap.h
===================================================================
--- mmotm-2.6.29-Mar21.orig/include/linux/swap.h
+++ mmotm-2.6.29-Mar21/include/linux/swap.h
@@ -213,7 +213,7 @@ static inline void lru_cache_add_active_
 
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-					gfp_t gfp_mask);
+					gfp_t gfp_mask, nodemask_t *mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 						  gfp_t gfp_mask, bool noswap,
 						  unsigned int swappiness);
Index: mmotm-2.6.29-Mar21/mm/page_alloc.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/mm/page_alloc.c
+++ mmotm-2.6.29-Mar21/mm/page_alloc.c
@@ -1598,7 +1598,8 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist, order,
+						gfp_mask, nodemask);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
Index: mmotm-2.6.29-Mar21/fs/buffer.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/fs/buffer.c
+++ mmotm-2.6.29-Mar21/fs/buffer.c
@@ -476,7 +476,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 


* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23  1:03 ` KAMEZAWA Hiroyuki
@ 2009-03-23  1:13   ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 18+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-03-23  1:13 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: linux-mm, linux-kernel, kosaki.motohiro, balbir, mel, riel, akpm

Kosaki pointed out that it's not necessary to initialize struct members to NULL.
Removed them.

Regards,
-Kame
==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

try_to_free_pages() scans the zonelist but doesn't take care of the nodemask that is
given to alloc_pages_nodemask(). This makes try_to_free_pages() less effective.

Changelog: v1 -> v2
  - removed unnecessary nodemask=NULL initialization.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
Index: mmotm-2.6.29-Mar21/mm/vmscan.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/mm/vmscan.c
+++ mmotm-2.6.29-Mar21/mm/vmscan.c
@@ -79,6 +79,9 @@ struct scan_control {
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
 
+	/* Nodemask */
+	nodemask_t	*nodemask;
+
 	/* Pluggable isolate pages callback */
 	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 			unsigned long *scanned, int order, int mode,
@@ -1544,7 +1547,9 @@ static void shrink_zones(int priority, s
 	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+	/* Note: sc->nodemask==NULL means scan all node */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+					sc->nodemask) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1689,7 +1694,7 @@ out:
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-								gfp_t gfp_mask)
+				gfp_t gfp_mask, nodemask_t *nodemask)
 {
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
@@ -1700,6 +1705,7 @@ unsigned long try_to_free_pages(struct z
 		.order = order,
 		.mem_cgroup = NULL,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = nodemask,
 	};
 
 	return do_try_to_free_pages(zonelist, &sc);
Index: mmotm-2.6.29-Mar21/include/linux/swap.h
===================================================================
--- mmotm-2.6.29-Mar21.orig/include/linux/swap.h
+++ mmotm-2.6.29-Mar21/include/linux/swap.h
@@ -213,7 +213,7 @@ static inline void lru_cache_add_active_
 
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-					gfp_t gfp_mask);
+					gfp_t gfp_mask, nodemask_t *mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 						  gfp_t gfp_mask, bool noswap,
 						  unsigned int swappiness);
Index: mmotm-2.6.29-Mar21/mm/page_alloc.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/mm/page_alloc.c
+++ mmotm-2.6.29-Mar21/mm/page_alloc.c
@@ -1598,7 +1598,8 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist, order,
+						gfp_mask, nodemask);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
Index: mmotm-2.6.29-Mar21/fs/buffer.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/fs/buffer.c
+++ mmotm-2.6.29-Mar21/fs/buffer.c
@@ -476,7 +476,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 



* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23  1:13   ` KAMEZAWA Hiroyuki
@ 2009-03-23  1:20     ` KOSAKI Motohiro
  -1 siblings, 0 replies; 18+ messages in thread
From: KOSAKI Motohiro @ 2009-03-23  1:20 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: kosaki.motohiro, linux-mm, linux-kernel, balbir, mel, riel, akpm

> Kosaki pointed out that it's not necessary to initialize struct members to NULL.
> Removed them.
> 
> Regards,
> -Kame
> ==
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> 
> try_to_free_pages() scans the zonelist but doesn't take care of the nodemask that is
> given to alloc_pages_nodemask(). This makes try_to_free_pages() less effective.

Yes, ignoring the nodemask causes unnecessary reclaim and decreases
try_to_free_pages() performance.

Note: currently, try_to_free_pages() stops reclaiming after 32 pages have been
reclaimed, so unintentional node scanning can cause a large performance regression.
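
As a rough sketch of that effect (userspace C, not kernel code; the
per-node page counts and the allowed set are invented), the 32-page quota
can be burned entirely on nodes the caller cannot use:

/* toy_reclaim.c -- build with: cc -o toy_reclaim toy_reclaim.c */
#include <stdio.h>
#include <stdbool.h>

#define SWAP_CLUSTER_MAX 32
#define NR_NODES 4

/* Reclaim up to SWAP_CLUSTER_MAX pages, optionally honoring "allowed". */
static void toy_reclaim(const int pages[NR_NODES],
			const bool allowed[NR_NODES], bool honor_mask)
{
	int reclaimed = 0, usable = 0;

	for (int nid = 0; nid < NR_NODES; nid++) {
		if (honor_mask && !allowed[nid])
			continue;		/* the patched behaviour */
		for (int p = 0; p < pages[nid] &&
				reclaimed < SWAP_CLUSTER_MAX; p++) {
			reclaimed++;
			if (allowed[nid])
				usable++;	/* progress the caller can use */
		}
	}
	printf("honor_mask=%d: reclaimed=%d, usable by caller=%d\n",
	       honor_mask, reclaimed, usable);
}

int main(void)
{
	int pages[NR_NODES] = { 64, 64, 64, 64 };
	bool allowed[NR_NODES] = { false, false, true, true };

	toy_reclaim(pages, allowed, false);	/* old: reclaimed=32, usable=0 */
	toy_reclaim(pages, allowed, true);	/* new: reclaimed=32, usable=32 */
	return 0;
}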

> 
> Changelog: v1 -> v2
>   - removed unnecessary nodemask=NULL initialization.
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

thanks.
	Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>





* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23  1:03 ` KAMEZAWA Hiroyuki
@ 2009-03-23 11:48   ` Mel Gorman
  -1 siblings, 0 replies; 18+ messages in thread
From: Mel Gorman @ 2009-03-23 11:48 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: linux-mm, linux-kernel, kosaki.motohiro, balbir, riel, akpm

On Mon, Mar 23, 2009 at 10:03:56AM +0900, KAMEZAWA Hiroyuki wrote:
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> 
> > try_to_free_pages() scans the zonelist but doesn't take care of the nodemask that is
> > passed to alloc_pages_nodemask(). This makes try_to_free_pages() less effective.
> 

Hmm, do you mind if I try expanding that changelog a bit?

====

try_to_free_pages() is used for the direct reclaim of up to
SWAP_CLUSTER_MAX pages when watermarks are low. The caller to
alloc_pages_nodemask() can specify a nodemask of nodes that are allowed
to be used but this is not passed to try_to_free_pages(). This can lead
to the unnecessary reclaim of pages that are unusable by the caller and
in the worst case lead to allocation failure as progress has not been
made where it is needed.

This patch passes the nodemask used for alloc_pages_nodemask() to
try_to_free_pages().

====

?

> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> ---
>  fs/buffer.c          |    2 +-
>  include/linux/swap.h |    2 +-
>  mm/page_alloc.c      |    3 ++-
>  mm/vmscan.c          |   14 ++++++++++++--
>  4 files changed, 16 insertions(+), 5 deletions(-)
> 
> Index: mmotm-2.6.29-Mar21/mm/vmscan.c
> ===================================================================
> --- mmotm-2.6.29-Mar21.orig/mm/vmscan.c
> +++ mmotm-2.6.29-Mar21/mm/vmscan.c
> @@ -79,6 +79,9 @@ struct scan_control {
>  	/* Which cgroup do we reclaim from */
>  	struct mem_cgroup *mem_cgroup;
>  
> +	/* Nodemask */
> +	nodemask_t	*nodemask;
> +

That comment is not a whole pile of help.

/*
 * nodemask of nodes allowed by the caller. Note that nodemask==NULL
 * means scan all nodes
 */

?

>  	/* Pluggable isolate pages callback */
>  	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
>  			unsigned long *scanned, int order, int mode,
> @@ -1544,7 +1547,9 @@ static void shrink_zones(int priority, s
>  	struct zone *zone;
>  
>  	sc->all_unreclaimable = 1;
> -	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
> +	/* Note: sc->nodemask==NULL means scan all node */

Your choice, but I moved this comment to scan_control. I don't mind
where it is really.

> +	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
> +					sc->nodemask) {
>  		if (!populated_zone(zone))
>  			continue;
>  		/*
> @@ -1689,7 +1694,7 @@ out:
>  }
>  
>  unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> -								gfp_t gfp_mask)
> +				gfp_t gfp_mask, nodemask_t *nodemask)
>  {
>  	struct scan_control sc = {
>  		.gfp_mask = gfp_mask,
> @@ -1700,6 +1705,7 @@ unsigned long try_to_free_pages(struct z
>  		.order = order,
>  		.mem_cgroup = NULL,
>  		.isolate_pages = isolate_pages_global,
> +		.nodemask = nodemask,
>  	};
>  
>  	return do_try_to_free_pages(zonelist, &sc);
> @@ -1720,6 +1726,7 @@ unsigned long try_to_free_mem_cgroup_pag
>  		.order = 0,
>  		.mem_cgroup = mem_cont,
>  		.isolate_pages = mem_cgroup_isolate_pages,
> +		.nodemask = NULL,

While, strictly speaking, this is unnecessary, I prefer it because it
tells a reader that "yes, I really meant it to be NULL".

>  	};
>  	struct zonelist *zonelist;
>  
> @@ -1769,6 +1776,7 @@ static unsigned long balance_pgdat(pg_da
>  		.order = order,
>  		.mem_cgroup = NULL,
>  		.isolate_pages = isolate_pages_global,
> +		.nodemask = NULL,
>  	};
>  	/*
>  	 * temp_priority is used to remember the scanning priority at which
> @@ -2112,6 +2120,7 @@ unsigned long shrink_all_memory(unsigned
>  		.may_unmap = 0,
>  		.may_writepage = 1,
>  		.isolate_pages = isolate_pages_global,
> +		.nodemask = NULL,
>  	};
>  
>  	current->reclaim_state = &reclaim_state;
> @@ -2298,6 +2307,7 @@ static int __zone_reclaim(struct zone *z
>  		.swappiness = vm_swappiness,
>  		.order = order,
>  		.isolate_pages = isolate_pages_global,
> +		.nodemask = NULL,
>  	};
>  	unsigned long slab_reclaimable;
>  
> Index: mmotm-2.6.29-Mar21/include/linux/swap.h
> ===================================================================
> --- mmotm-2.6.29-Mar21.orig/include/linux/swap.h
> +++ mmotm-2.6.29-Mar21/include/linux/swap.h
> @@ -213,7 +213,7 @@ static inline void lru_cache_add_active_
>  
>  /* linux/mm/vmscan.c */
>  extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> -					gfp_t gfp_mask);
> +					gfp_t gfp_mask, nodemask_t *mask);
>  extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
>  						  gfp_t gfp_mask, bool noswap,
>  						  unsigned int swappiness);
> Index: mmotm-2.6.29-Mar21/mm/page_alloc.c
> ===================================================================
> --- mmotm-2.6.29-Mar21.orig/mm/page_alloc.c
> +++ mmotm-2.6.29-Mar21/mm/page_alloc.c
> @@ -1598,7 +1598,8 @@ nofail_alloc:
>  	reclaim_state.reclaimed_slab = 0;
>  	p->reclaim_state = &reclaim_state;
>  
> -	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
> +	did_some_progress = try_to_free_pages(zonelist, order,
> +						gfp_mask, nodemask);
>  
>  	p->reclaim_state = NULL;
>  	lockdep_clear_current_reclaim_state();
> Index: mmotm-2.6.29-Mar21/fs/buffer.c
> ===================================================================
> --- mmotm-2.6.29-Mar21.orig/fs/buffer.c
> +++ mmotm-2.6.29-Mar21/fs/buffer.c
> @@ -476,7 +476,7 @@ static void free_more_memory(void)
>  						&zone);
>  		if (zone)
>  			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
> -						GFP_NOFS);
> +						GFP_NOFS, NULL);
>  	}
>  }
>  

Otherwise looks good. With a better changelog and a comment other than "/*
nodemask */" explaining what nodemask is:

Acked-by: Mel Gorman <mel@csn.ul.ie>

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab


* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23 11:48   ` Mel Gorman
@ 2009-03-23 13:38     ` Christoph Lameter
  -1 siblings, 0 replies; 18+ messages in thread
From: Christoph Lameter @ 2009-03-23 13:38 UTC (permalink / raw)
  To: Mel Gorman
  Cc: KAMEZAWA Hiroyuki, linux-mm, linux-kernel, kosaki.motohiro,
	balbir, riel, akpm

On Mon, 23 Mar 2009, Mel Gorman wrote:

> try_to_free_pages() is used for the direct reclaim of up to
> SWAP_CLUSTER_MAX pages when watermarks are low. The caller to
> alloc_pages_nodemask() can specify a nodemask of nodes that are allowed
> to be used but this is not passed to try_to_free_pages(). This can lead
> to the unnecessary reclaim of pages that are unusable by the caller and
> in the worst case lead to allocation failure as progress has not been
> made where it is needed.
>
> This patch passes the nodemask used for alloc_pages_nodemask() to
> try_to_free_pages().


This is only useful for MPOL_BIND. Direct reclaim within a cpuset already
honors the boundaries of the cpuset.
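
For reference, a minimal sketch of the MPOL_BIND case (userspace C; needs
the libnuma headers and -lnuma for set_mempolicy(); the node number and
allocation size are arbitrary, and the call fails on machines without a
node 1). Allocations made after set_mempolicy() carry a restrictive
nodemask into alloc_pages_nodemask(), which is where this patch takes
effect:

/* bind_demo.c -- build with: cc bind_demo.c -o bind_demo -lnuma */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <numaif.h>		/* set_mempolicy(), MPOL_BIND */

int main(void)
{
	unsigned long nodemask = 1UL << 1;	/* node 1 only (arbitrary) */
	size_t len = 16UL << 20;		/* 16 MB, also arbitrary */
	char *buf;

	/* Restrict this task's future page allocations to node 1. */
	if (set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask)) != 0) {
		perror("set_mempolicy");
		return 1;
	}

	/* Faulting these pages allocates with a nodemask of {1}; before
	 * the patch, direct reclaim triggered here could still scan and
	 * reclaim pages on every node. */
	buf = malloc(len);
	if (!buf)
		return 1;
	memset(buf, 0, len);
	printf("touched %zu MB under MPOL_BIND to node 1\n", len >> 20);
	free(buf);
	return 0;
}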



* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23 13:38     ` Christoph Lameter
@ 2009-03-23 15:31       ` KOSAKI Motohiro
  -1 siblings, 0 replies; 18+ messages in thread
From: KOSAKI Motohiro @ 2009-03-23 15:31 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Mel Gorman, KAMEZAWA Hiroyuki, linux-mm, linux-kernel, balbir,
	riel, akpm

2009/3/23 Christoph Lameter <cl@linux-foundation.org>:
> On Mon, 23 Mar 2009, Mel Gorman wrote:
>
>> try_to_free_pages() is used for the direct reclaim of up to
>> SWAP_CLUSTER_MAX pages when watermarks are low. The caller to
>> alloc_pages_nodemask() can specify a nodemask of nodes that are allowed
>> to be used but this is not passed to try_to_free_pages(). This can lead
>> to the unnecessary reclaim of pages that are unusable by the caller and
>> in the worst case lead to allocation failure as progress has not been
>> made where it is needed.
>>
>> This patch passes the nodemask used for alloc_pages_nodemask() to
>> try_to_free_pages().
>
> This is only useful for MPOL_BIND. Direct reclaim within a cpuset already
> honors the boundaries of the cpuset.

Do you mean that as a NAK or as a request to add a comment?
I agree with you, but I don't find any weak point in this patch.


* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23 15:31       ` KOSAKI Motohiro
@ 2009-03-23 15:39         ` Christoph Lameter
  -1 siblings, 0 replies; 18+ messages in thread
From: Christoph Lameter @ 2009-03-23 15:39 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Mel Gorman, KAMEZAWA Hiroyuki, linux-mm, linux-kernel, balbir,
	riel, akpm

On Tue, 24 Mar 2009, KOSAKI Motohiro wrote:

> 2009/3/23 Christoph Lameter <cl@linux-foundation.org>:
> > On Mon, 23 Mar 2009, Mel Gorman wrote:
> >
> >> try_to_free_pages() is used for the direct reclaim of up to
> >> SWAP_CLUSTER_MAX pages when watermarks are low. The caller to
> >> alloc_pages_nodemask() can specify a nodemask of nodes that are allowed
> >> to be used but this is not passed to try_to_free_pages(). This can lead
> >> to the unnecessary reclaim of pages that are unusable by the caller and
> >> in the worst case lead to allocation failure as progress has not been
> >> made where it is needed.
> >>
> >> This patch passes the nodemask used for alloc_pages_nodemask() to
> >> try_to_free_pages().
> >
> > This is only useful for MPOL_BIND. Direct reclaim within a cpuset already
> > honors the boundaries of the cpuset.
>
> Do you mean that as a NAK or as a request to add a comment?
> I agree with you, but I don't find any weak point in this patch.

Just a comment.



* Re: [PATCH] fix vmscan to take care of nodemask
  2009-03-23 11:48   ` Mel Gorman
@ 2009-03-24  1:31     ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 18+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-03-24  1:31 UTC (permalink / raw)
  To: Mel Gorman; +Cc: linux-mm, linux-kernel, kosaki.motohiro, balbir, riel, akpm

On Mon, 23 Mar 2009 11:48:14 +0000
Mel Gorman <mel@csn.ul.ie> wrote:

> On Mon, Mar 23, 2009 at 10:03:56AM +0900, KAMEZAWA Hiroyuki wrote:
> > From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > 
> > try_to_free_pages() scans the zonelist but doesn't take care of the nodemask that is
> > passed to alloc_pages_nodemask(). This makes try_to_free_pages() less effective.
> > 
> 
> Hmm, do you mind if I try expanding that changelog a bit?
> 
> ====
> 
> try_to_free_pages() is used for the direct reclaim of up to
> SWAP_CLUSTER_MAX pages when watermarks are low. The caller to
> alloc_pages_nodemask() can specify a nodemask of nodes that are allowed
> to be used but this is not passed to try_to_free_pages(). This can lead
> to the unnecessary reclaim of pages that are unusable by the caller and
> in the worst case lead to allocation failure as progress has not been
> made where it is needed.
> 
> This patch passes the nodemask used for alloc_pages_nodemask() to
> try_to_free_pages().
> 
> ====
> 
> ?

Thank you. I'll gladly use your text :)

> 
> > Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > ---
> >  fs/buffer.c          |    2 +-
> >  include/linux/swap.h |    2 +-
> >  mm/page_alloc.c      |    3 ++-
> >  mm/vmscan.c          |   14 ++++++++++++--
> >  4 files changed, 16 insertions(+), 5 deletions(-)
> > 
> > Index: mmotm-2.6.29-Mar21/mm/vmscan.c
> > ===================================================================
> > --- mmotm-2.6.29-Mar21.orig/mm/vmscan.c
> > +++ mmotm-2.6.29-Mar21/mm/vmscan.c
> > @@ -79,6 +79,9 @@ struct scan_control {
> >  	/* Which cgroup do we reclaim from */
> >  	struct mem_cgroup *mem_cgroup;
> >  
> > +	/* Nodemask */
> > +	nodemask_t	*nodemask;
> > +
> 
> That comment is not a whole pile of help.
> 
> /*
>  * nodemask of nodes allowed by the caller. Note that nodemask==NULL
>  * means scan all nodes
>  */
> 
> ?
> 
ok.

> >  	/* Pluggable isolate pages callback */
> >  	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
> >  			unsigned long *scanned, int order, int mode,
> > @@ -1544,7 +1547,9 @@ static void shrink_zones(int priority, s
> >  	struct zone *zone;
> >  
> >  	sc->all_unreclaimable = 1;
> > -	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
> > +	/* Note: sc->nodemask==NULL means scan all node */
> 
> Your choice, but I moved this comment to scan_control. I don't mind
> where it is really.
> 
ok.


> > +	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
> > +					sc->nodemask) {
> >  		if (!populated_zone(zone))
> >  			continue;
> >  		/*
> > @@ -1689,7 +1694,7 @@ out:
> >  }
> >  
> >  unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> > -								gfp_t gfp_mask)
> > +				gfp_t gfp_mask, nodemask_t *nodemask)
> >  {
> >  	struct scan_control sc = {
> >  		.gfp_mask = gfp_mask,
> > @@ -1700,6 +1705,7 @@ unsigned long try_to_free_pages(struct z
> >  		.order = order,
> >  		.mem_cgroup = NULL,
> >  		.isolate_pages = isolate_pages_global,
> > +		.nodemask = nodemask,
> >  	};
> >  
> >  	return do_try_to_free_pages(zonelist, &sc);
> > @@ -1720,6 +1726,7 @@ unsigned long try_to_free_mem_cgroup_pag
> >  		.order = 0,
> >  		.mem_cgroup = mem_cont,
> >  		.isolate_pages = mem_cgroup_isolate_pages,
> > +		.nodemask = NULL,
> 
> While strictly speaking, this is unnecessary, I prefer it because it
> tells a reader that "yes, I really meant it to be NULL".
> 
> >  	};
> >  	struct zonelist *zonelist;
> >  
> > @@ -1769,6 +1776,7 @@ static unsigned long balance_pgdat(pg_da
> >  		.order = order,
> >  		.mem_cgroup = NULL,
> >  		.isolate_pages = isolate_pages_global,
> > +		.nodemask = NULL,
> >  	};
> >  	/*
> >  	 * temp_priority is used to remember the scanning priority at which
> > @@ -2112,6 +2120,7 @@ unsigned long shrink_all_memory(unsigned
> >  		.may_unmap = 0,
> >  		.may_writepage = 1,
> >  		.isolate_pages = isolate_pages_global,
> > +		.nodemask = NULL,
> >  	};
> >  
> >  	current->reclaim_state = &reclaim_state;
> > @@ -2298,6 +2307,7 @@ static int __zone_reclaim(struct zone *z
> >  		.swappiness = vm_swappiness,
> >  		.order = order,
> >  		.isolate_pages = isolate_pages_global,
> > +		.nodemask = NULL,
> >  	};
> >  	unsigned long slab_reclaimable;
> >  
> > Index: mmotm-2.6.29-Mar21/include/linux/swap.h
> > ===================================================================
> > --- mmotm-2.6.29-Mar21.orig/include/linux/swap.h
> > +++ mmotm-2.6.29-Mar21/include/linux/swap.h
> > @@ -213,7 +213,7 @@ static inline void lru_cache_add_active_
> >  
> >  /* linux/mm/vmscan.c */
> >  extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> > -					gfp_t gfp_mask);
> > +					gfp_t gfp_mask, nodemask_t *mask);
> >  extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
> >  						  gfp_t gfp_mask, bool noswap,
> >  						  unsigned int swappiness);
> > Index: mmotm-2.6.29-Mar21/mm/page_alloc.c
> > ===================================================================
> > --- mmotm-2.6.29-Mar21.orig/mm/page_alloc.c
> > +++ mmotm-2.6.29-Mar21/mm/page_alloc.c
> > @@ -1598,7 +1598,8 @@ nofail_alloc:
> >  	reclaim_state.reclaimed_slab = 0;
> >  	p->reclaim_state = &reclaim_state;
> >  
> > -	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
> > +	did_some_progress = try_to_free_pages(zonelist, order,
> > +						gfp_mask, nodemask);
> >  
> >  	p->reclaim_state = NULL;
> >  	lockdep_clear_current_reclaim_state();
> > Index: mmotm-2.6.29-Mar21/fs/buffer.c
> > ===================================================================
> > --- mmotm-2.6.29-Mar21.orig/fs/buffer.c
> > +++ mmotm-2.6.29-Mar21/fs/buffer.c
> > @@ -476,7 +476,7 @@ static void free_more_memory(void)
> >  						&zone);
> >  		if (zone)
> >  			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
> > -						GFP_NOFS);
> > +						GFP_NOFS, NULL);
> >  	}
> >  }
> >  
> 
> Otherwise looks good. With a better changelog and a comment other than "/*
> nodemask */" explaining what nodemask is;
> 
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> 

Thank you for the review!

Regards,
-Kame


> -- 
> Mel Gorman
> Part-time Phd Student                          Linux Technology Center
> University of Limerick                         IBM Dublin Software Lab



* [PATCH] fix vmscan to take care of nodemask v3
  2009-03-24  1:31     ` KAMEZAWA Hiroyuki
@ 2009-03-24  2:26       ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 18+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-03-24  2:26 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Mel Gorman, linux-mm, linux-kernel, kosaki.motohiro, balbir, riel, akpm

Thank you for all comments :)
-Kame
==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

try_to_free_pages() is used for the direct reclaim of up to SWAP_CLUSTER_MAX
pages when watermarks are low. The caller to alloc_pages_nodemask() can
specify a nodemask of nodes that are allowed to be used but this is not
passed to try_to_free_pages(). This can lead to unnecessary reclaim of pages
that are unusable by the caller and in the worst case lead to allocation
failure as progress has not been made where it is needed.

This patch passes the nodemask used for alloc_pages_nodemask() to
try_to_free_pages().

Changelog: v2 -> v3
  - rewrote the patch description.
  - enhanced comment text.
  - added .nodemask = NULL for try_to_free_mem_cgroup_pages() to show the
    difference from try_to_free_pages().
Changelog: v1 -> v2
  - removed unnecessary nodemask=NULL initialization.

Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 fs/buffer.c          |    2 +-
 include/linux/swap.h |    2 +-
 mm/page_alloc.c      |    3 ++-
 mm/vmscan.c          |   13 +++++++++++--
 4 files changed, 15 insertions(+), 5 deletions(-)

Index: mmotm-2.6.29-Mar21/mm/vmscan.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/mm/vmscan.c
+++ mmotm-2.6.29-Mar21/mm/vmscan.c
@@ -79,6 +79,12 @@ struct scan_control {
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
 
+	/*
+	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
+	 * are scanned.
+	 */
+	nodemask_t	*nodemask;
+
 	/* Pluggable isolate pages callback */
 	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
 			unsigned long *scanned, int order, int mode,
@@ -1544,7 +1550,8 @@ static void shrink_zones(int priority, s
 	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+					sc->nodemask) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1689,7 +1696,7 @@ out:
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-								gfp_t gfp_mask)
+				gfp_t gfp_mask, nodemask_t *nodemask)
 {
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
@@ -1700,6 +1707,7 @@ unsigned long try_to_free_pages(struct z
 		.order = order,
 		.mem_cgroup = NULL,
 		.isolate_pages = isolate_pages_global,
+		.nodemask = nodemask,
 	};
 
 	return do_try_to_free_pages(zonelist, &sc);
@@ -1720,6 +1728,7 @@ unsigned long try_to_free_mem_cgroup_pag
 		.order = 0,
 		.mem_cgroup = mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
+		.nodemask = NULL, /* we don't care about the placement */
 	};
 	struct zonelist *zonelist;
 
Index: mmotm-2.6.29-Mar21/include/linux/swap.h
===================================================================
--- mmotm-2.6.29-Mar21.orig/include/linux/swap.h
+++ mmotm-2.6.29-Mar21/include/linux/swap.h
@@ -213,7 +213,7 @@ static inline void lru_cache_add_active_
 
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-					gfp_t gfp_mask);
+					gfp_t gfp_mask, nodemask_t *mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 						  gfp_t gfp_mask, bool noswap,
 						  unsigned int swappiness);
Index: mmotm-2.6.29-Mar21/mm/page_alloc.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/mm/page_alloc.c
+++ mmotm-2.6.29-Mar21/mm/page_alloc.c
@@ -1598,7 +1598,8 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist, order,
+						gfp_mask, nodemask);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
Index: mmotm-2.6.29-Mar21/fs/buffer.c
===================================================================
--- mmotm-2.6.29-Mar21.orig/fs/buffer.c
+++ mmotm-2.6.29-Mar21/fs/buffer.c
@@ -476,7 +476,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 


^ permalink raw reply	[flat|nested] 18+ messages in thread


end of thread, other threads:[~2009-03-24  2:28 UTC | newest]

Thread overview: 9 messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-23  1:03 [PATCH] fix vmscan to take care of nodemask KAMEZAWA Hiroyuki
2009-03-23  1:13 ` KAMEZAWA Hiroyuki
2009-03-23  1:20   ` KOSAKI Motohiro
2009-03-23 11:48 ` Mel Gorman
2009-03-23 13:38   ` Christoph Lameter
2009-03-23 15:31     ` KOSAKI Motohiro
2009-03-23 15:39       ` Christoph Lameter
2009-03-24  1:31   ` KAMEZAWA Hiroyuki
2009-03-24  2:26     ` [PATCH] fix vmscan to take care of nodemask v3 KAMEZAWA Hiroyuki
