linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] mm/hotplug: correctly setup fallback zonelists when creating new pgdat
@ 2012-07-05  9:45 Jiang Liu
  2012-07-05  9:45 ` [PATCH 2/4] mm/hotplug: correctly add new zone to all other nodes' zone lists Jiang Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jiang Liu @ 2012-07-05  9:45 UTC (permalink / raw)
  To: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim
  Cc: Jiang Liu, Rusty Russell, Yinghai Lu, Tony Luck,
	KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Bjorn Helgaas, Keping Chen, linux-mm, linux-kernel, Jiang Liu,
	Xishi Qiu

When hotadd_new_pgdat() is called to create new pgdat for a new node,
a fallback zonelist should be created for the new node. There's code
to try to achieve that in hotadd_new_pgdat() as below:
	/*
	 * The node we allocated has no zone fallback lists. For avoiding
	 * to access not-initialized zonelist, build here.
	 */
	mutex_lock(&zonelists_mutex);
	build_all_zonelists(pgdat, NULL);
	mutex_unlock(&zonelists_mutex);

But it doesn't work as expected. When hotadd_new_pgdat() is called, the
new node is still in offline state because node_set_online(nid) hasn't
been called yet. And build_all_zonelists() only builds zonelists for
online nodes as:
        for_each_online_node(nid) {
                pg_data_t *pgdat = NODE_DATA(nid);

                build_zonelists(pgdat);
                build_zonelist_cache(pgdat);
        }

Though we hope to create a zonelist for the new pgdat, it doesn't happen.
So add a new parameter "pgdat" to build_all_zonelists() so that zonelists
are built for the new pgdat too.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
---
 include/linux/mmzone.h |    2 +-
 init/main.c            |    2 +-
 kernel/cpu.c           |    2 +-
 mm/memory_hotplug.c    |    4 ++--
 mm/page_alloc.c        |   17 ++++++++++++-----
 5 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2427706..8ddbfb4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -718,7 +718,7 @@ typedef struct pglist_data {
 #include <linux/memory_hotplug.h>
 
 extern struct mutex zonelists_mutex;
-void build_all_zonelists(void *data);
+void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
 void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
diff --git a/init/main.c b/init/main.c
index b5cc0a7..622364d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -501,7 +501,7 @@ asmlinkage void __init start_kernel(void)
 	setup_per_cpu_areas();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
 
-	build_all_zonelists(NULL);
+	build_all_zonelists(NULL, NULL);
 	page_alloc_init();
 
 	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a4eb522..14d3258 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -416,7 +416,7 @@ int __cpuinit cpu_up(unsigned int cpu)
 
 	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
 		mutex_lock(&zonelists_mutex);
-		build_all_zonelists(NULL);
+		build_all_zonelists(NULL, NULL);
 		mutex_unlock(&zonelists_mutex);
 	}
 #endif
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0d7e3ec..f93c5b5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -513,7 +513,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 	if (need_zonelists_rebuild)
-		build_all_zonelists(zone);
+		build_all_zonelists(NULL, zone);
 	else
 		zone_pcp_update(zone);
 
@@ -562,7 +562,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	 * to access not-initialized zonelist, build here.
 	 */
 	mutex_lock(&zonelists_mutex);
-	build_all_zonelists(NULL);
+	build_all_zonelists(pgdat, NULL);
 	mutex_unlock(&zonelists_mutex);
 
 	return pgdat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4403009..ebf319d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3030,7 +3030,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 			user_zonelist_order = oldval;
 		} else if (oldval != user_zonelist_order) {
 			mutex_lock(&zonelists_mutex);
-			build_all_zonelists(NULL);
+			build_all_zonelists(NULL, NULL);
 			mutex_unlock(&zonelists_mutex);
 		}
 	}
@@ -3413,10 +3413,17 @@ static __init_refok int __build_all_zonelists(void *data)
 {
 	int nid;
 	int cpu;
+	pg_data_t *self = data;
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
 #endif
+
+	if (self && !node_online(self->node_id)) {
+		build_zonelists(self);
+		build_zonelist_cache(self);
+	}
+
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 
@@ -3461,7 +3468,7 @@ static __init_refok int __build_all_zonelists(void *data)
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
  */
-void __ref build_all_zonelists(void *data)
+void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 {
 	set_zonelist_order();
 
@@ -3473,10 +3480,10 @@ void __ref build_all_zonelists(void *data)
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
 #ifdef CONFIG_MEMORY_HOTPLUG
-		if (data)
-			setup_zone_pageset((struct zone *)data);
+		if (zone)
+			setup_zone_pageset(zone);
 #endif
-		stop_machine(__build_all_zonelists, NULL, NULL);
+		stop_machine(__build_all_zonelists, pgdat, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/4] mm/hotplug: correctly add new zone to all other nodes' zone lists
  2012-07-05  9:45 [PATCH 1/4] mm/hotplug: correctly setup fallback zonelists when creating new pgdat Jiang Liu
@ 2012-07-05  9:45 ` Jiang Liu
  2012-07-05  9:45 ` [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty Jiang Liu
  2012-07-05  9:45 ` [PATCH 4/4] mm/hotplug: mark memory hotplug code in page_alloc.c as __meminit Jiang Liu
  2 siblings, 0 replies; 7+ messages in thread
From: Jiang Liu @ 2012-07-05  9:45 UTC (permalink / raw)
  To: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim
  Cc: Jiang Liu, Rusty Russell, Yinghai Lu, Tony Luck,
	KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Bjorn Helgaas, Keping Chen, linux-mm, linux-kernel, Jiang Liu

When online_pages() is called to add new memory to an empty zone,
it rebuilds all zone lists by calling build_all_zonelists().
But there's a bug which prevents the new zone to be added to other
nodes' zone lists.

online_pages() {
	build_all_zonelists()
	.....
	node_set_state(zone_to_nid(zone), N_HIGH_MEMORY)
}

Here the node of the zone is put into N_HIGH_MEMORY state after calling
build_all_zonelists(), but build_all_zonelists() only adds zones from
nodes in N_HIGH_MEMORY state to the fallback zone lists.
build_all_zonelists()
    ->__build_all_zonelists()
	->build_zonelists()
	    ->find_next_best_node()
		->for_each_node_state(n, N_HIGH_MEMORY)

So memory in the new zone will never be used by other nodes, and it may
cause strange behavior when system is under memory pressure.  So put node
into N_HIGH_MEMORY state before calling build_all_zonelists().

Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 mm/memory_hotplug.c |   15 ++++++++-------
 1 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f93c5b5..bce80c7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -512,19 +512,20 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
-	if (need_zonelists_rebuild)
-		build_all_zonelists(NULL, zone);
-	else
-		zone_pcp_update(zone);
+	if (onlined_pages) {
+		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
+		if (need_zonelists_rebuild)
+			build_all_zonelists(NULL, zone);
+		else
+			zone_pcp_update(zone);
+	}
 
 	mutex_unlock(&zonelists_mutex);
 
 	init_per_zone_wmark_min();
 
-	if (onlined_pages) {
+	if (onlined_pages)
 		kswapd_run(zone_to_nid(zone));
-		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
-	}
 
 	vm_total_pages = nr_free_pagecache_pages();
 
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty
  2012-07-05  9:45 [PATCH 1/4] mm/hotplug: correctly setup fallback zonelists when creating new pgdat Jiang Liu
  2012-07-05  9:45 ` [PATCH 2/4] mm/hotplug: correctly add new zone to all other nodes' zone lists Jiang Liu
@ 2012-07-05  9:45 ` Jiang Liu
  2012-07-19  7:58   ` Bob Liu
  2012-07-31  7:43   ` Wen Congyang
  2012-07-05  9:45 ` [PATCH 4/4] mm/hotplug: mark memory hotplug code in page_alloc.c as __meminit Jiang Liu
  2 siblings, 2 replies; 7+ messages in thread
From: Jiang Liu @ 2012-07-05  9:45 UTC (permalink / raw)
  To: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim
  Cc: Jiang Liu, Rusty Russell, Yinghai Lu, Tony Luck,
	KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Bjorn Helgaas, Keping Chen, linux-mm, linux-kernel, Jiang Liu,
	Wei Wang

When a zone becomes empty after memory offlining, free zone->pageset.
Otherwise it will cause memory leak when adding memory to the empty
zone again because build_all_zonelists() will allocate zone->pageset
for an empty zone.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Wei Wang <Bessel.Wang@huawei.com>
---
 include/linux/mm.h  |    1 +
 mm/memory_hotplug.c |    3 +++
 mm/page_alloc.c     |   13 +++++++++++++
 3 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b36d08c..f8b62f2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1331,6 +1331,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
 extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_reset(struct zone *zone);
 
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index bce80c7..998b792 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -966,6 +966,9 @@ repeat:
 
 	init_per_zone_wmark_min();
 
+	if (!populated_zone(zone))
+		zone_pcp_reset(zone);
+
 	if (!node_present_pages(node)) {
 		node_clear_state(node, N_HIGH_MEMORY);
 		kswapd_stop(node);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ebf319d..5964b7a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5872,6 +5872,19 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
+void zone_pcp_reset(struct zone *zone)
+{
+	unsigned long flags;
+
+	/* avoid races with drain_pages()  */
+	local_irq_save(flags);
+	if (zone->pageset != &boot_pageset) {
+		free_percpu(zone->pageset);
+		zone->pageset = &boot_pageset;
+	}
+	local_irq_restore(flags);
+}
+
 /*
  * All pages in the range must be isolated before calling this.
  */
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] mm/hotplug: mark memory hotplug code in page_alloc.c as __meminit
  2012-07-05  9:45 [PATCH 1/4] mm/hotplug: correctly setup fallback zonelists when creating new pgdat Jiang Liu
  2012-07-05  9:45 ` [PATCH 2/4] mm/hotplug: correctly add new zone to all other nodes' zone lists Jiang Liu
  2012-07-05  9:45 ` [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty Jiang Liu
@ 2012-07-05  9:45 ` Jiang Liu
  2 siblings, 0 replies; 7+ messages in thread
From: Jiang Liu @ 2012-07-05  9:45 UTC (permalink / raw)
  To: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim
  Cc: Jiang Liu, Rusty Russell, Yinghai Lu, Tony Luck,
	KAMEZAWA Hiroyuki, KOSAKI Motohiro, David Rientjes,
	Bjorn Helgaas, Keping Chen, linux-mm, linux-kernel, Jiang Liu

Mark functions used by both boot and memory hotplug as __meminit to reduce
memory footprint when memory hotplug is disabled.

Also guard zone_pcp_update() with CONFIG_MEMORY_HOTPLUG because it's only
used by memory hotplug code.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 mm/page_alloc.c |   66 ++++++++++++++++++++++++++++--------------------------
 1 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5964b7a..da08449 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3409,7 +3409,7 @@ static void setup_zone_pageset(struct zone *zone);
 DEFINE_MUTEX(zonelists_mutex);
 
 /* return values int ....just for stop_machine() */
-static __init_refok int __build_all_zonelists(void *data)
+static int __build_all_zonelists(void *data)
 {
 	int nid;
 	int cpu;
@@ -3753,7 +3753,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
 #endif
 
-static int zone_batchsize(struct zone *zone)
+static int __meminit zone_batchsize(struct zone *zone)
 {
 #ifdef CONFIG_MMU
 	int batch;
@@ -3835,7 +3835,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 		pcp->batch = PAGE_SHIFT * 8;
 }
 
-static void setup_zone_pageset(struct zone *zone)
+static void __meminit setup_zone_pageset(struct zone *zone)
 {
 	int cpu;
 
@@ -3908,33 +3908,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 	return 0;
 }
 
-static int __zone_pcp_update(void *data)
-{
-	struct zone *zone = data;
-	int cpu;
-	unsigned long batch = zone_batchsize(zone), flags;
-
-	for_each_possible_cpu(cpu) {
-		struct per_cpu_pageset *pset;
-		struct per_cpu_pages *pcp;
-
-		pset = per_cpu_ptr(zone->pageset, cpu);
-		pcp = &pset->pcp;
-
-		local_irq_save(flags);
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		setup_pageset(pset, batch);
-		local_irq_restore(flags);
-	}
-	return 0;
-}
-
-void zone_pcp_update(struct zone *zone)
-{
-	stop_machine(__zone_pcp_update, zone, NULL);
-}
-
-static __meminit void zone_pcp_init(struct zone *zone)
+static void __meminit zone_pcp_init(struct zone *zone)
 {
 	/*
 	 * per cpu subsystem is not up at this point. The following code
@@ -3949,7 +3923,7 @@ static __meminit void zone_pcp_init(struct zone *zone)
 					 zone_batchsize(zone));
 }
 
-__meminit int init_currently_empty_zone(struct zone *zone,
+int __meminit init_currently_empty_zone(struct zone *zone,
 					unsigned long zone_start_pfn,
 					unsigned long size,
 					enum memmap_context context)
@@ -4757,7 +4731,7 @@ out:
 }
 
 /* Any regular memory on that node ? */
-static void check_for_regular_memory(pg_data_t *pgdat)
+static void __init check_for_regular_memory(pg_data_t *pgdat)
 {
 #ifdef CONFIG_HIGHMEM
 	enum zone_type zone_type;
@@ -5871,6 +5845,34 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 }
 #endif
 
+#ifdef	CONFIG_MEMORY_HOTPLUG
+static int __meminit __zone_pcp_update(void *data)
+{
+	struct zone *zone = data;
+	int cpu;
+	unsigned long batch = zone_batchsize(zone), flags;
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
+
+		pset = per_cpu_ptr(zone->pageset, cpu);
+		pcp = &pset->pcp;
+
+		local_irq_save(flags);
+		free_pcppages_bulk(zone, pcp->count, pcp);
+		setup_pageset(pset, batch);
+		local_irq_restore(flags);
+	}
+	return 0;
+}
+
+void __meminit zone_pcp_update(struct zone *zone)
+{
+	stop_machine(__zone_pcp_update, zone, NULL);
+}
+#endif
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 void zone_pcp_reset(struct zone *zone)
 {
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty
  2012-07-05  9:45 ` [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty Jiang Liu
@ 2012-07-19  7:58   ` Bob Liu
  2012-07-24  9:47     ` Jiang Liu
  2012-07-31  7:43   ` Wen Congyang
  1 sibling, 1 reply; 7+ messages in thread
From: Bob Liu @ 2012-07-19  7:58 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim,
	Rusty Russell, Yinghai Lu, Tony Luck, KAMEZAWA Hiroyuki,
	KOSAKI Motohiro, David Rientjes, Bjorn Helgaas, Keping Chen,
	linux-mm, linux-kernel, Jiang Liu, Wei Wang

On Thu, Jul 5, 2012 at 5:45 PM, Jiang Liu <jiang.liu@huawei.com> wrote:
> When a zone becomes empty after memory offlining, free zone->pageset.
> Otherwise it will cause memory leak when adding memory to the empty
> zone again because build_all_zonelists() will allocate zone->pageset
> for an empty zone.
>

What about other area allocated to the zone?  eg. wait_table?

> Signed-off-by: Jiang Liu <liuj97@gmail.com>
> Signed-off-by: Wei Wang <Bessel.Wang@huawei.com>
> ---
>  include/linux/mm.h  |    1 +
>  mm/memory_hotplug.c |    3 +++
>  mm/page_alloc.c     |   13 +++++++++++++
>  3 files changed, 17 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b36d08c..f8b62f2 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1331,6 +1331,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
>  extern void setup_per_cpu_pageset(void);
>
>  extern void zone_pcp_update(struct zone *zone);
> +extern void zone_pcp_reset(struct zone *zone);
>
>  /* nommu.c */
>  extern atomic_long_t mmap_pages_allocated;
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index bce80c7..998b792 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -966,6 +966,9 @@ repeat:
>
>         init_per_zone_wmark_min();
>
> +       if (!populated_zone(zone))
> +               zone_pcp_reset(zone);
> +
>         if (!node_present_pages(node)) {
>                 node_clear_state(node, N_HIGH_MEMORY);
>                 kswapd_stop(node);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index ebf319d..5964b7a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5872,6 +5872,19 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
>  #endif
>
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> +void zone_pcp_reset(struct zone *zone)
> +{
> +       unsigned long flags;
> +
> +       /* avoid races with drain_pages()  */
> +       local_irq_save(flags);
> +       if (zone->pageset != &boot_pageset) {
> +               free_percpu(zone->pageset);
> +               zone->pageset = &boot_pageset;
> +       }
> +       local_irq_restore(flags);
> +}
> +
>  /*
>   * All pages in the range must be isolated before calling this.
>   */
> --
> 1.7.1
>
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>



-- 
Regards,
--Bob

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty
  2012-07-19  7:58   ` Bob Liu
@ 2012-07-24  9:47     ` Jiang Liu
  0 siblings, 0 replies; 7+ messages in thread
From: Jiang Liu @ 2012-07-24  9:47 UTC (permalink / raw)
  To: Bob Liu
  Cc: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim,
	Rusty Russell, Yinghai Lu, Tony Luck, KAMEZAWA Hiroyuki,
	KOSAKI Motohiro, David Rientjes, Bjorn Helgaas, Keping Chen,
	linux-mm, linux-kernel, Jiang Liu, Wei Wang

Hi Bob,
	Yes, we need to handle the wait table too. We have tried to remove the
pgdat and wait table altogether, but found it's really hard to remove pgdat
for empty nodes. I think the candidate solution is to free the wait table but
keep the pgdat. Any suggestions?
	Thanks!
	Gerry

On 2012-7-19 15:58, Bob Liu wrote:
> On Thu, Jul 5, 2012 at 5:45 PM, Jiang Liu <jiang.liu@huawei.com> wrote:
>> When a zone becomes empty after memory offlining, free zone->pageset.
>> Otherwise it will cause memory leak when adding memory to the empty
>> zone again because build_all_zonelists() will allocate zone->pageset
>> for an empty zone.
>>
> 
> What about other area allocated to the zone?  eg. wait_table?
> 
>> Signed-off-by: Jiang Liu <liuj97@gmail.com>
>> Signed-off-by: Wei Wang <Bessel.Wang@huawei.com>
>> ---
>>  include/linux/mm.h  |    1 +
>>  mm/memory_hotplug.c |    3 +++
>>  mm/page_alloc.c     |   13 +++++++++++++
>>  3 files changed, 17 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index b36d08c..f8b62f2 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1331,6 +1331,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
>>  extern void setup_per_cpu_pageset(void);
>>
>>  extern void zone_pcp_update(struct zone *zone);
>> +extern void zone_pcp_reset(struct zone *zone);
>>
>>  /* nommu.c */
>>  extern atomic_long_t mmap_pages_allocated;
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index bce80c7..998b792 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -966,6 +966,9 @@ repeat:
>>
>>         init_per_zone_wmark_min();
>>
>> +       if (!populated_zone(zone))
>> +               zone_pcp_reset(zone);
>> +
>>         if (!node_present_pages(node)) {
>>                 node_clear_state(node, N_HIGH_MEMORY);
>>                 kswapd_stop(node);
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index ebf319d..5964b7a 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -5872,6 +5872,19 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
>>  #endif
>>
>>  #ifdef CONFIG_MEMORY_HOTREMOVE
>> +void zone_pcp_reset(struct zone *zone)
>> +{
>> +       unsigned long flags;
>> +
>> +       /* avoid races with drain_pages()  */
>> +       local_irq_save(flags);
>> +       if (zone->pageset != &boot_pageset) {
>> +               free_percpu(zone->pageset);
>> +               zone->pageset = &boot_pageset;
>> +       }
>> +       local_irq_restore(flags);
>> +}
>> +
>>  /*
>>   * All pages in the range must be isolated before calling this.
>>   */
>> --
>> 1.7.1
>>
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@kvack.org.  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
> 
> 
> 



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty
  2012-07-05  9:45 ` [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty Jiang Liu
  2012-07-19  7:58   ` Bob Liu
@ 2012-07-31  7:43   ` Wen Congyang
  1 sibling, 0 replies; 7+ messages in thread
From: Wen Congyang @ 2012-07-31  7:43 UTC (permalink / raw)
  To: Jiang Liu
  Cc: Andrew Morton, Mel Gorman, Michal Hocko, Minchan Kim,
	Rusty Russell, Yinghai Lu, Tony Luck, KAMEZAWA Hiroyuki,
	KOSAKI Motohiro, David Rientjes, Bjorn Helgaas, Keping Chen,
	linux-mm, linux-kernel, Jiang Liu, Wei Wang

At 07/05/2012 05:45 PM, Jiang Liu Wrote:
> When a zone becomes empty after memory offlining, free zone->pageset.
> Otherwise it will cause memory leak when adding memory to the empty
> zone again because build_all_zonelists() will allocate zone->pageset
> for an empty zone.
> 
> Signed-off-by: Jiang Liu <liuj97@gmail.com>
> Signed-off-by: Wei Wang <Bessel.Wang@huawei.com>
> ---
>  include/linux/mm.h  |    1 +
>  mm/memory_hotplug.c |    3 +++
>  mm/page_alloc.c     |   13 +++++++++++++
>  3 files changed, 17 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b36d08c..f8b62f2 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1331,6 +1331,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
>  extern void setup_per_cpu_pageset(void);
>  
>  extern void zone_pcp_update(struct zone *zone);
> +extern void zone_pcp_reset(struct zone *zone);
>  
>  /* nommu.c */
>  extern atomic_long_t mmap_pages_allocated;
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index bce80c7..998b792 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -966,6 +966,9 @@ repeat:
>  
>  	init_per_zone_wmark_min();
>  
> +	if (!populated_zone(zone))
> +		zone_pcp_reset(zone);
> +
>  	if (!node_present_pages(node)) {
>  		node_clear_state(node, N_HIGH_MEMORY);
>  		kswapd_stop(node);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index ebf319d..5964b7a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5872,6 +5872,19 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
>  #endif
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> +void zone_pcp_reset(struct zone *zone)
> +{
> +	unsigned long flags;
> +
> +	/* avoid races with drain_pages()  */
> +	local_irq_save(flags);

drain_pages() may run on another cpu, so this can't avoid
races with drain_pages().

Thanks
Wen Congyang

> +	if (zone->pageset != &boot_pageset) {
> +		free_percpu(zone->pageset);
> +		zone->pageset = &boot_pageset;
> +	}
> +	local_irq_restore(flags);
> +}
> +
>  /*
>   * All pages in the range must be isolated before calling this.
>   */


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-07-31  7:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-05  9:45 [PATCH 1/4] mm/hotplug: correctly setup fallback zonelists when creating new pgdat Jiang Liu
2012-07-05  9:45 ` [PATCH 2/4] mm/hotplug: correctly add new zone to all other nodes' zone lists Jiang Liu
2012-07-05  9:45 ` [PATCH 3/4] mm/hotplug: free zone->pageset when a zone becomes empty Jiang Liu
2012-07-19  7:58   ` Bob Liu
2012-07-24  9:47     ` Jiang Liu
2012-07-31  7:43   ` Wen Congyang
2012-07-05  9:45 ` [PATCH 4/4] mm/hotplug: mark memory hotplug code in page_alloc.c as __meminit Jiang Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).