From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753822Ab3BDKGC (ORCPT );
	Mon, 4 Feb 2013 05:06:02 -0500
Received: from cn.fujitsu.com ([222.73.24.84]:3871 "EHLO song.cn.fujitsu.com"
	rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP
	id S1751055Ab3BDKFe (ORCPT );
	Mon, 4 Feb 2013 05:05:34 -0500
X-IronPort-AV: E=Sophos;i="4.84,599,1355068800"; d="scan'208";a="6687005"
From: Lin Feng
To: akpm@linux-foundation.org, mgorman@suse.de, bcrl@kvack.org,
	viro@zeniv.linux.org.uk
Cc: khlebnikov@openvz.org, walken@google.com, kamezawa.hiroyu@jp.fujitsu.com,
	minchan@kernel.org, riel@redhat.com, rientjes@google.com,
	isimatu.yasuaki@jp.fujitsu.com, wency@cn.fujitsu.com, laijs@cn.fujitsu.com,
	jiang.liu@huawei.com, linux-mm@kvack.org, linux-aio@kvack.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, Lin Feng
Subject: [PATCH 1/2] mm: hotplug: implement non-movable version of
	get_user_pages() called get_user_pages_non_movable()
Date: Mon, 4 Feb 2013 18:04:07 +0800
Message-Id: <1359972248-8722-2-git-send-email-linfeng@cn.fujitsu.com>
X-Mailer: git-send-email 1.7.11.7
In-Reply-To: <1359972248-8722-1-git-send-email-linfeng@cn.fujitsu.com>
References: <1359972248-8722-1-git-send-email-linfeng@cn.fujitsu.com>
X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.3|September 15, 2011)
	at 2013/02/04 18:04:16,
	Serialize by Router on mailserver/fnst(Release 8.5.3|September 15, 2011)
	at 2013/02/04 18:04:16,
	Serialize complete at 2013/02/04 18:04:16
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org

get_user_pages() always tries to allocate pages from the movable zone, which
is not reliable for the memory hot-remove framework in some cases.

This patch introduces a new library function, get_user_pages_non_movable(),
which pins pages only from the non-movable zones. It is a wrapper around
get_user_pages(), but it makes sure that all pinned pages come from a
non-movable zone by performing additional page migration.

Cc: Andrew Morton
Cc: Mel Gorman
Cc: KAMEZAWA Hiroyuki
Cc: Yasuaki Ishimatsu
Reviewed-by: Tang Chen
Reviewed-by: Gu Zheng
Signed-off-by: Lin Feng
---
 include/linux/mm.h     |  5 ++++
 include/linux/mmzone.h |  4 ++++
 mm/memory.c            | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_isolation.c    |  5 ++++
 4 files changed, 77 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 66e2f7c..2a25d0e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1049,6 +1049,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int get_user_pages_non_movable(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int nr_pages, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas);
+#endif
 struct kvec;
 int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
 			struct page **pages);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 73b64a3..5db811e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -838,6 +838,10 @@ static inline int is_normal_idx(enum zone_type idx)
 	return (idx == ZONE_NORMAL);
 }
 
+static inline int is_movable(struct zone *zone)
+{
+	return zone == zone->zone_pgdat->node_zones + ZONE_MOVABLE;
+}
 /**
  * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
diff --git a/mm/memory.c b/mm/memory.c
index bb1369f..e3b8e19 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -58,6 +58,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -1995,6 +1997,67 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/**
+ * It's a wrapper of get_user_pages() but it makes sure that all pages come from
+ * non-movable zone via additional page migration.
+ */
+int get_user_pages_non_movable(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int nr_pages, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	int ret, i, isolate_err, migrate_pre_flag;
+	LIST_HEAD(pagelist);
+
+retry:
+	ret = get_user_pages(tsk, mm, start, nr_pages, write, force, pages,
+			vmas);
+
+	isolate_err = 0;
+	migrate_pre_flag = 0;
+
+	for (i = 0; i < ret; i++) {
+		if (is_movable(page_zone(pages[i]))) {
+			if (!migrate_pre_flag) {
+				if (migrate_prep())
+					goto put_page;
+				migrate_pre_flag = 1;
+			}
+
+			if (!isolate_lru_page(pages[i])) {
+				inc_zone_page_state(pages[i], NR_ISOLATED_ANON +
+						page_is_file_cache(pages[i]));
+				list_add_tail(&pages[i]->lru, &pagelist);
+			} else {
+				isolate_err = 1;
+				goto put_page;
+			}
+		}
+	}
+
+	/* All pages are non movable, we are done :) */
+	if (i == ret && list_empty(&pagelist))
+		return ret;
+
+put_page:
+	/* Undo the effects of former get_user_pages(), we won't pin anything */
+	for (i = 0; i < ret; i++)
+		put_page(pages[i]);
+
+	if (migrate_pre_flag && !isolate_err) {
+		ret = migrate_pages(&pagelist, alloc_migrate_target, 1,
+					false, MIGRATE_SYNC, MR_SYSCALL);
+		/* Steal pages from non-movable zone successfully? */
+		if (!ret)
+			goto retry;
+	}
+
+	putback_lru_pages(&pagelist);
+	return 0;
+}
+EXPORT_SYMBOL(get_user_pages_non_movable);
+#endif
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 383bdbb..1b7bd17 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -247,6 +247,9 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 	return ret ? 0 : -EBUSY;
 }
 
+/**
+ * @private: 0 means page can be alloced from movable zone, otherwise forbidden
+ */
 struct page *alloc_migrate_target(struct page *page, unsigned long private,
 				  int **resultp)
 {
@@ -254,6 +257,8 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
 	if (PageHighMem(page))
 		gfp_mask |= __GFP_HIGHMEM;
 
+	if (unlikely(private != 0))
+		gfp_mask &= ~__GFP_MOVABLE;
 	return alloc_page(gfp_mask);
 }
--
1.7.11.7
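
For illustration only, not part of the patch above: a minimal sketch of how a
caller that keeps user pages pinned for a long time, such as the aio ring
buffer converted in patch 2/2, might use the new helper. The pin_ctx structure
and the identifiers user_base, nr_ring_pages, ring_pages and pin_ring_pages
are invented for this example and do not exist in the kernel; the surrounding
calls (get_user_pages(), put_page(), mmap_sem locking) follow the APIs as they
exist at the time of this patch.

/*
 * Hypothetical caller sketch: pin a user-space ring buffer for the lifetime
 * of a kernel object.  All pin_ctx identifiers are made up for illustration.
 */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/errno.h>

struct pin_ctx {
	unsigned long user_base;	/* start of the user mapping to pin */
	int nr_ring_pages;		/* number of pages to pin */
	struct page **ring_pages;	/* output array of pinned pages */
};

static int pin_ring_pages(struct pin_ctx *ctx)
{
	struct mm_struct *mm = current->mm;
	int pinned;

	down_read(&mm->mmap_sem);
#ifdef CONFIG_MEMORY_HOTREMOVE
	/*
	 * Long-lived pins must not sit in ZONE_MOVABLE, otherwise the pages
	 * can never be migrated away and hot-removing the hosting memory
	 * will fail.  The helper migrates any movable pages to a non-movable
	 * zone before returning them pinned.
	 */
	pinned = get_user_pages_non_movable(current, mm, ctx->user_base,
					    ctx->nr_ring_pages, 1, 0,
					    ctx->ring_pages, NULL);
#else
	pinned = get_user_pages(current, mm, ctx->user_base,
				ctx->nr_ring_pages, 1, 0,
				ctx->ring_pages, NULL);
#endif
	up_read(&mm->mmap_sem);

	if (pinned != ctx->nr_ring_pages) {
		/* Partial pin: release whatever was pinned and bail out. */
		while (pinned > 0)
			put_page(ctx->ring_pages[--pinned]);
		return pinned < 0 ? pinned : -EFAULT;
	}
	return 0;
}

Note that, as implemented above, get_user_pages_non_movable() drops every
reference it took and returns 0 when isolation or migration fails, so a caller
only has to clean up on a partial but non-zero return, exactly as it would for
plain get_user_pages().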