* [patch]THP: add split tail pages to shrink page list in page reclaim
@ 2013-04-01 13:26 Shaohua Li
  2013-04-01 14:24 ` Rik van Riel
                   ` (5 more replies)
  0 siblings, 6 replies; 10+ messages in thread
From: Shaohua Li @ 2013-04-01 13:26 UTC (permalink / raw)
  To: linux-mm; +Cc: akpm, hughd, aarcange, minchan

In page reclaim, a huge page is split. split_huge_page() adds the tail pages
to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
all subpages of the huge page instead of just the head page. This patch adds
the split tail pages to the shrink page list so that the tail pages can be
reclaimed soon.

Before this patch, running a swap workload:
thp_fault_alloc 3492
thp_fault_fallback 608
thp_collapse_alloc 6
thp_collapse_alloc_failed 0
thp_split 916

With this patch:
thp_fault_alloc 4085
thp_fault_fallback 16
thp_collapse_alloc 90
thp_collapse_alloc_failed 0
thp_split 1272

Fallback allocation is reduced a lot.
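
For reference, the resulting call path, pieced together from the hunks below,
looks roughly like this (a simplified sketch; context lines, error paths and
locking are omitted):

        /* mm/vmscan.c: shrink_page_list() passes its private page list */
        if (PageAnon(page) && !PageSwapCache(page)) {
                if (!add_to_swap(page, page_list))
                        goto activate_locked;
        }

        /* mm/swap_state.c: add_to_swap() hands the list down to the split */
        if (unlikely(PageTransHuge(page)))
                if (unlikely(split_huge_page_to_list(page, list))) {
                        swapcache_free(entry, NULL);
                        return 0;
                }

        /* mm/swap.c: lru_add_page_tail() puts a tail page on the caller's
         * list instead of the LRU when a list is supplied */
        if (list) {
                /* page reclaim is reclaiming a huge page */
                get_page(page_tail);
                list_add_tail(&page_tail->lru, list);
        }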

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
 include/linux/huge_mm.h |   11 ++++++++++-
 include/linux/swap.h    |    4 ++--
 mm/huge_memory.c        |   14 ++++++++------
 mm/swap.c               |   11 ++++++++---
 mm/swap_state.c         |    4 ++--
 mm/vmscan.c             |    2 +-
 6 files changed, 31 insertions(+), 15 deletions(-)

Index: linux/include/linux/huge_mm.h
===================================================================
--- linux.orig/include/linux/huge_mm.h	2013-04-01 20:16:23.822120955 +0800
+++ linux/include/linux/huge_mm.h	2013-04-01 20:18:22.668627309 +0800
@@ -99,7 +99,11 @@ extern int copy_pte_range(struct mm_stru
 extern int handle_pte_fault(struct mm_struct *mm,
 			    struct vm_area_struct *vma, unsigned long address,
 			    pte_t *pte, pmd_t *pmd, unsigned int flags);
-extern int split_huge_page(struct page *page);
+extern int split_huge_page_to_list(struct page *page, struct list_head *list);
+static inline int split_huge_page(struct page *page)
+{
+	return split_huge_page_to_list(page, NULL);
+}
 extern void __split_huge_page_pmd(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmd);
 #define split_huge_page_pmd(__vma, __address, __pmd)			\
@@ -186,6 +190,11 @@ extern int do_huge_pmd_numa_page(struct
 #define transparent_hugepage_enabled(__vma) 0
 
 #define transparent_hugepage_flags 0UL
+static inline int
+split_huge_page_to_list(struct page *page, struct list_head *list)
+{
+	return 0;
+}
 static inline int split_huge_page(struct page *page)
 {
 	return 0;
Index: linux/include/linux/swap.h
===================================================================
--- linux.orig/include/linux/swap.h	2013-04-01 20:16:23.810121105 +0800
+++ linux/include/linux/swap.h	2013-04-01 20:18:22.668627309 +0800
@@ -236,7 +236,7 @@ extern unsigned long nr_free_pagecache_p
 extern void __lru_cache_add(struct page *, enum lru_list lru);
 extern void lru_cache_add_lru(struct page *, enum lru_list lru);
 extern void lru_add_page_tail(struct page *page, struct page *page_tail,
-			      struct lruvec *lruvec);
+			 struct lruvec *lruvec, struct list_head *head);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
@@ -343,7 +343,7 @@ extern struct address_space swapper_spac
 #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
 extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *);
+extern int add_to_swap(struct page *, struct list_head *list);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
Index: linux/mm/huge_memory.c
===================================================================
--- linux.orig/mm/huge_memory.c	2013-04-01 20:16:23.798121258 +0800
+++ linux/mm/huge_memory.c	2013-04-01 20:18:43.020371209 +0800
@@ -1560,7 +1560,8 @@ static int __split_huge_page_splitting(s
 	return ret;
 }
 
-static void __split_huge_page_refcount(struct page *page)
+static void __split_huge_page_refcount(struct page *page,
+				       struct list_head *list)
 {
 	int i;
 	struct zone *zone = page_zone(page);
@@ -1646,7 +1647,7 @@ static void __split_huge_page_refcount(s
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-		lru_add_page_tail(page, page_tail, lruvec);
+		lru_add_page_tail(page, page_tail, lruvec, list);
 	}
 	atomic_sub(tail_count, &page->_count);
 	BUG_ON(atomic_read(&page->_count) <= 0);
@@ -1753,7 +1754,8 @@ static int __split_huge_page_map(struct
 
 /* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
-			      struct anon_vma *anon_vma)
+			      struct anon_vma *anon_vma,
+			      struct list_head *list)
 {
 	int mapcount, mapcount2;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -1784,7 +1786,7 @@ static void __split_huge_page(struct pag
 		       mapcount, page_mapcount(page));
 	BUG_ON(mapcount != page_mapcount(page));
 
-	__split_huge_page_refcount(page);
+	__split_huge_page_refcount(page, list);
 
 	mapcount2 = 0;
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
@@ -1799,7 +1801,7 @@ static void __split_huge_page(struct pag
 	BUG_ON(mapcount != mapcount2);
 }
 
-int split_huge_page(struct page *page)
+int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct anon_vma *anon_vma;
 	int ret = 1;
@@ -1824,7 +1826,7 @@ int split_huge_page(struct page *page)
 		goto out_unlock;
 
 	BUG_ON(!PageSwapBacked(page));
-	__split_huge_page(page, anon_vma);
+	__split_huge_page(page, anon_vma, list);
 	count_vm_event(THP_SPLIT);
 
 	BUG_ON(PageCompound(page));
Index: linux/mm/swap.c
===================================================================
--- linux.orig/mm/swap.c	2013-04-01 20:16:23.794121307 +0800
+++ linux/mm/swap.c	2013-04-01 20:18:22.668627309 +0800
@@ -737,7 +737,7 @@ EXPORT_SYMBOL(__pagevec_release);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* used by __split_huge_page_refcount() */
 void lru_add_page_tail(struct page *page, struct page *page_tail,
-		       struct lruvec *lruvec)
+		       struct lruvec *lruvec, struct list_head *list)
 {
 	int uninitialized_var(active);
 	enum lru_list lru;
@@ -749,7 +749,8 @@ void lru_add_page_tail(struct page *page
 	VM_BUG_ON(NR_CPUS != 1 &&
 		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
 
-	SetPageLRU(page_tail);
+	if (!list)
+		SetPageLRU(page_tail);
 
 	if (page_evictable(page_tail)) {
 		if (PageActive(page)) {
@@ -767,7 +768,11 @@ void lru_add_page_tail(struct page *page
 
 	if (likely(PageLRU(page)))
 		list_add_tail(&page_tail->lru, &page->lru);
-	else {
+	else if (list) {
+		/* page reclaim is reclaiming a huge page */
+		get_page(page_tail);
+		list_add_tail(&page_tail->lru, list);
+	} else {
 		struct list_head *list_head;
 		/*
 		 * Head page has not yet been counted, as an hpage,
Index: linux/mm/swap_state.c
===================================================================
--- linux.orig/mm/swap_state.c	2013-04-01 20:16:23.778121508 +0800
+++ linux/mm/swap_state.c	2013-04-01 20:18:22.668627309 +0800
@@ -160,7 +160,7 @@ void __delete_from_swap_cache(struct pag
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page *page)
+int add_to_swap(struct page *page, struct list_head *list)
 {
 	swp_entry_t entry;
 	int err;
@@ -173,7 +173,7 @@ int add_to_swap(struct page *page)
 		return 0;
 
 	if (unlikely(PageTransHuge(page)))
-		if (unlikely(split_huge_page(page))) {
+		if (unlikely(split_huge_page_to_list(page, list))) {
 			swapcache_free(entry, NULL);
 			return 0;
 		}
Index: linux/mm/vmscan.c
===================================================================
--- linux.orig/mm/vmscan.c	2013-04-01 20:16:23.782121457 +0800
+++ linux/mm/vmscan.c	2013-04-01 20:18:22.668627309 +0800
@@ -780,7 +780,7 @@ static unsigned long shrink_page_list(st
 		if (PageAnon(page) && !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			if (!add_to_swap(page))
+			if (!add_to_swap(page, page_list))
 				goto activate_locked;
 			may_enter_fs = 1;
 		}


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
@ 2013-04-01 14:24 ` Rik van Riel
  2013-04-02  1:24 ` Minchan Kim
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Rik van Riel @ 2013-04-01 14:24 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, akpm, hughd, aarcange, minchan

On 04/01/2013 09:26 AM, Shaohua Li wrote:
> In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> all subpages of the huge page instead of just the head page. This patch adds
> the split tail pages to the shrink page list so that the tail pages can be
> reclaimed soon.
>
> Before this patch, running a swap workload:
> thp_fault_alloc 3492
> thp_fault_fallback 608
> thp_collapse_alloc 6
> thp_collapse_alloc_failed 0
> thp_split 916
>
> With this patch:
> thp_fault_alloc 4085
> thp_fault_fallback 16
> thp_collapse_alloc 90
> thp_collapse_alloc_failed 0
> thp_split 1272
>
> Fallback allocation is reduced a lot.
>
> Signed-off-by: Shaohua Li <shli@fusionio.com>

I'm not entirely happy that lru_add_page_tail can now add a page to a list
that is not an LRU list, but the patch does the right thing policy-wise, and
I am not sure how to do it better...

Acked-by: Rik van Riel <riel@redhat.com>

-- 
All rights reversed


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
  2013-04-01 14:24 ` Rik van Riel
@ 2013-04-02  1:24 ` Minchan Kim
  2013-04-02  2:03   ` Shaohua Li
  2013-04-02 13:09 ` Johannes Weiner
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 10+ messages in thread
From: Minchan Kim @ 2013-04-02  1:24 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, akpm, hughd, aarcange

Hi Shaohua,

On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
> In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> all subpages of the huge page instead of just the head page. This patch adds
> the split tail pages to the shrink page list so that the tail pages can be
> reclaimed soon.
>
> Before this patch, running a swap workload:
> thp_fault_alloc 3492
> thp_fault_fallback 608
> thp_collapse_alloc 6
> thp_collapse_alloc_failed 0
> thp_split 916
>
> With this patch:
> thp_fault_alloc 4085
> thp_fault_fallback 16
> thp_collapse_alloc 90
> thp_collapse_alloc_failed 0
> thp_split 1272
>
> Fallback allocation is reduced a lot.

One concern I have is the spatial locality of all the pages in the 2M range,
especially in the THP-always case. But yes, THP already does this via
lru_add_page_tail, and yours makes more sense if we really intended it.

But I didn't like passing page_list to split_huge_page, either.
Couldn't we do it in isolate_lru_pages in shrink_inactive_list?
Maybe we could add a new isolate_mode, ISOLATE_SPLIT_HUGEPAGE.
One problem I can see is a deadlock on zone->lru_lock, so maybe we would have
to release the lock, do the work, and re-take it.
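
Something like this, maybe (just a rough sketch; ISOLATE_SPLIT_HUGEPAGE does
not exist and the exact shape is hypothetical):

        /* in isolate_lru_pages(): the caller holds zone->lru_lock, and
         * __split_huge_page_refcount() takes it as well, hence the
         * deadlock worry: drop the lock, split, then re-take it */
        if ((mode & ISOLATE_SPLIT_HUGEPAGE) && PageTransHuge(page)) {
                spin_unlock_irq(&lruvec_zone(lruvec)->lru_lock);
                err = split_huge_page(page);
                spin_lock_irq(&lruvec_zone(lruvec)->lru_lock);
                if (err)
                        continue;
        }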

> 
> Signed-off-by: Shaohua Li <shli@fusionio.com>
> [...]

-- 
Kind regards,
Minchan Kim


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-02  1:24 ` Minchan Kim
@ 2013-04-02  2:03   ` Shaohua Li
  2013-04-02  2:22     ` Minchan Kim
  0 siblings, 1 reply; 10+ messages in thread
From: Shaohua Li @ 2013-04-02  2:03 UTC (permalink / raw)
  To: Minchan Kim; +Cc: linux-mm, akpm, hughd, aarcange

On Tue, Apr 02, 2013 at 10:24:22AM +0900, Minchan Kim wrote:
> Hi Shaohua,
> 
> On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
> > In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> > to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> > all subpages of the huge page instead of just the head page. This patch adds
> > the split tail pages to the shrink page list so that the tail pages can be
> > reclaimed soon.
> >
> > Before this patch, running a swap workload:
> > thp_fault_alloc 3492
> > thp_fault_fallback 608
> > thp_collapse_alloc 6
> > thp_collapse_alloc_failed 0
> > thp_split 916
> >
> > With this patch:
> > thp_fault_alloc 4085
> > thp_fault_fallback 16
> > thp_collapse_alloc 90
> > thp_collapse_alloc_failed 0
> > thp_split 1272
> >
> > Fallback allocation is reduced a lot.
> 
> One concern I have is the spatial locality of all the pages in the 2M range,
> especially in the THP-always case. But yes, THP already does this via
> lru_add_page_tail, and yours makes more sense if we really intended it.
>
> But I didn't like passing page_list to split_huge_page, either.
> Couldn't we do it in isolate_lru_pages in shrink_inactive_list?
> Maybe we could add a new isolate_mode, ISOLATE_SPLIT_HUGEPAGE.
> One problem I can see is a deadlock on zone->lru_lock, so maybe we would have
> to release the lock, do the work, and re-take it.

I'd prefer to split the huge page after page_check_references, like we do now.
It's possible we don't want to reclaim (and so split) the page at all.
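
In shrink_page_list() the ordering looks roughly like this (simplified from
mm/vmscan.c):

        references = page_check_references(page, sc);
        switch (references) {
        case PAGEREF_ACTIVATE:
                goto activate_locked;
        case PAGEREF_KEEP:
                goto keep_locked;
        case PAGEREF_RECLAIM:
        case PAGEREF_RECLAIM_CLEAN:
                ; /* try to reclaim the page below */
        }

        /* only pages we really want to reclaim get here, so the THP
         * split inside add_to_swap() is never done for nothing */
        if (PageAnon(page) && !PageSwapCache(page)) {
                if (!add_to_swap(page, page_list))
                        goto activate_locked;
        }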

Thanks,
Shaohua


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-02  2:03   ` Shaohua Li
@ 2013-04-02  2:22     ` Minchan Kim
  0 siblings, 0 replies; 10+ messages in thread
From: Minchan Kim @ 2013-04-02  2:22 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, akpm, hughd, aarcange

On Tue, Apr 02, 2013 at 10:03:57AM +0800, Shaohua Li wrote:
> On Tue, Apr 02, 2013 at 10:24:22AM +0900, Minchan Kim wrote:
> > Hi Shaohua,
> > 
> > On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
> > > In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> > > to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> > > all subpages of the huge page instead of just the head page. This patch adds
> > > the split tail pages to the shrink page list so that the tail pages can be
> > > reclaimed soon.
> > >
> > > Before this patch, running a swap workload:
> > > thp_fault_alloc 3492
> > > thp_fault_fallback 608
> > > thp_collapse_alloc 6
> > > thp_collapse_alloc_failed 0
> > > thp_split 916
> > >
> > > With this patch:
> > > thp_fault_alloc 4085
> > > thp_fault_fallback 16
> > > thp_collapse_alloc 90
> > > thp_collapse_alloc_failed 0
> > > thp_split 1272
> > >
> > > Fallback allocation is reduced a lot.
> > 
> > One concern I have is the spatial locality of all the pages in the 2M range,
> > especially in the THP-always case. But yes, THP already does this via
> > lru_add_page_tail, and yours makes more sense if we really intended it.
> >
> > But I didn't like passing page_list to split_huge_page, either.
> > Couldn't we do it in isolate_lru_pages in shrink_inactive_list?
> > Maybe we could add a new isolate_mode, ISOLATE_SPLIT_HUGEPAGE.
> > One problem I can see is a deadlock on zone->lru_lock, so maybe we would have
> > to release the lock, do the work, and re-take it.
> 
> I'd prefer to split the huge page after page_check_references, like we do now.
> It's possible we don't want to reclaim (and so split) the page at all.

You are absolutely right!

We could easily check the referenced bit in split_huge_page instead of
page_check_references, but for that we would have to introduce another
argument, like split_huge_page(struct page, bool force), and moreover it
would need an additional argument to be memcg aware:

        split_huge_page(struct page, struct mem_cgroup *memcg, bool force);

It's very ugly, so I don't have any objection.
Sorry for the noise.

Acked-by: Minchan Kim <minchan@kernel.org>


> Thanks,
> Shaohua

-- 
Kind regards,
Minchan Kim


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
  2013-04-01 14:24 ` Rik van Riel
  2013-04-02  1:24 ` Minchan Kim
@ 2013-04-02 13:09 ` Johannes Weiner
  2013-04-04 23:50 ` Wanpeng Li
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Johannes Weiner @ 2013-04-02 13:09 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, akpm, hughd, aarcange, minchan

On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
> In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> all subpages of the huge page instead of just the head page. This patch adds
> the split tail pages to the shrink page list so that the tail pages can be
> reclaimed soon.
>
> Before this patch, running a swap workload:
> thp_fault_alloc 3492
> thp_fault_fallback 608
> thp_collapse_alloc 6
> thp_collapse_alloc_failed 0
> thp_split 916
>
> With this patch:
> thp_fault_alloc 4085
> thp_fault_fallback 16
> thp_collapse_alloc 90
> thp_collapse_alloc_failed 0
> thp_split 1272
>
> Fallback allocation is reduced a lot.
> 
> Signed-off-by: Shaohua Li <shli@fusionio.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
                   ` (3 preceding siblings ...)
  2013-04-04 23:50 ` Wanpeng Li
@ 2013-04-04 23:50 ` Wanpeng Li
  2013-04-11 23:44 ` Andrew Morton
  5 siblings, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2013-04-04 23:50 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, akpm, hughd, aarcange, minchan

On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
>In page reclaim, a huge page is split. split_huge_page() adds the tail pages
>to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
>all subpages of the huge page instead of just the head page. This patch adds
>the split tail pages to the shrink page list so that the tail pages can be
>reclaimed soon.
>
>Before this patch, running a swap workload:
>thp_fault_alloc 3492
>thp_fault_fallback 608
>thp_collapse_alloc 6
>thp_collapse_alloc_failed 0
>thp_split 916
>
>With this patch:
>thp_fault_alloc 4085
>thp_fault_fallback 16
>thp_collapse_alloc 90
>thp_collapse_alloc_failed 0
>thp_split 1272
>
>Fallback allocation is reduced a lot.
>
>Signed-off-by: Shaohua Li <shli@fusionio.com>

Nice!

Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>

>[...]


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
                   ` (2 preceding siblings ...)
  2013-04-02 13:09 ` Johannes Weiner
@ 2013-04-04 23:50 ` Wanpeng Li
  2013-04-04 23:50 ` Wanpeng Li
  2013-04-11 23:44 ` Andrew Morton
  5 siblings, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2013-04-04 23:50 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, akpm, hughd, aarcange, minchan

On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
>In page reclaim, a huge page is split. split_huge_page() adds the tail pages
>to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
>all subpages of the huge page instead of just the head page. This patch adds
>the split tail pages to the shrink page list so that the tail pages can be
>reclaimed soon.
>
>Before this patch, running a swap workload:
>thp_fault_alloc 3492
>thp_fault_fallback 608
>thp_collapse_alloc 6
>thp_collapse_alloc_failed 0
>thp_split 916
>
>With this patch:
>thp_fault_alloc 4085
>thp_fault_fallback 16
>thp_collapse_alloc 90
>thp_collapse_alloc_failed 0
>thp_split 1272
>
>Fallback allocation is reduced a lot.
>
>Signed-off-by: Shaohua Li <shli@fusionio.com>

Nice!

Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>

>[...]


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
                   ` (4 preceding siblings ...)
  2013-04-04 23:50 ` Wanpeng Li
@ 2013-04-11 23:44 ` Andrew Morton
  2013-04-12  1:02   ` Shaohua Li
  5 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2013-04-11 23:44 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-mm, hughd, aarcange, minchan

On Mon, 1 Apr 2013 21:26:05 +0800 Shaohua Li <shli@kernel.org> wrote:

> In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> all subpages of the huge page instead of just the head page. This patch adds
> the split tail pages to the shrink page list so that the tail pages can be
> reclaimed soon.
>
> Before this patch, running a swap workload:
> thp_fault_alloc 3492
> thp_fault_fallback 608
> thp_collapse_alloc 6
> thp_collapse_alloc_failed 0
> thp_split 916
>
> With this patch:
> thp_fault_alloc 4085
> thp_fault_fallback 16
> thp_collapse_alloc 90
> thp_collapse_alloc_failed 0
> thp_split 1272
>
> Fallback allocation is reduced a lot.
> 
> ...
>
> -int split_huge_page(struct page *page)
> +int split_huge_page_to_list(struct page *page, struct list_head *list)

While it's fresh, could you please prepare a covering comment describing
this function?  The meaning of the return value is particularly
cryptic.


* Re: [patch]THP: add split tail pages to shrink page list in page reclaim
  2013-04-11 23:44 ` Andrew Morton
@ 2013-04-12  1:02   ` Shaohua Li
  0 siblings, 0 replies; 10+ messages in thread
From: Shaohua Li @ 2013-04-12  1:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, hughd, aarcange, minchan

On Thu, Apr 11, 2013 at 04:44:21PM -0700, Andrew Morton wrote:
> On Mon, 1 Apr 2013 21:26:05 +0800 Shaohua Li <shli@kernel.org> wrote:
> 
> > In page reclaim, a huge page is split. split_huge_page() adds the tail pages
> > to the LRU list. Since we are reclaiming a huge page, it's better to reclaim
> > all subpages of the huge page instead of just the head page. This patch adds
> > the split tail pages to the shrink page list so that the tail pages can be
> > reclaimed soon.
> >
> > Before this patch, running a swap workload:
> > thp_fault_alloc 3492
> > thp_fault_fallback 608
> > thp_collapse_alloc 6
> > thp_collapse_alloc_failed 0
> > thp_split 916
> >
> > With this patch:
> > thp_fault_alloc 4085
> > thp_fault_fallback 16
> > thp_collapse_alloc 90
> > thp_collapse_alloc_failed 0
> > thp_split 1272
> >
> > Fallback allocation is reduced a lot.
> > 
> > ...
> >
> > -int split_huge_page(struct page *page)
> > +int split_huge_page_to_list(struct page *page, struct list_head *list)
> 
> While it's fresh, could you please prepare a covering comment describing
> this function?  The meaning of the return value is particularly
> cryptic.

Is this ok for you?

---
 mm/huge_memory.c |    7 +++++++
 1 file changed, 7 insertions(+)

Index: linux/mm/huge_memory.c
===================================================================
--- linux.orig/mm/huge_memory.c	2013-04-12 08:07:59.075329318 +0800
+++ linux/mm/huge_memory.c	2013-04-12 08:53:53.076706235 +0800
@@ -1801,6 +1801,13 @@ static void __split_huge_page(struct pag
 	BUG_ON(mapcount != mapcount2);
 }
 
+/*
+ * Split a hugepage into normal pages. This doesn't change the position of the
+ * head page. If @list is null, the tail pages will be added to the LRU list,
+ * otherwise they will be added to @list. Both the head page and the tail
+ * pages inherit mapping, flags and so on from the hugepage.
+ * Return 0 if the hugepage is split successfully, otherwise return 1.
+ */
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct anon_vma *anon_vma;

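On the reclaim side, the only caller passing a non-NULL list so far is
add_to_swap(), which uses it roughly like this (simplified):

        if (unlikely(PageTransHuge(page)))
                /* 0 means the split succeeded and the tail pages are now on
                 * @list, to be considered for reclaim right away; 1 means
                 * the split failed and the hugepage is left intact */
                if (unlikely(split_huge_page_to_list(page, list))) {
                        swapcache_free(entry, NULL);
                        return 0;
                }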


Thread overview: 10+ messages
2013-04-01 13:26 [patch]THP: add split tail pages to shrink page list in page reclaim Shaohua Li
2013-04-01 14:24 ` Rik van Riel
2013-04-02  1:24 ` Minchan Kim
2013-04-02  2:03   ` Shaohua Li
2013-04-02  2:22     ` Minchan Kim
2013-04-02 13:09 ` Johannes Weiner
2013-04-04 23:50 ` Wanpeng Li
2013-04-04 23:50 ` Wanpeng Li
2013-04-11 23:44 ` Andrew Morton
2013-04-12  1:02   ` Shaohua Li
