* [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet
@ 2017-07-18 16:00 Konstantin Khlebnikov
  2017-07-20  9:45 ` Vlastimil Babka
  2017-07-24  3:53 ` Minchan Kim
  0 siblings, 2 replies; 4+ messages in thread
From: Konstantin Khlebnikov @ 2017-07-18 16:00 UTC (permalink / raw)
  To: Michal Hocko, Minchan Kim, Hugh Dickins, linux-mm, Shaohua Li,
	Johannes Weiner, Andrew Morton, Mel Gorman
  Cc: linux-kernel

Pages are added to the lru lists via per-cpu page vectors in order
to batch these insertions and reduce lru lock contention.

These pending pages cannot be isolated and moved to another lru.
In some cases this breaks page activation, and it makes mlock/munlock
much more complicated.

This also breaks the newly added swapless MADV_FREE: if an anon page
cannot be moved to the file lru, it can never be freed lazily.

This patch rearranges lru list handling to allow lru isolation for
such pages. It sets PageLRU earlier and initializes page->lru to mark
pages still pending lru insertion.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 include/linux/mm_inline.h |   10 ++++++++--
 mm/swap.c                 |   26 ++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e030a68ead7e..6618c588ee40 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -60,8 +60,14 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
 static __always_inline void del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
-	list_del(&page->lru);
-	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
+	/*
+	 * Empty list head means page is not drained to lru list yet.
+	 */
+	if (likely(!list_empty(&page->lru))) {
+		list_del(&page->lru);
+		update_lru_size(lruvec, lru, page_zonenum(page),
+				-hpage_nr_pages(page));
+	}
 }
 
 /**
diff --git a/mm/swap.c b/mm/swap.c
index 23fc6e049cda..ba4c98074a09 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -400,13 +400,35 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
+static void __pagevec_lru_add_drain_fn(struct page *page, struct lruvec *lruvec,
+				       void *arg)
+{
+	/* Check for isolated or already added pages */
+	if (likely(PageLRU(page) && list_empty(&page->lru))) {
+		int file = page_is_file_cache(page);
+		int active = PageActive(page);
+		enum lru_list lru = page_lru(page);
+
+		add_page_to_lru_list(page, lruvec, lru);
+		update_page_reclaim_stat(lruvec, file, active);
+		trace_mm_lru_insertion(page, lru);
+	}
+}
+
 static void __lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
 
+	/*
+	 * Set PageLRU right here and initialize list head to
+	 * allow page isolation while it is on the way to the LRU list.
+	 */
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	INIT_LIST_HEAD(&page->lru);
 	get_page(page);
+	SetPageLRU(page);
 	if (!pagevec_add(pvec, page) || PageCompound(page))
-		__pagevec_lru_add(pvec);
+		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
 	put_cpu_var(lru_add_pvec);
 }
 
@@ -611,7 +633,7 @@ void lru_add_drain_cpu(int cpu)
 	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
 
 	if (pagevec_count(pvec))
-		__pagevec_lru_add(pvec);
+		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
 	if (pagevec_count(pvec)) {
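
For context on why this is enough, here is roughly how isolate_lru_page()
looked in mainline around this time (a paraphrased sketch, not part of this
patch): it only requires PageLRU() and the lru_lock, so with PageLRU set
already in __lru_cache_add() and del_page_from_lru_list() tolerating an
empty list head, a page still pending in a per-cpu pagevec can be isolated
before any drain happens.

int isolate_lru_page(struct page *page)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);
		struct lruvec *lruvec;

		spin_lock_irq(zone_lru_lock(zone));
		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
		if (PageLRU(page)) {
			int lru = page_lru(page);

			get_page(page);
			ClearPageLRU(page);
			/* a no-op list_del() now if the page was pending */
			del_page_from_lru_list(page, lruvec, lru);
			ret = 0;
		}
		spin_unlock_irq(zone_lru_lock(zone));
	}
	return ret;
}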



* Re: [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet
  2017-07-18 16:00 [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet Konstantin Khlebnikov
@ 2017-07-20  9:45 ` Vlastimil Babka
  2017-07-20 10:17   ` Konstantin Khlebnikov
  2017-07-24  3:53 ` Minchan Kim
  1 sibling, 1 reply; 4+ messages in thread
From: Vlastimil Babka @ 2017-07-20  9:45 UTC (permalink / raw)
  To: Konstantin Khlebnikov, Michal Hocko, Minchan Kim, Hugh Dickins,
	linux-mm, Shaohua Li, Johannes Weiner, Andrew Morton, Mel Gorman
  Cc: linux-kernel

On 07/18/2017 06:00 PM, Konstantin Khlebnikov wrote:
> Pages are added into lru lists via per-cpu page vectors in order
> to combine these insertions and reduce lru lock contention.
> 
> These pending pages cannot be isolated and moved into another lru.
> This breaks in some cases page activation and makes mlock-munlock
> much more complicated.
> 
> Also this breaks newly added swapless MADV_FREE: if it cannot move
> anon page into file lru then page could never be freed lazily.
> 
> This patch rearranges lru list handling to allow lru isolation for
> such pages. It set PageLRU earlier and initialize page->lru to mark
> pages still pending for lru insert.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

I think it's not so simple and won't work as you expect after this
patch. See below.

> ---
>  include/linux/mm_inline.h |   10 ++++++++--
>  mm/swap.c                 |   26 ++++++++++++++++++++++++--
>  2 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index e030a68ead7e..6618c588ee40 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -60,8 +60,14 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
>  static __always_inline void del_page_from_lru_list(struct page *page,
>  				struct lruvec *lruvec, enum lru_list lru)
>  {
> -	list_del(&page->lru);
> -	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
> +	/*
> +	 * Empty list head means page is not drained to lru list yet.
> +	 */
> +	if (likely(!list_empty(&page->lru))) {
> +		list_del(&page->lru);
> +		update_lru_size(lruvec, lru, page_zonenum(page),
> +				-hpage_nr_pages(page));
> +	}
>  }
>  
>  /**
> diff --git a/mm/swap.c b/mm/swap.c
> index 23fc6e049cda..ba4c98074a09 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -400,13 +400,35 @@ void mark_page_accessed(struct page *page)
>  }
>  EXPORT_SYMBOL(mark_page_accessed);
>  
> +static void __pagevec_lru_add_drain_fn(struct page *page, struct lruvec *lruvec,
> +				       void *arg)
> +{
> +	/* Check for isolated or already added pages */
> +	if (likely(PageLRU(page) && list_empty(&page->lru))) {

I think it's now possible that a page ends up on two (or more) CPUs'
pagevecs, right? And they can race doing their local drains, with both
passing this check at the same moment. The lru lock should prevent at
least some disaster, but what if the first CPU succeeds, and then the
page is further isolated and e.g. reclaimed? Then the second CPU still
assumes it's PageLRU() etc., but it's not anymore...?
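
Spelled out against the new drain callback, the interleaving I have in
mind is roughly this (a condensed, annotated copy for illustration only):

static void __pagevec_lru_add_drain_fn(struct page *page,
				       struct lruvec *lruvec, void *arg)
{
	/*
	 * pagevec_lru_move_fn() holds the lru_lock around each call, so
	 * the two racing drains are serialized, but nothing pins the
	 * page's state between their critical sections.
	 */
	if (likely(PageLRU(page) && list_empty(&page->lru))) {
		/*
		 * CPU A: passes the check and links the page below.
		 *
		 * ... the page is then isolated (ClearPageLRU + list_del)
		 * and e.g. reclaimed before CPU B drains its pagevec ...
		 *
		 * CPU B: can it still trust PageLRU(), PageActive() and
		 * page_lru() here once the page has been freed or reused?
		 */
		add_page_to_lru_list(page, lruvec, page_lru(page));
	}
}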

> +		int file = page_is_file_cache(page);
> +		int active = PageActive(page);
> +		enum lru_list lru = page_lru(page);
> +
> +		add_page_to_lru_list(page, lruvec, lru);
> +		update_page_reclaim_stat(lruvec, file, active);
> +		trace_mm_lru_insertion(page, lru);
> +	}
> +}
> +
>  static void __lru_cache_add(struct page *page)
>  {
>  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
>  
> +	/*
> +	 * Set PageLRU right here and initialize list head to
> +	 * allow page isolation while it on the way to the LRU list.
> +	 */
> +	VM_BUG_ON_PAGE(PageLRU(page), page);
> +	INIT_LIST_HEAD(&page->lru);
>  	get_page(page);

This elevates the page count; I think at least some LRU isolators will
skip such pages anyway because of that.

> +	SetPageLRU(page);
>  	if (!pagevec_add(pvec, page) || PageCompound(page))
> -		__pagevec_lru_add(pvec);
> +		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
>  	put_cpu_var(lru_add_pvec);
>  }
>  
> @@ -611,7 +633,7 @@ void lru_add_drain_cpu(int cpu)
>  	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
>  
>  	if (pagevec_count(pvec))
> -		__pagevec_lru_add(pvec);
> +		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
>  
>  	pvec = &per_cpu(lru_rotate_pvecs, cpu);
>  	if (pagevec_count(pvec)) {
> 



* Re: [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet
  2017-07-20  9:45 ` Vlastimil Babka
@ 2017-07-20 10:17   ` Konstantin Khlebnikov
  0 siblings, 0 replies; 4+ messages in thread
From: Konstantin Khlebnikov @ 2017-07-20 10:17 UTC (permalink / raw)
  To: Vlastimil Babka, Michal Hocko, Minchan Kim, Hugh Dickins,
	linux-mm, Shaohua Li, Johannes Weiner, Andrew Morton, Mel Gorman
  Cc: linux-kernel

On 20.07.2017 12:45, Vlastimil Babka wrote:
> On 07/18/2017 06:00 PM, Konstantin Khlebnikov wrote:
>> Pages are added into lru lists via per-cpu page vectors in order
>> to combine these insertions and reduce lru lock contention.
>>
>> These pending pages cannot be isolated and moved into another lru.
>> This breaks in some cases page activation and makes mlock-munlock
>> much more complicated.
>>
>> Also this breaks newly added swapless MADV_FREE: if it cannot move
>> anon page into file lru then page could never be freed lazily.
>>
>> This patch rearranges lru list handling to allow lru isolation for
>> such pages. It set PageLRU earlier and initialize page->lru to mark
>> pages still pending for lru insert.
>>
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> 
> I think it's not so simple and won't work as you expect after this
> patch. See below.
> 
>> ---
>>   include/linux/mm_inline.h |   10 ++++++++--
>>   mm/swap.c                 |   26 ++++++++++++++++++++++++--
>>   2 files changed, 32 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
>> index e030a68ead7e..6618c588ee40 100644
>> --- a/include/linux/mm_inline.h
>> +++ b/include/linux/mm_inline.h
>> @@ -60,8 +60,14 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
>>   static __always_inline void del_page_from_lru_list(struct page *page,
>>   				struct lruvec *lruvec, enum lru_list lru)
>>   {
>> -	list_del(&page->lru);
>> -	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
>> +	/*
>> +	 * Empty list head means page is not drained to lru list yet.
>> +	 */
>> +	if (likely(!list_empty(&page->lru))) {
>> +		list_del(&page->lru);
>> +		update_lru_size(lruvec, lru, page_zonenum(page),
>> +				-hpage_nr_pages(page));
>> +	}
>>   }
>>   
>>   /**
>> diff --git a/mm/swap.c b/mm/swap.c
>> index 23fc6e049cda..ba4c98074a09 100644
>> --- a/mm/swap.c
>> +++ b/mm/swap.c
>> @@ -400,13 +400,35 @@ void mark_page_accessed(struct page *page)
>>   }
>>   EXPORT_SYMBOL(mark_page_accessed);
>>   
>> +static void __pagevec_lru_add_drain_fn(struct page *page, struct lruvec *lruvec,
>> +				       void *arg)
>> +{
>> +	/* Check for isolated or already added pages */
>> +	if (likely(PageLRU(page) && list_empty(&page->lru))) {
> 
> I think it's now possible that page ends up on two (or more) cpu's
> pagevecs, right. And they can race doing their local drains, and both
> pass this check at the same moment. The lru lock should prevent at least
> some disaster, but what if the first CPU succeeds, and then the page is
> further isolated and e.g. reclaimed. Then the second CPU still assumes
> it's PageLRU() etc, but it's not anymore...?

The reclaimer/isolator clears PageLRU under the lru_lock, so the drain
will skip that page. Duplicate inserts are caught by the second check.
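
To make that second check concrete, here is a minimal userspace sketch
(purely illustrative names, not kernel code) of the "empty list head means
still pending" convention: the first drain links the entry, after which
list_empty() is false and a duplicate drain becomes a no-op.

#include <stdio.h>
#include <stdbool.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

struct page { struct list_head lru; bool lru_flag; };

/* stand-in for the drain callback: only a drain that finds the page
 * still pending (flag set, list head empty) links it into the list */
static void drain_one(struct page *page, struct list_head *lru_list)
{
	if (page->lru_flag && list_empty(&page->lru)) {
		list_add(&page->lru, lru_list);
		printf("inserted\n");
	} else {
		printf("skipped (already inserted or isolated)\n");
	}
}

int main(void)
{
	struct list_head lru_list;
	struct page page;

	INIT_LIST_HEAD(&lru_list);
	INIT_LIST_HEAD(&page.lru);	/* the "pending" marker */
	page.lru_flag = true;		/* SetPageLRU() equivalent */

	drain_one(&page, &lru_list);	/* first drain: inserts the page */
	drain_one(&page, &lru_list);	/* duplicate drain: skipped */
	return 0;
}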


> 
>> +		int file = page_is_file_cache(page);
>> +		int active = PageActive(page);
>> +		enum lru_list lru = page_lru(page);
>> +
>> +		add_page_to_lru_list(page, lruvec, lru);
>> +		update_page_reclaim_stat(lruvec, file, active);
>> +		trace_mm_lru_insertion(page, lru);
>> +	}
>> +}
>> +
>>   static void __lru_cache_add(struct page *page)
>>   {
>>   	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
>>   
>> +	/*
>> +	 * Set PageLRU right here and initialize list head to
>> +	 * allow page isolation while it on the way to the LRU list.
>> +	 */
>> +	VM_BUG_ON_PAGE(PageLRU(page), page);
>> +	INIT_LIST_HEAD(&page->lru);
>>   	get_page(page);
> 
> This elevates the page count, I think at least some LRU isolators will
> skip the pages anyway because of that.

Yep, theoretically we could get rid of these references:
memory offline must drain all these vectors before freeing struct pages.

This would help memory migration and compaction a little.

> 
>> +	SetPageLRU(page);
>>   	if (!pagevec_add(pvec, page) || PageCompound(page))
>> -		__pagevec_lru_add(pvec);
>> +		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
>>   	put_cpu_var(lru_add_pvec);
>>   }
>>   
>> @@ -611,7 +633,7 @@ void lru_add_drain_cpu(int cpu)
>>   	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
>>   
>>   	if (pagevec_count(pvec))
>> -		__pagevec_lru_add(pvec);
>> +		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
>>   
>>   	pvec = &per_cpu(lru_rotate_pvecs, cpu);
>>   	if (pagevec_count(pvec)) {
>>
> 



* Re: [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet
  2017-07-18 16:00 [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet Konstantin Khlebnikov
  2017-07-20  9:45 ` Vlastimil Babka
@ 2017-07-24  3:53 ` Minchan Kim
  1 sibling, 0 replies; 4+ messages in thread
From: Minchan Kim @ 2017-07-24  3:53 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Michal Hocko, Hugh Dickins, linux-mm, Shaohua Li,
	Johannes Weiner, Andrew Morton, Mel Gorman, linux-kernel

Hi,

On Tue, Jul 18, 2017 at 07:00:23PM +0300, Konstantin Khlebnikov wrote:
> Pages are added into lru lists via per-cpu page vectors in order
> to combine these insertions and reduce lru lock contention.
> 
> These pending pages cannot be isolated and moved into another lru.
> This breaks in some cases page activation and makes mlock-munlock
> much more complicated.
> 
> Also this breaks newly added swapless MADV_FREE: if it cannot move
> anon page into file lru then page could never be freed lazily.

Yes, it's really unfortunate.

> 
> This patch rearranges lru list handling to allow lru isolation for
> such pages. It set PageLRU earlier and initialize page->lru to mark
> pages still pending for lru insert.

At first glance it seems to work, but it's rather hacky to me.

Could you make mark_page_lazyfree aware of it?
IOW, mark_page_lazyfree could clear PG_active|PG_referenced|PG_swapbacked
under the lru_lock if the page was not on the LRU yet. With that, the
pagevec handler for the LRU could move pages onto the proper list when
the drain happens.
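
One possible reading of that, as an untested sketch against this era's
mm/swap.c (not a real patch; mark_page_lazyfree() would also have to stop
requiring PageLRU() before queueing the page, and the event counters are
omitted):

static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
			    void *arg)
{
	if (PageAnon(page) && PageSwapBacked(page) &&
	    !PageSwapCache(page) && !PageUnevictable(page)) {
		bool on_lru = PageLRU(page);
		bool active = PageActive(page);

		if (on_lru)
			/* already drained: move it, as mainline does today */
			del_page_from_lru_list(page, lruvec,
					       LRU_INACTIVE_ANON + active);
		ClearPageActive(page);
		ClearPageReferenced(page);
		ClearPageSwapBacked(page);
		if (on_lru)
			add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
		/*
		 * Otherwise the page is still sitting in a lru_add pagevec;
		 * with the flags cleared under the lru_lock here, the later
		 * lru_add drain computes page_lru() as inactive file and
		 * places the page on the right list by itself.
		 */
	}
}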

> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> ---
>  include/linux/mm_inline.h |   10 ++++++++--
>  mm/swap.c                 |   26 ++++++++++++++++++++++++--
>  2 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index e030a68ead7e..6618c588ee40 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -60,8 +60,14 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
>  static __always_inline void del_page_from_lru_list(struct page *page,
>  				struct lruvec *lruvec, enum lru_list lru)
>  {
> -	list_del(&page->lru);
> -	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
> +	/*
> +	 * Empty list head means page is not drained to lru list yet.
> +	 */
> +	if (likely(!list_empty(&page->lru))) {
> +		list_del(&page->lru);
> +		update_lru_size(lruvec, lru, page_zonenum(page),
> +				-hpage_nr_pages(page));
> +	}
>  }
>  
>  /**
> diff --git a/mm/swap.c b/mm/swap.c
> index 23fc6e049cda..ba4c98074a09 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -400,13 +400,35 @@ void mark_page_accessed(struct page *page)
>  }
>  EXPORT_SYMBOL(mark_page_accessed);
>  
> +static void __pagevec_lru_add_drain_fn(struct page *page, struct lruvec *lruvec,
> +				       void *arg)
> +{
> +	/* Check for isolated or already added pages */
> +	if (likely(PageLRU(page) && list_empty(&page->lru))) {
> +		int file = page_is_file_cache(page);
> +		int active = PageActive(page);
> +		enum lru_list lru = page_lru(page);
> +
> +		add_page_to_lru_list(page, lruvec, lru);
> +		update_page_reclaim_stat(lruvec, file, active);
> +		trace_mm_lru_insertion(page, lru);
> +	}
> +}
> +
>  static void __lru_cache_add(struct page *page)
>  {
>  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
>  
> +	/*
> +	 * Set PageLRU right here and initialize list head to
> +	 * allow page isolation while it on the way to the LRU list.
> +	 */
> +	VM_BUG_ON_PAGE(PageLRU(page), page);
> +	INIT_LIST_HEAD(&page->lru);
>  	get_page(page);
> +	SetPageLRU(page);
>  	if (!pagevec_add(pvec, page) || PageCompound(page))
> -		__pagevec_lru_add(pvec);
> +		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
>  	put_cpu_var(lru_add_pvec);
>  }
>  
> @@ -611,7 +633,7 @@ void lru_add_drain_cpu(int cpu)
>  	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
>  
>  	if (pagevec_count(pvec))
> -		__pagevec_lru_add(pvec);
> +		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
>  
>  	pvec = &per_cpu(lru_rotate_pvecs, cpu);
>  	if (pagevec_count(pvec)) {
> 



Thread overview: 4+ messages
2017-07-18 16:00 [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet Konstantin Khlebnikov
2017-07-20  9:45 ` Vlastimil Babka
2017-07-20 10:17   ` Konstantin Khlebnikov
2017-07-24  3:53 ` Minchan Kim
