linux-kernel.vger.kernel.org archive mirror
* [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
@ 2018-12-13 15:29 Kirill Tkhai
  2018-12-13 19:15 ` Yang Shi
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Kirill Tkhai @ 2018-12-13 15:29 UTC (permalink / raw)
  To: akpm, kirill, hughd, aarcange
  Cc: christian.koenig, imbrenda, yang.shi, riel, ying.huang, minchan,
	ktkhai, linux-kernel, linux-mm

This patch adds an optimization for KSM pages almost
in the same way that we already have for ordinary
anonymous pages. If a write fault occurs in a page
which is mapped by only one pte and is not related
to swap cache, the page may be reused without
copying its content.

[Note that we do not consider PageSwapCache() pages
 at least for now, since we don't want to complicate
 __get_ksm_page(), which has a nice optimization based
 on this (for the migration case). Currently it
 spins on PageSwapCache() pages, waiting for their
 counters to be unfrozen (i.e., for the migration
 to finish). But we don't want to make it also spin
 on swap cache pages which we try to reuse, since
 the probability of reusing them is not very high.
 So, for now we do not consider PageSwapCache() pages
 at all.]

So, in reuse_ksm_page() we check 1) PageSwapCache()
and 2) page_stable_node(), to skip a page which KSM
is currently trying to link to the stable tree. Then
we do page_ref_freeze() to prohibit KSM from merging
one more page into the page we are reusing. After
that, nobody can take a new reference to the page
being reused: KSM skips !PageSwapCache() pages with
zero refcount; and the protection against all other
participants is the same as for reused ordinary anon
pages: pte lock, page lock and mmap_sem.

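For context, a minimal userspace sequence that exercises this path might
look like the sketch below (illustrative only: it assumes KSM is enabled,
e.g. with "echo 1 > /sys/kernel/mm/ksm/run", and that ksmd gets enough
time to actually merge the two pages):

	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long page = sysconf(_SC_PAGESIZE);
		/* two identical anonymous pages, marked mergeable for ksmd */
		char *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		memset(p, 'a', 2 * page);
		madvise(p, 2 * page, MADV_MERGEABLE);
		sleep(10);	/* give ksmd a chance to merge them */

		/*
		 * Unmap one of the two ptes, so the merged KSM page is
		 * mapped by a single pte only; the write below then takes
		 * the do_wp_page() path optimized here, and the page is
		 * reused in place instead of being copied.
		 */
		munmap(p + page, page);
		p[0] = 'b';
		return 0;
	}
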
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 include/linux/ksm.h |    7 +++++++
 mm/ksm.c            |   25 +++++++++++++++++++++++--
 mm/memory.c         |   16 ++++++++++++++--
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 161e8164abcf..e48b1e453ff5 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
 
 void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+bool reuse_ksm_page(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
 
 #else  /* !CONFIG_KSM */
 
@@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
 static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
 }
+static inline bool reuse_ksm_page(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	return false;
+}
 #endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 383f961e577a..fbd14264d784 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -707,8 +707,9 @@ static struct page *__get_ksm_page(struct stable_node *stable_node,
 	 * case this node is no longer referenced, and should be freed;
 	 * however, it might mean that the page is under page_ref_freeze().
 	 * The __remove_mapping() case is easy, again the node is now stale;
-	 * but if page is swapcache in migrate_page_move_mapping(), it might
-	 * still be our page, in which case it's essential to keep the node.
+	 * the same applies to the reuse_ksm_page() case; but if page is
+	 * swapcache in migrate_page_move_mapping(), it might still be our
+	 * page, in which case it's essential to keep the node.
 	 */
 	while (!get_page_unless_zero(page)) {
 		/*
@@ -2666,6 +2667,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
 		goto again;
 }
 
+bool reuse_ksm_page(struct page *page,
+		    struct vm_area_struct *vma,
+		    unsigned long address)
+{
+	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
+	VM_BUG_ON_PAGE(!page_mapped(page), page);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+	if (PageSwapCache(page) || !page_stable_node(page))
+		return false;
+	/* Prohibit parallel get_ksm_page() */
+	if (!page_ref_freeze(page, 1))
+		return false;
+
+	page_move_anon_rmap(page, vma);
+	page->index = linear_page_index(vma, address);
+	page_ref_unfreeze(page, 1);
+
+	return true;
+}
 #ifdef CONFIG_MIGRATION
 void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 532061217e03..5817527f1877 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2509,8 +2509,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	 * Take out anonymous pages first, anonymous shared vmas are
 	 * not dirty accountable.
 	 */
-	if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
+	if (PageAnon(vmf->page)) {
 		int total_map_swapcount;
+		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
+					   page_count(vmf->page) != 1))
+			goto copy;
 		if (!trylock_page(vmf->page)) {
 			get_page(vmf->page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2525,6 +2528,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 			}
 			put_page(vmf->page);
 		}
+		if (PageKsm(vmf->page)) {
+			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
+						     vmf->address);
+			unlock_page(vmf->page);
+			if (!reused)
+				goto copy;
+			wp_page_reuse(vmf);
+			return VM_FAULT_WRITE;
+		}
 		if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
 			if (total_map_swapcount == 1) {
 				/*
@@ -2545,7 +2557,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 					(VM_WRITE|VM_SHARED))) {
 		return wp_page_shared(vmf);
 	}
-
+copy:
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */



* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-13 15:29 [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page() Kirill Tkhai
@ 2018-12-13 19:15 ` Yang Shi
  2018-12-14  9:26   ` Kirill Tkhai
  2018-12-29  9:34 ` Kirill Tkhai
  2018-12-29 21:40 ` Andrew Morton
  2 siblings, 1 reply; 8+ messages in thread
From: Yang Shi @ 2018-12-13 19:15 UTC (permalink / raw)
  To: Kirill Tkhai, akpm, kirill, hughd, aarcange
  Cc: christian.koenig, imbrenda, riel, ying.huang, minchan,
	linux-kernel, linux-mm



On 12/13/18 7:29 AM, Kirill Tkhai wrote:
> This patch adds an optimization for KSM pages almost
> in the same way that we already have for ordinary
> anonymous pages. If a write fault occurs in a page
> which is mapped by only one pte and is not related
> to swap cache, the page may be reused without
> copying its content.
>
> [Note that we do not consider PageSwapCache() pages
>   at least for now, since we don't want to complicate
>   __get_ksm_page(), which has a nice optimization based
>   on this (for the migration case). Currently it
>   spins on PageSwapCache() pages, waiting for their
>   counters to be unfrozen (i.e., for the migration
>   to finish). But we don't want to make it also spin
>   on swap cache pages which we try to reuse, since
>   the probability of reusing them is not very high.
>   So, for now we do not consider PageSwapCache() pages
>   at all.]
>
> So, in reuse_ksm_page() we check 1) PageSwapCache()
> and 2) page_stable_node(), to skip a page which KSM
> is currently trying to link to the stable tree. Then
> we do page_ref_freeze() to prohibit KSM from merging
> one more page into the page we are reusing. After
> that, nobody can take a new reference to the page
> being reused: KSM skips !PageSwapCache() pages with
> zero refcount; and the protection against all other
> participants is the same as for reused ordinary anon
> pages: pte lock, page lock and mmap_sem.
>
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>   include/linux/ksm.h |    7 +++++++
>   mm/ksm.c            |   25 +++++++++++++++++++++++--
>   mm/memory.c         |   16 ++++++++++++++--
>   3 files changed, 44 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index 161e8164abcf..e48b1e453ff5 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
>   
>   void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
>   void ksm_migrate_page(struct page *newpage, struct page *oldpage);
> +bool reuse_ksm_page(struct page *page,
> +			struct vm_area_struct *vma, unsigned long address);
>   
>   #else  /* !CONFIG_KSM */
>   
> @@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
>   static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>   {
>   }
> +static inline bool reuse_ksm_page(struct page *page,
> +			struct vm_area_struct *vma, unsigned long address)
> +{
> +	return false;
> +}
>   #endif /* CONFIG_MMU */
>   #endif /* !CONFIG_KSM */
>   
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 383f961e577a..fbd14264d784 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -707,8 +707,9 @@ static struct page *__get_ksm_page(struct stable_node *stable_node,
>   	 * case this node is no longer referenced, and should be freed;
>   	 * however, it might mean that the page is under page_ref_freeze().
>   	 * The __remove_mapping() case is easy, again the node is now stale;
> -	 * but if page is swapcache in migrate_page_move_mapping(), it might
> -	 * still be our page, in which case it's essential to keep the node.
> +	 * the same applies to the reuse_ksm_page() case; but if page is
> +	 * swapcache in migrate_page_move_mapping(), it might still be our
> +	 * page, in which case it's essential to keep the node.
>   	 */
>   	while (!get_page_unless_zero(page)) {
>   		/*
> @@ -2666,6 +2667,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
>   		goto again;
>   }
>   
> +bool reuse_ksm_page(struct page *page,
> +		    struct vm_area_struct *vma,
> +		    unsigned long address)
> +{
> +	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
> +	VM_BUG_ON_PAGE(!page_mapped(page), page);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +
> +	if (PageSwapCache(page) || !page_stable_node(page))
> +		return false;
> +	/* Prohibit parallel get_ksm_page() */
> +	if (!page_ref_freeze(page, 1))
> +		return false;
> +
> +	page_move_anon_rmap(page, vma);

Once the mapping is changed, it is not a KSM mapping anymore. It looks
like later get_ksm_page() calls would always fail on this page. Is this
expected?

Thanks,
Yang


> +	page->index = linear_page_index(vma, address);
> +	page_ref_unfreeze(page, 1);
> +
> +	return true;
> +}
>   #ifdef CONFIG_MIGRATION
>   void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>   {
> diff --git a/mm/memory.c b/mm/memory.c
> index 532061217e03..5817527f1877 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2509,8 +2509,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>   	 * Take out anonymous pages first, anonymous shared vmas are
>   	 * not dirty accountable.
>   	 */
> -	if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
> +	if (PageAnon(vmf->page)) {
>   		int total_map_swapcount;
> +		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
> +					   page_count(vmf->page) != 1))
> +			goto copy;
>   		if (!trylock_page(vmf->page)) {
>   			get_page(vmf->page);
>   			pte_unmap_unlock(vmf->pte, vmf->ptl);
> @@ -2525,6 +2528,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>   			}
>   			put_page(vmf->page);
>   		}
> +		if (PageKsm(vmf->page)) {
> +			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
> +						     vmf->address);
> +			unlock_page(vmf->page);
> +			if (!reused)
> +				goto copy;
> +			wp_page_reuse(vmf);
> +			return VM_FAULT_WRITE;
> +		}
>   		if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
>   			if (total_map_swapcount == 1) {
>   				/*
> @@ -2545,7 +2557,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>   					(VM_WRITE|VM_SHARED))) {
>   		return wp_page_shared(vmf);
>   	}
> -
> +copy:
>   	/*
>   	 * Ok, we need to copy. Oh, well..
>   	 */



* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-13 19:15 ` Yang Shi
@ 2018-12-14  9:26   ` Kirill Tkhai
  2018-12-14 18:06     ` Yang Shi
  0 siblings, 1 reply; 8+ messages in thread
From: Kirill Tkhai @ 2018-12-14  9:26 UTC (permalink / raw)
  To: Yang Shi, akpm, kirill, hughd, aarcange
  Cc: christian.koenig, imbrenda, riel, ying.huang, minchan,
	linux-kernel, linux-mm

On 13.12.2018 22:15, Yang Shi wrote:
> 
> 
> On 12/13/18 7:29 AM, Kirill Tkhai wrote:
>> This patch adds an optimization for KSM pages almost
>> in the same way that we already have for ordinary
>> anonymous pages. If a write fault occurs in a page
>> which is mapped by only one pte and is not related
>> to swap cache, the page may be reused without
>> copying its content.
>>
>> [Note that we do not consider PageSwapCache() pages
>>   at least for now, since we don't want to complicate
>>   __get_ksm_page(), which has a nice optimization based
>>   on this (for the migration case). Currently it
>>   spins on PageSwapCache() pages, waiting for their
>>   counters to be unfrozen (i.e., for the migration
>>   to finish). But we don't want to make it also spin
>>   on swap cache pages which we try to reuse, since
>>   the probability of reusing them is not very high.
>>   So, for now we do not consider PageSwapCache() pages
>>   at all.]
>>
>> So, in reuse_ksm_page() we check 1) PageSwapCache()
>> and 2) page_stable_node(), to skip a page which KSM
>> is currently trying to link to the stable tree. Then
>> we do page_ref_freeze() to prohibit KSM from merging
>> one more page into the page we are reusing. After
>> that, nobody can take a new reference to the page
>> being reused: KSM skips !PageSwapCache() pages with
>> zero refcount; and the protection against all other
>> participants is the same as for reused ordinary anon
>> pages: pte lock, page lock and mmap_sem.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>   include/linux/ksm.h |    7 +++++++
>>   mm/ksm.c            |   25 +++++++++++++++++++++++--
>>   mm/memory.c         |   16 ++++++++++++++--
>>   3 files changed, 44 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
>> index 161e8164abcf..e48b1e453ff5 100644
>> --- a/include/linux/ksm.h
>> +++ b/include/linux/ksm.h
>> @@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
>>     void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
>>   void ksm_migrate_page(struct page *newpage, struct page *oldpage);
>> +bool reuse_ksm_page(struct page *page,
>> +            struct vm_area_struct *vma, unsigned long address);
>>     #else  /* !CONFIG_KSM */
>>   @@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
>>   static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>>   {
>>   }
>> +static inline bool reuse_ksm_page(struct page *page,
>> +            struct vm_area_struct *vma, unsigned long address)
>> +{
>> +    return false;
>> +}
>>   #endif /* CONFIG_MMU */
>>   #endif /* !CONFIG_KSM */
>>   diff --git a/mm/ksm.c b/mm/ksm.c
>> index 383f961e577a..fbd14264d784 100644
>> --- a/mm/ksm.c
>> +++ b/mm/ksm.c
>> @@ -707,8 +707,9 @@ static struct page *__get_ksm_page(struct stable_node *stable_node,
>>        * case this node is no longer referenced, and should be freed;
>>        * however, it might mean that the page is under page_ref_freeze().
>>        * The __remove_mapping() case is easy, again the node is now stale;
>> -     * but if page is swapcache in migrate_page_move_mapping(), it might
>> -     * still be our page, in which case it's essential to keep the node.
>> +     * the same applies to the reuse_ksm_page() case; but if page is
>> +     * swapcache in migrate_page_move_mapping(), it might still be our
>> +     * page, in which case it's essential to keep the node.
>>        */
>>       while (!get_page_unless_zero(page)) {
>>           /*
>> @@ -2666,6 +2667,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
>>           goto again;
>>   }
>>   +bool reuse_ksm_page(struct page *page,
>> +            struct vm_area_struct *vma,
>> +            unsigned long address)
>> +{
>> +    VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
>> +    VM_BUG_ON_PAGE(!page_mapped(page), page);
>> +    VM_BUG_ON_PAGE(!PageLocked(page), page);
>> +
>> +    if (PageSwapCache(page) || !page_stable_node(page))
>> +        return false;
>> +    /* Prohibit parallel get_ksm_page() */
>> +    if (!page_ref_freeze(page, 1))
>> +        return false;
>> +
>> +    page_move_anon_rmap(page, vma);
> 
> Once the mapping is changed, it is not a KSM mapping anymore. It looks like later get_ksm_page() calls would always fail on this page. Is this expected?

Yes, this is exactly what the patch does. Let's look at what happens
without the patch, when there is a write to an only-pte-mapped
KSM page.

We enter do_wp_page() with page_count() == 1, since the KSM page is
mapped by only one pte (and we do not take an extra reference to a page
when we add it to the KSM stable tree). Then:

  do_wp_page()
    get_page(vmf->page) <- page_count() is 2
    wp_page_copy()
      ..
      cow_user_page() /* Copy user page to a new one */
      ..
      put_page(vmf->page) <- page_count() is 1
      put_page(vmf->page) <- page_count() is 0

The second put_page() frees the page (and also zeroes page->mapping),
and after that it's not a PageKsm() page anymore. Further
__get_ksm_page() calls will fail on this page (since the mapping
was zeroed), and its node will be unlinked from the KSM stable tree:

__get_ksm_page()
{
	/* page->mapping == NULL, expected_mapping != NULL */
	if (READ_ONCE(page->mapping) != expected_mapping)
		goto stale;
	.......
stale:
	remove_node_from_stable_tree(stable_node);
}


The patch optimizes do_wp_page() and makes it avoid the copy
(like we do for ordinary anon pages). Since the KSM page would be freed
anyway after we dropped the last reference to it, we reuse it instead.
So, things now work this way:

do_wp_page()
  lock_page(vmf->page)
  reuse_ksm_page()
    check PageSwapCache() and page_stable_node()
    page_ref_freeze(page, 1) <- Freeze the page to make parallel
                                __get_ksm_page() (if any) waiting
    page_move_anon_rmap()    <- Write new mapping, so __get_ksm_page()
                                sees this is not a KSM page anymore,
                                and it removes stable node.
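
(The freeze relies on the cmpxchg semantics of page_ref_freeze();
roughly, from include/linux/page_ref.h, with tracepoint hooks omitted:

	static inline int page_ref_freeze(struct page *page, int count)
	{
		return likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
	}

	static inline void page_ref_unfreeze(struct page *page, int count)
	{
		VM_BUG_ON_PAGE(page_count(page) != 0, page);
		VM_BUG_ON(count == 0);
		atomic_set_release(&page->_refcount, count);
	}

so the freeze succeeds only while we hold the single reference, and while
the count is frozen at zero, get_page_unless_zero() fails for everyone
else.)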

So, the result is the same, but after the patch we achieve it faster :)

Also, note that in the most probable case, do_wp_page() does not race
with __get_ksm_page() (the race window is very small; __get_ksm_page()
spins only when reuse_ksm_page() is between page_ref_freeze()
and page_move_anon_rmap(), which are on neighboring lines).

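For reference, the spin in __get_ksm_page() that the note in the commit
message refers to looks roughly like this (paraphrased from mm/ksm.c,
comments abridged):

	while (!get_page_unless_zero(page)) {
		/*
		 * Only pages still in swap cache -- i.e. possibly frozen
		 * for migration -- are worth waiting for; for anything
		 * else a frozen (zero) refcount means the node is stale.
		 */
		if (!PageSwapCache(page))
			goto stale;
		cpu_relax();
	}
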
So, this is the idea. Please let me know if something is unclear.

Kirill


* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-14  9:26   ` Kirill Tkhai
@ 2018-12-14 18:06     ` Yang Shi
  2018-12-15  9:18       ` Kirill Tkhai
  0 siblings, 1 reply; 8+ messages in thread
From: Yang Shi @ 2018-12-14 18:06 UTC (permalink / raw)
  To: Kirill Tkhai, akpm, kirill, hughd, aarcange
  Cc: christian.koenig, imbrenda, riel, ying.huang, minchan,
	linux-kernel, linux-mm



On 12/14/18 1:26 AM, Kirill Tkhai wrote:
> On 13.12.2018 22:15, Yang Shi wrote:
>>
>> On 12/13/18 7:29 AM, Kirill Tkhai wrote:
>>> This patch adds an optimization for KSM pages almost
>>> in the same way that we already have for ordinary
>>> anonymous pages. If a write fault occurs in a page
>>> which is mapped by only one pte and is not related
>>> to swap cache, the page may be reused without
>>> copying its content.
>>>
>>> [Note that we do not consider PageSwapCache() pages
>>>    at least for now, since we don't want to complicate
>>>    __get_ksm_page(), which has a nice optimization based
>>>    on this (for the migration case). Currently it
>>>    spins on PageSwapCache() pages, waiting for their
>>>    counters to be unfrozen (i.e., for the migration
>>>    to finish). But we don't want to make it also spin
>>>    on swap cache pages which we try to reuse, since
>>>    the probability of reusing them is not very high.
>>>    So, for now we do not consider PageSwapCache() pages
>>>    at all.]
>>>
>>> So, in reuse_ksm_page() we check 1) PageSwapCache()
>>> and 2) page_stable_node(), to skip a page which KSM
>>> is currently trying to link to the stable tree. Then
>>> we do page_ref_freeze() to prohibit KSM from merging
>>> one more page into the page we are reusing. After
>>> that, nobody can take a new reference to the page
>>> being reused: KSM skips !PageSwapCache() pages with
>>> zero refcount; and the protection against all other
>>> participants is the same as for reused ordinary anon
>>> pages: pte lock, page lock and mmap_sem.
>>>
>>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>> ---
>>>    include/linux/ksm.h |    7 +++++++
>>>    mm/ksm.c            |   25 +++++++++++++++++++++++--
>>>    mm/memory.c         |   16 ++++++++++++++--
>>>    3 files changed, 44 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
>>> index 161e8164abcf..e48b1e453ff5 100644
>>> --- a/include/linux/ksm.h
>>> +++ b/include/linux/ksm.h
>>> @@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
>>>      void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
>>>    void ksm_migrate_page(struct page *newpage, struct page *oldpage);
>>> +bool reuse_ksm_page(struct page *page,
>>> +            struct vm_area_struct *vma, unsigned long address);
>>>      #else  /* !CONFIG_KSM */
>>>    @@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
>>>    static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>>>    {
>>>    }
>>> +static inline bool reuse_ksm_page(struct page *page,
>>> +            struct vm_area_struct *vma, unsigned long address)
>>> +{
>>> +    return false;
>>> +}
>>>    #endif /* CONFIG_MMU */
>>>    #endif /* !CONFIG_KSM */
>>>    diff --git a/mm/ksm.c b/mm/ksm.c
>>> index 383f961e577a..fbd14264d784 100644
>>> --- a/mm/ksm.c
>>> +++ b/mm/ksm.c
>>> @@ -707,8 +707,9 @@ static struct page *__get_ksm_page(struct stable_node *stable_node,
>>>         * case this node is no longer referenced, and should be freed;
>>>         * however, it might mean that the page is under page_ref_freeze().
>>>         * The __remove_mapping() case is easy, again the node is now stale;
>>> -     * but if page is swapcache in migrate_page_move_mapping(), it might
>>> -     * still be our page, in which case it's essential to keep the node.
>>> +     * the same applies to the reuse_ksm_page() case; but if page is
>>> +     * swapcache in migrate_page_move_mapping(), it might still be our
>>> +     * page, in which case it's essential to keep the node.
>>>         */
>>>        while (!get_page_unless_zero(page)) {
>>>            /*
>>> @@ -2666,6 +2667,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
>>>            goto again;
>>>    }
>>>    +bool reuse_ksm_page(struct page *page,
>>> +            struct vm_area_struct *vma,
>>> +            unsigned long address)
>>> +{
>>> +    VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
>>> +    VM_BUG_ON_PAGE(!page_mapped(page), page);
>>> +    VM_BUG_ON_PAGE(!PageLocked(page), page);
>>> +
>>> +    if (PageSwapCache(page) || !page_stable_node(page))
>>> +        return false;
>>> +    /* Prohibit parallel get_ksm_page() */
>>> +    if (!page_ref_freeze(page, 1))
>>> +        return false;
>>> +
>>> +    page_move_anon_rmap(page, vma);
>> Once the mapping is changed, it is not a KSM mapping anymore. It looks like later get_ksm_page() calls would always fail on this page. Is this expected?
> Yes, this is exactly what the patch does. Let's look at what happens
> without the patch, when there is a write to an only-pte-mapped
> KSM page.
>
> We enter do_wp_page() with page_count() == 1, since the KSM page is
> mapped by only one pte (and we do not take an extra reference to a page
> when we add it to the KSM stable tree). Then:
>
>    do_wp_page()
>      get_page(vmf->page) <- page_count() is 2
>      wp_page_copy()
>        ..
>        cow_user_page() /* Copy user page to a new one */
>        ..
>        put_page(vmf->page) <- page_count() is 1
>        put_page(vmf->page) <- page_count() is 0
>
> The second put_page() frees the page (and also zeroes page->mapping),
> and after that it's not a PageKsm() page anymore. Further
> __get_ksm_page() calls will fail on this page (since the mapping
> was zeroed), and its node will be unlinked from the KSM stable tree:
>
> __get_ksm_page()
> {
> 	/* page->mapping == NULL, expected_mapping != NULL */
> 	if (READ_ONCE(page->mapping) != expected_mapping)
> 		goto stale;
> 	.......
> stale:
> 	remove_node_from_stable_tree(stable_node);
> }
>
>
> The patch optimizes do_wp_page() and makes it avoid the copy
> (like we do for ordinary anon pages). Since the KSM page would be freed
> anyway after we dropped the last reference to it, we reuse it instead.
> So, things now work this way:
>
> do_wp_page()
>    lock_page(vmf->page)
>    reuse_ksm_page()
>      check PageSwapCache() and page_stable_node()
>      page_ref_freeze(page, 1) <- Freeze the page to make parallel
>                                  __get_ksm_page() (if any) waiting
>      page_move_anon_rmap()    <- Write new mapping, so __get_ksm_page()
>                                  sees this is not a KSM page anymore,
>                                  and it removes stable node.
>
> So, the result is the same, but after the patch we achieve it faster :)
>
> Also, note that in the most probable case, do_wp_page() does not race
> with __get_ksm_page() (the race window is very small; __get_ksm_page()
> spins only when reuse_ksm_page() is between page_ref_freeze()
> and page_move_anon_rmap(), which are on neighboring lines).
>
> So, this is the idea. Please let me know if something is unclear.

Thanks for elaborating on this. It sounds reasonable. You can add
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>

>
> Kirill



* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-14 18:06     ` Yang Shi
@ 2018-12-15  9:18       ` Kirill Tkhai
  0 siblings, 0 replies; 8+ messages in thread
From: Kirill Tkhai @ 2018-12-15  9:18 UTC (permalink / raw)
  To: Yang Shi, akpm, kirill, hughd, aarcange
  Cc: christian.koenig, imbrenda, riel, ying.huang, minchan,
	linux-kernel, linux-mm

On 14.12.2018 21:06, Yang Shi wrote:
> 
> 
> On 12/14/18 1:26 AM, Kirill Tkhai wrote:
>> On 13.12.2018 22:15, Yang Shi wrote:
>>>
>>> On 12/13/18 7:29 AM, Kirill Tkhai wrote:
>>>> This patch adds an optimization for KSM pages almost
>>>> in the same way that we already have for ordinary
>>>> anonymous pages. If a write fault occurs in a page
>>>> which is mapped by only one pte and is not related
>>>> to swap cache, the page may be reused without
>>>> copying its content.
>>>>
>>>> [Note that we do not consider PageSwapCache() pages
>>>>    at least for now, since we don't want to complicate
>>>>    __get_ksm_page(), which has a nice optimization based
>>>>    on this (for the migration case). Currently it
>>>>    spins on PageSwapCache() pages, waiting for their
>>>>    counters to be unfrozen (i.e., for the migration
>>>>    to finish). But we don't want to make it also spin
>>>>    on swap cache pages which we try to reuse, since
>>>>    the probability of reusing them is not very high.
>>>>    So, for now we do not consider PageSwapCache() pages
>>>>    at all.]
>>>>
>>>> So, in reuse_ksm_page() we check 1) PageSwapCache()
>>>> and 2) page_stable_node(), to skip a page which KSM
>>>> is currently trying to link to the stable tree. Then
>>>> we do page_ref_freeze() to prohibit KSM from merging
>>>> one more page into the page we are reusing. After
>>>> that, nobody can take a new reference to the page
>>>> being reused: KSM skips !PageSwapCache() pages with
>>>> zero refcount; and the protection against all other
>>>> participants is the same as for reused ordinary anon
>>>> pages: pte lock, page lock and mmap_sem.
>>>>
>>>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>>> ---
>>>>    include/linux/ksm.h |    7 +++++++
>>>>    mm/ksm.c            |   25 +++++++++++++++++++++++--
>>>>    mm/memory.c         |   16 ++++++++++++++--
>>>>    3 files changed, 44 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
>>>> index 161e8164abcf..e48b1e453ff5 100644
>>>> --- a/include/linux/ksm.h
>>>> +++ b/include/linux/ksm.h
>>>> @@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
>>>>      void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
>>>>    void ksm_migrate_page(struct page *newpage, struct page *oldpage);
>>>> +bool reuse_ksm_page(struct page *page,
>>>> +            struct vm_area_struct *vma, unsigned long address);
>>>>      #else  /* !CONFIG_KSM */
>>>>    @@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
>>>>    static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>>>>    {
>>>>    }
>>>> +static inline bool reuse_ksm_page(struct page *page,
>>>> +            struct vm_area_struct *vma, unsigned long address)
>>>> +{
>>>> +    return false;
>>>> +}
>>>>    #endif /* CONFIG_MMU */
>>>>    #endif /* !CONFIG_KSM */
>>>>    diff --git a/mm/ksm.c b/mm/ksm.c
>>>> index 383f961e577a..fbd14264d784 100644
>>>> --- a/mm/ksm.c
>>>> +++ b/mm/ksm.c
>>>> @@ -707,8 +707,9 @@ static struct page *__get_ksm_page(struct stable_node *stable_node,
>>>>         * case this node is no longer referenced, and should be freed;
>>>>         * however, it might mean that the page is under page_ref_freeze().
>>>>         * The __remove_mapping() case is easy, again the node is now stale;
>>>> -     * but if page is swapcache in migrate_page_move_mapping(), it might
>>>> -     * still be our page, in which case it's essential to keep the node.
>>>> +     * the same applies to the reuse_ksm_page() case; but if page is
>>>> +     * swapcache in migrate_page_move_mapping(), it might still be our
>>>> +     * page, in which case it's essential to keep the node.
>>>>         */
>>>>        while (!get_page_unless_zero(page)) {
>>>>            /*
>>>> @@ -2666,6 +2667,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
>>>>            goto again;
>>>>    }
>>>>    +bool reuse_ksm_page(struct page *page,
>>>> +            struct vm_area_struct *vma,
>>>> +            unsigned long address)
>>>> +{
>>>> +    VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
>>>> +    VM_BUG_ON_PAGE(!page_mapped(page), page);
>>>> +    VM_BUG_ON_PAGE(!PageLocked(page), page);
>>>> +
>>>> +    if (PageSwapCache(page) || !page_stable_node(page))
>>>> +        return false;
>>>> +    /* Prohibit parallel get_ksm_page() */
>>>> +    if (!page_ref_freeze(page, 1))
>>>> +        return false;
>>>> +
>>>> +    page_move_anon_rmap(page, vma);
>>> Once the mapping is changed, it is not a KSM mapping anymore. It looks like later get_ksm_page() calls would always fail on this page. Is this expected?
>> Yes, this is exactly what the patch does. Let's look at what happens
>> without the patch, when there is a write to an only-pte-mapped
>> KSM page.
>>
>> We enter do_wp_page() with page_count() == 1, since the KSM page is
>> mapped by only one pte (and we do not take an extra reference to a page
>> when we add it to the KSM stable tree). Then:
>>
>>    do_wp_page()
>>      get_page(vmf->page) <- page_count() is 2
>>      wp_page_copy()
>>        ..
>>        cow_user_page() /* Copy user page to a new one */
>>        ..
>>        put_page(vmf->page) <- page_count() is 1
>>        put_page(vmf->page) <- page_count() is 0
>>
>> The second put_page() frees the page (and also zeroes page->mapping),
>> and after that it's not a PageKsm() page anymore. Further
>> __get_ksm_page() calls will fail on this page (since the mapping
>> was zeroed), and its node will be unlinked from the KSM stable tree:
>>
>> __get_ksm_page()
>> {
>>     /* page->mapping == NULL, expected_mapping != NULL */
>>     if (READ_ONCE(page->mapping) != expected_mapping)
>>         goto stale;
>>     .......
>> stale:
>>     remove_node_from_stable_tree(stable_node);
>> }
>>
>>
>> The patch optimizes do_wp_page() and makes it avoid the copy
>> (like we do for ordinary anon pages). Since the KSM page would be freed
>> anyway after we dropped the last reference to it, we reuse it instead.
>> So, things now work this way:
>>
>> do_wp_page()
>>    lock_page(vmf->page)
>>    reuse_ksm_page()
>>      check PageSwapCache() and page_stable_node()
>>      page_ref_freeze(page, 1) <- Freeze the page to make parallel
>>                                  __get_ksm_page() (if any) waiting
>>      page_move_anon_rmap()    <- Write new mapping, so __get_ksm_page()
>>                                  sees this is not a KSM page anymore,
>>                                  and it removes stable node.
>>
>> So, the result is the same, but after the patch we achieve it faster :)
>>
>> Also, note that in the most probable case, do_wp_page() does not race
>> with __get_ksm_page() (the race window is very small; __get_ksm_page()
>> spins only when reuse_ksm_page() is between page_ref_freeze()
>> and page_move_anon_rmap(), which are on neighboring lines).
>>
>> So, this is the idea. Please let me know if something is unclear.
> 
> Thanks for elaborating on this. It sounds reasonable. You can add Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>

Thanks!

Kirill


* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-13 15:29 [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page() Kirill Tkhai
  2018-12-13 19:15 ` Yang Shi
@ 2018-12-29  9:34 ` Kirill Tkhai
  2018-12-29 21:40 ` Andrew Morton
  2 siblings, 0 replies; 8+ messages in thread
From: Kirill Tkhai @ 2018-12-29  9:34 UTC (permalink / raw)
  To: akpm
  Cc: kirill, hughd, aarcange, christian.koenig, imbrenda, yang.shi,
	riel, ying.huang, minchan, linux-kernel, linux-mm

Hi, Andrew!

What do you think about this patch? It has already been reviewed.

Thanks,
Kirill

On 13.12.2018 18:29, Kirill Tkhai wrote:
> This patch adds an optimization for KSM pages almost
> in the same way that we already have for ordinary
> anonymous pages. If a write fault occurs in a page
> which is mapped by only one pte and is not related
> to swap cache, the page may be reused without
> copying its content.
> 
> [Note that we do not consider PageSwapCache() pages
>  at least for now, since we don't want to complicate
>  __get_ksm_page(), which has a nice optimization based
>  on this (for the migration case). Currently it
>  spins on PageSwapCache() pages, waiting for their
>  counters to be unfrozen (i.e., for the migration
>  to finish). But we don't want to make it also spin
>  on swap cache pages which we try to reuse, since
>  the probability of reusing them is not very high.
>  So, for now we do not consider PageSwapCache() pages
>  at all.]
> 
> So, in reuse_ksm_page() we check 1) PageSwapCache()
> and 2) page_stable_node(), to skip a page which KSM
> is currently trying to link to the stable tree. Then
> we do page_ref_freeze() to prohibit KSM from merging
> one more page into the page we are reusing. After
> that, nobody can take a new reference to the page
> being reused: KSM skips !PageSwapCache() pages with
> zero refcount; and the protection against all other
> participants is the same as for reused ordinary anon
> pages: pte lock, page lock and mmap_sem.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  include/linux/ksm.h |    7 +++++++
>  mm/ksm.c            |   25 +++++++++++++++++++++++--
>  mm/memory.c         |   16 ++++++++++++++--
>  3 files changed, 44 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index 161e8164abcf..e48b1e453ff5 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
>  
>  void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
>  void ksm_migrate_page(struct page *newpage, struct page *oldpage);
> +bool reuse_ksm_page(struct page *page,
> +			struct vm_area_struct *vma, unsigned long address);
>  
>  #else  /* !CONFIG_KSM */
>  
> @@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
>  static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>  {
>  }
> +static inline bool reuse_ksm_page(struct page *page,
> +			struct vm_area_struct *vma, unsigned long address)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_MMU */
>  #endif /* !CONFIG_KSM */
>  
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 383f961e577a..fbd14264d784 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -707,8 +707,9 @@ static struct page *__get_ksm_page(struct stable_node *stable_node,
>  	 * case this node is no longer referenced, and should be freed;
>  	 * however, it might mean that the page is under page_ref_freeze().
>  	 * The __remove_mapping() case is easy, again the node is now stale;
> -	 * but if page is swapcache in migrate_page_move_mapping(), it might
> -	 * still be our page, in which case it's essential to keep the node.
> +	 * the same applies to the reuse_ksm_page() case; but if page is
> +	 * swapcache in migrate_page_move_mapping(), it might still be our
> +	 * page, in which case it's essential to keep the node.
>  	 */
>  	while (!get_page_unless_zero(page)) {
>  		/*
> @@ -2666,6 +2667,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
>  		goto again;
>  }
>  
> +bool reuse_ksm_page(struct page *page,
> +		    struct vm_area_struct *vma,
> +		    unsigned long address)
> +{
> +	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
> +	VM_BUG_ON_PAGE(!page_mapped(page), page);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +
> +	if (PageSwapCache(page) || !page_stable_node(page))
> +		return false;
> +	/* Prohibit parallel get_ksm_page() */
> +	if (!page_ref_freeze(page, 1))
> +		return false;
> +
> +	page_move_anon_rmap(page, vma);
> +	page->index = linear_page_index(vma, address);
> +	page_ref_unfreeze(page, 1);
> +
> +	return true;
> +}
>  #ifdef CONFIG_MIGRATION
>  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
>  {
> diff --git a/mm/memory.c b/mm/memory.c
> index 532061217e03..5817527f1877 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2509,8 +2509,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>  	 * Take out anonymous pages first, anonymous shared vmas are
>  	 * not dirty accountable.
>  	 */
> -	if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
> +	if (PageAnon(vmf->page)) {
>  		int total_map_swapcount;
> +		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
> +					   page_count(vmf->page) != 1))
> +			goto copy;
>  		if (!trylock_page(vmf->page)) {
>  			get_page(vmf->page);
>  			pte_unmap_unlock(vmf->pte, vmf->ptl);
> @@ -2525,6 +2528,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>  			}
>  			put_page(vmf->page);
>  		}
> +		if (PageKsm(vmf->page)) {
> +			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
> +						     vmf->address);
> +			unlock_page(vmf->page);
> +			if (!reused)
> +				goto copy;
> +			wp_page_reuse(vmf);
> +			return VM_FAULT_WRITE;
> +		}
>  		if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
>  			if (total_map_swapcount == 1) {
>  				/*
> @@ -2545,7 +2557,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>  					(VM_WRITE|VM_SHARED))) {
>  		return wp_page_shared(vmf);
>  	}
> -
> +copy:
>  	/*
>  	 * Ok, we need to copy. Oh, well..
>  	 */
> 


* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-13 15:29 [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page() Kirill Tkhai
  2018-12-13 19:15 ` Yang Shi
  2018-12-29  9:34 ` Kirill Tkhai
@ 2018-12-29 21:40 ` Andrew Morton
  2018-12-30  9:47   ` Kirill Tkhai
  2 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2018-12-29 21:40 UTC (permalink / raw)
  To: Kirill Tkhai
  Cc: kirill, hughd, aarcange, christian.koenig, imbrenda, yang.shi,
	riel, ying.huang, minchan, linux-kernel, linux-mm

On Thu, 13 Dec 2018 18:29:08 +0300 Kirill Tkhai <ktkhai@virtuozzo.com> wrote:

> This patch adds an optimization for KSM pages almost
> in the same way that we already have for ordinary
> anonymous pages. If a write fault occurs in a page
> which is mapped by only one pte and is not related
> to swap cache, the page may be reused without
> copying its content.
> 
> [Note that we do not consider PageSwapCache() pages
>  at least for now, since we don't want to complicate
>  __get_ksm_page(), which has a nice optimization based
>  on this (for the migration case). Currently it
>  spins on PageSwapCache() pages, waiting for their
>  counters to be unfrozen (i.e., for the migration
>  to finish). But we don't want to make it also spin
>  on swap cache pages which we try to reuse, since
>  the probability of reusing them is not very high.
>  So, for now we do not consider PageSwapCache() pages
>  at all.]
> 
> So, in reuse_ksm_page() we check 1) PageSwapCache()
> and 2) page_stable_node(), to skip a page which KSM
> is currently trying to link to the stable tree. Then
> we do page_ref_freeze() to prohibit KSM from merging
> one more page into the page we are reusing. After
> that, nobody can take a new reference to the page
> being reused: KSM skips !PageSwapCache() pages with
> zero refcount; and the protection against all other
> participants is the same as for reused ordinary anon
> pages: pte lock, page lock and mmap_sem.
> 
> ...
>
> +bool reuse_ksm_page(struct page *page,
> +		    struct vm_area_struct *vma,
> +		    unsigned long address)
> +{
> +	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
> +	VM_BUG_ON_PAGE(!page_mapped(page), page);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +
> +	if (PageSwapCache(page) || !page_stable_node(page))
> +		return false;
> +	/* Prohibit parallel get_ksm_page() */
> +	if (!page_ref_freeze(page, 1))
> +		return false;
> +
> +	page_move_anon_rmap(page, vma);
> +	page->index = linear_page_index(vma, address);
> +	page_ref_unfreeze(page, 1);
> +
> +	return true;
> +}

Can we avoid those BUG_ON()s?

Something like this:

--- a/mm/ksm.c~mm-reuse-only-pte-mapped-ksm-page-in-do_wp_page-fix
+++ a/mm/ksm.c
@@ -2649,9 +2649,14 @@ bool reuse_ksm_page(struct page *page,
 		    struct vm_area_struct *vma,
 		    unsigned long address)
 {
-	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
-	VM_BUG_ON_PAGE(!page_mapped(page), page);
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
+#ifdef CONFIG_DEBUG_VM
+	if (WARN_ON(is_zero_pfn(page_to_pfn(page))) ||
+			WARN_ON(!page_mapped(page)) ||
+			WARN_ON(!PageLocked(page))) {
+		dump_page(page, "reuse_ksm_page");
+		return false;
+	}
+#endif
 
 	if (PageSwapCache(page) || !page_stable_node(page))
 		return false;

We don't have a VM_WARN_ON_PAGE() and we can't provide one because the
VM_foo() macros don't return a value.  It's irritating and I keep
forgetting why we ended up doing them this way.
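
A value-returning variant could presumably be built the same way
WARN_ON() itself is -- an illustrative sketch only, since no such macro
exists in mmdebug.h today (the !CONFIG_DEBUG_VM stub uses
BUILD_BUG_ON_INVALID() so the condition still gets type-checked):

#ifdef CONFIG_DEBUG_VM
#define VM_WARN_ON_PAGE(cond, page)	({			\
	int __ret_warn = !!(cond);				\
								\
	if (unlikely(__ret_warn)) {				\
		dump_page(page, "VM_WARN_ON_PAGE(" #cond ")");	\
		WARN_ON(1);					\
	}							\
	unlikely(__ret_warn);					\
})
#else
#define VM_WARN_ON_PAGE(cond, page)	({			\
	BUILD_BUG_ON_INVALID(cond);				\
	false;							\
})
#endif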


* Re: [PATCH] mm: Reuse only-pte-mapped KSM page in do_wp_page()
  2018-12-29 21:40 ` Andrew Morton
@ 2018-12-30  9:47   ` Kirill Tkhai
  0 siblings, 0 replies; 8+ messages in thread
From: Kirill Tkhai @ 2018-12-30  9:47 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kirill, hughd, aarcange, christian.koenig, imbrenda, yang.shi,
	riel, ying.huang, minchan, linux-kernel, linux-mm

On 30.12.2018 00:40, Andrew Morton wrote:
> On Thu, 13 Dec 2018 18:29:08 +0300 Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
> 
>> This patch adds an optimization for KSM pages almost
>> in the same way that we already have for ordinary
>> anonymous pages. If a write fault occurs in a page
>> which is mapped by only one pte and is not related
>> to swap cache, the page may be reused without
>> copying its content.
>>
>> [Note that we do not consider PageSwapCache() pages
>>  at least for now, since we don't want to complicate
>>  __get_ksm_page(), which has a nice optimization based
>>  on this (for the migration case). Currently it
>>  spins on PageSwapCache() pages, waiting for their
>>  counters to be unfrozen (i.e., for the migration
>>  to finish). But we don't want to make it also spin
>>  on swap cache pages which we try to reuse, since
>>  the probability of reusing them is not very high.
>>  So, for now we do not consider PageSwapCache() pages
>>  at all.]
>>
>> So, in reuse_ksm_page() we check 1) PageSwapCache()
>> and 2) page_stable_node(), to skip a page which KSM
>> is currently trying to link to the stable tree. Then
>> we do page_ref_freeze() to prohibit KSM from merging
>> one more page into the page we are reusing. After
>> that, nobody can take a new reference to the page
>> being reused: KSM skips !PageSwapCache() pages with
>> zero refcount; and the protection against all other
>> participants is the same as for reused ordinary anon
>> pages: pte lock, page lock and mmap_sem.
>>
>> ...
>>
>> +bool reuse_ksm_page(struct page *page,
>> +		    struct vm_area_struct *vma,
>> +		    unsigned long address)
>> +{
>> +	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
>> +	VM_BUG_ON_PAGE(!page_mapped(page), page);
>> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
>> +
>> +	if (PageSwapCache(page) || !page_stable_node(page))
>> +		return false;
>> +	/* Prohibit parallel get_ksm_page() */
>> +	if (!page_ref_freeze(page, 1))
>> +		return false;
>> +
>> +	page_move_anon_rmap(page, vma);
>> +	page->index = linear_page_index(vma, address);
>> +	page_ref_unfreeze(page, 1);
>> +
>> +	return true;
>> +}
> 
> Can we avoid those BUG_ON()s?
> 
> Something like this:
> 
> --- a/mm/ksm.c~mm-reuse-only-pte-mapped-ksm-page-in-do_wp_page-fix
> +++ a/mm/ksm.c
> @@ -2649,9 +2649,14 @@ bool reuse_ksm_page(struct page *page,
>  		    struct vm_area_struct *vma,
>  		    unsigned long address)
>  {
> -	VM_BUG_ON_PAGE(is_zero_pfn(page_to_pfn(page)), page);
> -	VM_BUG_ON_PAGE(!page_mapped(page), page);
> -	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +#ifdef CONFIG_DEBUG_VM
> +	if (WARN_ON(is_zero_pfn(page_to_pfn(page))) ||
> +			WARN_ON(!page_mapped(page)) ||
> +			WARN_ON(!PageLocked(page))) {
> +		dump_page(page, "reuse_ksm_page");
> +		return false;
> +	}
> +#endif

Looks good!
  
>  	if (PageSwapCache(page) || !page_stable_node(page))
>  		return false;
> 
> We don't have a VM_WARN_ON_PAGE() and we can't provide one because the
> VM_foo() macros don't return a value.  It's irritating and I keep
> forgetting why we ended up doing them this way.
Thanks!

