All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yang Shi <yang.shi@linux.alibaba.com>
To: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	akpm@linux-foundation.org, Andrea Arcangeli <aarcange@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>, Ralph Campbell <rcampbell@nvidia.com>,
	John Hubbard <jhubbard@nvidia.com>,
	William Kucharski <william.kucharski@oracle.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCHv3, RESEND 6/8] khugepaged: Allow to collapse PTE-mapped compound pages
Date: Wed, 15 Apr 2020 13:46:35 -0700	[thread overview]
Message-ID: <e171fd09-53a7-f8a0-e2c7-1ec4fccbd42d@linux.alibaba.com> (raw)
In-Reply-To: <20200413125220.663-7-kirill.shutemov@linux.intel.com>



On 4/13/20 5:52 AM, Kirill A. Shutemov wrote:
> We can collapse PTE-mapped compound pages. We only need to avoid
> handling them more than once: lock/unlock page only once if it's present
> in the PMD range multiple times as it handled on compound level. The
> same goes for LRU isolation and putback.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>   mm/khugepaged.c | 99 ++++++++++++++++++++++++++++++++-----------------
>   1 file changed, 65 insertions(+), 34 deletions(-)

Acked-by: Yang Shi <yang.shi@linux.alibaba.com>

>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index f9864644c3b7..11d500396d85 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -515,17 +515,30 @@ void __khugepaged_exit(struct mm_struct *mm)
>   
>   static void release_pte_page(struct page *page)
>   {
> -	dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page));
> +	mod_node_page_state(page_pgdat(page),
> +			NR_ISOLATED_ANON + page_is_file_cache(page),
> +			-compound_nr(page));
>   	unlock_page(page);
>   	putback_lru_page(page);
>   }
>   
> -static void release_pte_pages(pte_t *pte, pte_t *_pte)
> +static void release_pte_pages(pte_t *pte, pte_t *_pte,
> +		struct list_head *compound_pagelist)
>   {
> +	struct page *page, *tmp;
> +
>   	while (--_pte >= pte) {
>   		pte_t pteval = *_pte;
> -		if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval)))
> -			release_pte_page(pte_page(pteval));
> +
> +		page = pte_page(pteval);
> +		if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval)) &&
> +				!PageCompound(page))
> +			release_pte_page(page);
> +	}
> +
> +	list_for_each_entry_safe(page, tmp, compound_pagelist, lru) {
> +		list_del(&page->lru);
> +		release_pte_page(page);
>   	}
>   }
>   
> @@ -549,7 +562,8 @@ static bool is_refcount_suitable(struct page *page)
>   
>   static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>   					unsigned long address,
> -					pte_t *pte)
> +					pte_t *pte,
> +					struct list_head *compound_pagelist)
>   {
>   	struct page *page = NULL;
>   	pte_t *_pte;
> @@ -579,13 +593,21 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>   			goto out;
>   		}
>   
> -		/* TODO: teach khugepaged to collapse THP mapped with pte */
> +		VM_BUG_ON_PAGE(!PageAnon(page), page);
> +
>   		if (PageCompound(page)) {
> -			result = SCAN_PAGE_COMPOUND;
> -			goto out;
> -		}
> +			struct page *p;
> +			page = compound_head(page);
>   
> -		VM_BUG_ON_PAGE(!PageAnon(page), page);
> +			/*
> +			 * Check if we have dealt with the compound page
> +			 * already
> +			 */
> +			list_for_each_entry(p, compound_pagelist, lru) {
> +				if (page == p)
> +					goto next;
> +			}
> +		}
>   
>   		/*
>   		 * We can do it before isolate_lru_page because the
> @@ -614,19 +636,15 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>   			result = SCAN_PAGE_COUNT;
>   			goto out;
>   		}
> -		if (pte_write(pteval)) {
> -			writable = true;
> -		} else {
> -			if (PageSwapCache(page) &&
> -			    !reuse_swap_page(page, NULL)) {
> -				unlock_page(page);
> -				result = SCAN_SWAP_CACHE_PAGE;
> -				goto out;
> -			}
> +		if (!pte_write(pteval) && PageSwapCache(page) &&
> +				!reuse_swap_page(page, NULL)) {
>   			/*
> -			 * Page is not in the swap cache. It can be collapsed
> -			 * into a THP.
> +			 * Page is in the swap cache and cannot be re-used.
> +			 * It cannot be collapsed into a THP.
>   			 */
> +			unlock_page(page);
> +			result = SCAN_SWAP_CACHE_PAGE;
> +			goto out;
>   		}
>   
>   		/*
> @@ -638,16 +656,23 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>   			result = SCAN_DEL_PAGE_LRU;
>   			goto out;
>   		}
> -		inc_node_page_state(page,
> -				NR_ISOLATED_ANON + page_is_file_cache(page));
> +		mod_node_page_state(page_pgdat(page),
> +				NR_ISOLATED_ANON + page_is_file_cache(page),
> +				compound_nr(page));
>   		VM_BUG_ON_PAGE(!PageLocked(page), page);
>   		VM_BUG_ON_PAGE(PageLRU(page), page);
>   
> +		if (PageCompound(page))
> +			list_add_tail(&page->lru, compound_pagelist);
> +next:
>   		/* There should be enough young pte to collapse the page */
>   		if (pte_young(pteval) ||
>   		    page_is_young(page) || PageReferenced(page) ||
>   		    mmu_notifier_test_young(vma->vm_mm, address))
>   			referenced++;
> +
> +		if (pte_write(pteval))
> +			writable = true;
>   	}
>   	if (likely(writable)) {
>   		if (likely(referenced)) {
> @@ -661,7 +686,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>   	}
>   
>   out:
> -	release_pte_pages(pte, _pte);
> +	release_pte_pages(pte, _pte, compound_pagelist);
>   	trace_mm_collapse_huge_page_isolate(page, none_or_zero,
>   					    referenced, writable, result);
>   	return 0;
> @@ -670,13 +695,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>   static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>   				      struct vm_area_struct *vma,
>   				      unsigned long address,
> -				      spinlock_t *ptl)
> +				      spinlock_t *ptl,
> +				      struct list_head *compound_pagelist)
>   {
> +	struct page *src_page, *tmp;
>   	pte_t *_pte;
>   	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
>   				_pte++, page++, address += PAGE_SIZE) {
>   		pte_t pteval = *_pte;
> -		struct page *src_page;
>   
>   		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
>   			clear_user_highpage(page, address);
> @@ -696,7 +722,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>   		} else {
>   			src_page = pte_page(pteval);
>   			copy_user_highpage(page, src_page, address, vma);
> -			release_pte_page(src_page);
> +			if (!PageCompound(src_page))
> +				release_pte_page(src_page);
>   			/*
>   			 * ptl mostly unnecessary, but preempt has to
>   			 * be disabled to update the per-cpu stats
> @@ -713,6 +740,11 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>   			free_page_and_swap_cache(src_page);
>   		}
>   	}
> +
> +	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
> +		list_del(&src_page->lru);
> +		release_pte_page(src_page);
> +	}
>   }
>   
>   static void khugepaged_alloc_sleep(void)
> @@ -971,6 +1003,7 @@ static void collapse_huge_page(struct mm_struct *mm,
>   				   struct page **hpage,
>   				   int node, int referenced, int unmapped)
>   {
> +	LIST_HEAD(compound_pagelist);
>   	pmd_t *pmd, _pmd;
>   	pte_t *pte;
>   	pgtable_t pgtable;
> @@ -1071,7 +1104,8 @@ static void collapse_huge_page(struct mm_struct *mm,
>   	mmu_notifier_invalidate_range_end(&range);
>   
>   	spin_lock(pte_ptl);
> -	isolated = __collapse_huge_page_isolate(vma, address, pte);
> +	isolated = __collapse_huge_page_isolate(vma, address, pte,
> +			&compound_pagelist);
>   	spin_unlock(pte_ptl);
>   
>   	if (unlikely(!isolated)) {
> @@ -1096,7 +1130,8 @@ static void collapse_huge_page(struct mm_struct *mm,
>   	 */
>   	anon_vma_unlock_write(vma->anon_vma);
>   
> -	__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl);
> +	__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl,
> +			&compound_pagelist);
>   	pte_unmap(pte);
>   	__SetPageUptodate(new_page);
>   	pgtable = pmd_pgtable(_pmd);
> @@ -1193,11 +1228,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>   			goto out_unmap;
>   		}
>   
> -		/* TODO: teach khugepaged to collapse THP mapped with pte */
> -		if (PageCompound(page)) {
> -			result = SCAN_PAGE_COMPOUND;
> -			goto out_unmap;
> -		}
> +		page = compound_head(page);
>   
>   		/*
>   		 * Record which node the original page is from and save this


  reply	other threads:[~2020-04-15 20:46 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-13 12:52 [PATCHv3, RESEND 0/8] thp/khugepaged improvements and CoW semantics Kirill A. Shutemov
2020-04-13 12:52 ` [PATCHv3, RESEND 1/8] khugepaged: Add self test Kirill A. Shutemov
2020-04-15 15:39   ` Zi Yan
2020-04-15 20:11     ` Yang Shi
2020-04-13 12:52 ` [PATCHv3, RESEND 2/8] khugepaged: Do not stop collapse if less than half PTEs are referenced Kirill A. Shutemov
2020-04-15 20:31   ` Yang Shi
2020-04-13 12:52 ` [PATCHv3, RESEND 3/8] khugepaged: Drain all LRU caches before scanning pages Kirill A. Shutemov
2020-04-15 20:31   ` Yang Shi
2020-04-13 12:52 ` [PATCHv3, RESEND 4/8] khugepaged: Drain LRU add pagevec after swapin Kirill A. Shutemov
2020-04-15 20:32   ` Yang Shi
2020-04-13 12:52 ` [PATCHv3, RESEND 5/8] khugepaged: Allow to collapse a page shared across fork Kirill A. Shutemov
2020-04-13 20:48   ` John Hubbard
2020-04-14 21:35     ` Kirill A. Shutemov
2020-04-15  2:42       ` John Hubbard
2020-04-15  2:43   ` John Hubbard
2020-04-15 20:39   ` Yang Shi
2020-04-13 12:52 ` [PATCHv3, RESEND 6/8] khugepaged: Allow to collapse PTE-mapped compound pages Kirill A. Shutemov
2020-04-15 20:46   ` Yang Shi [this message]
2020-04-15 21:44   ` Andrew Morton
2020-04-15 21:52     ` Kirill A. Shutemov
2020-04-15 22:52       ` Andrew Morton
2020-04-15 22:59         ` Kirill A. Shutemov
2020-04-13 12:52 ` [PATCHv3, RESEND 7/8] thp: Change CoW semantics for anon-THP Kirill A. Shutemov
2020-04-15 21:19   ` Yang Shi
2020-04-13 12:52 ` [PATCHv3, RESEND 8/8] khugepaged: Introduce 'max_ptes_shared' tunable Kirill A. Shutemov
2020-04-15 21:25   ` Yang Shi
2020-04-16 15:52     ` Kirill A. Shutemov
2020-04-16 18:25       ` Yang Shi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e171fd09-53a7-f8a0-e2c7-1ec4fccbd42d@linux.alibaba.com \
    --to=yang.shi@linux.alibaba.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=jhubbard@nvidia.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rcampbell@nvidia.com \
    --cc=william.kucharski@oracle.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.