All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yin Fengwei <fengwei.yin@intel.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
	willy@infradead.org, sidhartha.kumar@oracle.com,
	mike.kravetz@oracle.com, jane.chu@oracle.com,
	naoya.horiguchi@nec.com
Cc: fengwei.yin@intel.com
Subject: [PATCH v2 2/5] rmap: move page unmap operation to dedicated function
Date: Tue, 28 Feb 2023 20:23:05 +0800	[thread overview]
Message-ID: <20230228122308.2972219-3-fengwei.yin@intel.com> (raw)
In-Reply-To: <20230228122308.2972219-1-fengwei.yin@intel.com>

No functional change. Just code reorganized.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 mm/rmap.c | 369 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 194 insertions(+), 175 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 0f09518d6f30..987ab402392f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1540,17 +1540,204 @@ static bool try_to_unmap_one_hugetlb(struct folio *folio,
 	return ret;
 }
 
+static bool try_to_unmap_one_page(struct folio *folio,
+		struct vm_area_struct *vma, struct mmu_notifier_range range,
+		struct page_vma_mapped_walk pvmw, unsigned long address,
+		enum ttu_flags flags)
+{
+	bool anon_exclusive, ret = true;
+	struct page *subpage;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t pteval;
+
+	subpage = folio_page(folio,
+			pte_pfn(*pvmw.pte) - folio_pfn(folio));
+	anon_exclusive = folio_test_anon(folio) &&
+		PageAnonExclusive(subpage);
+
+	flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+	/* Nuke the page table entry. */
+	if (should_defer_flush(mm, flags)) {
+		/*
+		 * We clear the PTE but do not flush so potentially
+		 * a remote CPU could still be writing to the folio.
+		 * If the entry was previously clean then the
+		 * architecture must guarantee that a clear->dirty
+		 * transition on a cached TLB entry is written through
+		 * and traps if the PTE is unmapped.
+		 */
+		pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+		set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+	} else {
+		pteval = ptep_clear_flush(vma, address, pvmw.pte);
+	}
+
+	/*
+	 * Now the pte is cleared. If this pte was uffd-wp armed,
+	 * we may want to replace a none pte with a marker pte if
+	 * it's file-backed, so we don't lose the tracking info.
+	 */
+	pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
+	/* Set the dirty flag on the folio now the pte is gone. */
+	if (pte_dirty(pteval))
+		folio_mark_dirty(folio);
+
+	/* Update high watermark before we lower rss */
+	update_hiwater_rss(mm);
+
+	if (PageHWPoison(subpage) && !(flags & TTU_HWPOISON)) {
+		pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+		dec_mm_counter(mm, mm_counter(&folio->page));
+		set_pte_at(mm, address, pvmw.pte, pteval);
+	} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+		/*
+		 * The guest indicated that the page content is of no
+		 * interest anymore. Simply discard the pte, vmscan
+		 * will take care of the rest.
+		 * A future reference will then fault in a new zero
+		 * page. When userfaultfd is active, we must not drop
+		 * this page though, as its main user (postcopy
+		 * migration) will not expect userfaults on already
+		 * copied pages.
+		 */
+		dec_mm_counter(mm, mm_counter(&folio->page));
+		/* We have to invalidate as we cleared the pte */
+		mmu_notifier_invalidate_range(mm, address,
+				address + PAGE_SIZE);
+	} else if (folio_test_anon(folio)) {
+		swp_entry_t entry = { .val = page_private(subpage) };
+		pte_t swp_pte;
+		/*
+		 * Store the swap location in the pte.
+		 * See handle_pte_fault() ...
+		 */
+		if (unlikely(folio_test_swapbacked(folio) !=
+					folio_test_swapcache(folio))) {
+			WARN_ON_ONCE(1);
+			ret = false;
+			/* We have to invalidate as we cleared the pte */
+			mmu_notifier_invalidate_range(mm, address,
+					address + PAGE_SIZE);
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		/* MADV_FREE page check */
+		if (!folio_test_swapbacked(folio)) {
+			int ref_count, map_count;
+
+			/*
+			 * Synchronize with gup_pte_range():
+			 * - clear PTE; barrier; read refcount
+			 * - inc refcount; barrier; read PTE
+			 */
+			smp_mb();
+
+			ref_count = folio_ref_count(folio);
+			map_count = folio_mapcount(folio);
+
+			/*
+			 * Order reads for page refcount and dirty flag
+			 * (see comments in __remove_mapping()).
+			 */
+			smp_rmb();
+
+			/*
+			 * The only page refs must be one from isolation
+			 * plus the rmap(s) (dropped by discard:).
+			 */
+			if (ref_count == 1 + map_count &&
+					!folio_test_dirty(folio)) {
+				/* Invalidate as we cleared the pte */
+				mmu_notifier_invalidate_range(mm,
+						address, address + PAGE_SIZE);
+				dec_mm_counter(mm, MM_ANONPAGES);
+				goto discard;
+			}
+
+			/*
+			 * If the folio was redirtied, it cannot be
+			 * discarded. Remap the page to page table.
+			 */
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			folio_set_swapbacked(folio);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		if (swap_duplicate(entry) < 0) {
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+		if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+			swap_free(entry);
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		/* See page_try_share_anon_rmap(): clear PTE first. */
+		if (anon_exclusive &&
+				page_try_share_anon_rmap(subpage)) {
+			swap_free(entry);
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+		if (list_empty(&mm->mmlist)) {
+			spin_lock(&mmlist_lock);
+			if (list_empty(&mm->mmlist))
+				list_add(&mm->mmlist, &init_mm.mmlist);
+			spin_unlock(&mmlist_lock);
+		}
+		dec_mm_counter(mm, MM_ANONPAGES);
+		inc_mm_counter(mm, MM_SWAPENTS);
+		swp_pte = swp_entry_to_pte(entry);
+		if (anon_exclusive)
+			swp_pte = pte_swp_mkexclusive(swp_pte);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		if (pte_uffd_wp(pteval))
+			swp_pte = pte_swp_mkuffd_wp(swp_pte);
+		set_pte_at(mm, address, pvmw.pte, swp_pte);
+		/* Invalidate as we cleared the pte */
+		mmu_notifier_invalidate_range(mm, address,
+				address + PAGE_SIZE);
+	} else {
+		/*
+		 * This is a locked file-backed folio,
+		 * so it cannot be removed from the page
+		 * cache and replaced by a new folio before
+		 * mmu_notifier_invalidate_range_end, so no
+		 * concurrent thread might update its page table
+		 * to point at a new folio while a device is
+		 * still using this folio.
+		 *
+		 * See Documentation/mm/mmu_notifier.rst
+		 */
+		dec_mm_counter(mm, mm_counter_file(&folio->page));
+	}
+
+discard:
+	return ret;
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
 static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	pte_t pteval;
 	struct page *subpage;
-	bool anon_exclusive, ret = true;
+	bool ret = true;
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
@@ -1615,179 +1802,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 
 		subpage = folio_page(folio,
 					pte_pfn(*pvmw.pte) - folio_pfn(folio));
-		anon_exclusive = folio_test_anon(folio) &&
-				 PageAnonExclusive(subpage);
-
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
-		/* Nuke the page table entry. */
-		if (should_defer_flush(mm, flags)) {
-			/*
-			 * We clear the PTE but do not flush so potentially
-			 * a remote CPU could still be writing to the folio.
-			 * If the entry was previously clean then the
-			 * architecture must guarantee that a clear->dirty
-			 * transition on a cached TLB entry is written through
-			 * and traps if the PTE is unmapped.
-			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
-
-			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
-		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
-		}
-
-		/*
-		 * Now the pte is cleared. If this pte was uffd-wp armed,
-		 * we may want to replace a none pte with a marker pte if
-		 * it's file-backed, so we don't lose the tracking info.
-		 */
-		pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
-
-		/* Set the dirty flag on the folio now the pte is gone. */
-		if (pte_dirty(pteval))
-			folio_mark_dirty(folio);
-
-		/* Update high watermark before we lower rss */
-		update_hiwater_rss(mm);
-
-		if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
-			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
-			dec_mm_counter(mm, mm_counter(&folio->page));
-			set_pte_at(mm, address, pvmw.pte, pteval);
-		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
-			/*
-			 * The guest indicated that the page content is of no
-			 * interest anymore. Simply discard the pte, vmscan
-			 * will take care of the rest.
-			 * A future reference will then fault in a new zero
-			 * page. When userfaultfd is active, we must not drop
-			 * this page though, as its main user (postcopy
-			 * migration) will not expect userfaults on already
-			 * copied pages.
-			 */
-			dec_mm_counter(mm, mm_counter(&folio->page));
-			/* We have to invalidate as we cleared the pte */
-			mmu_notifier_invalidate_range(mm, address,
-						      address + PAGE_SIZE);
-		} else if (folio_test_anon(folio)) {
-			swp_entry_t entry = { .val = page_private(subpage) };
-			pte_t swp_pte;
-			/*
-			 * Store the swap location in the pte.
-			 * See handle_pte_fault() ...
-			 */
-			if (unlikely(folio_test_swapbacked(folio) !=
-					folio_test_swapcache(folio))) {
-				WARN_ON_ONCE(1);
-				ret = false;
-				/* We have to invalidate as we cleared the pte */
-				mmu_notifier_invalidate_range(mm, address,
-							address + PAGE_SIZE);
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			/* MADV_FREE page check */
-			if (!folio_test_swapbacked(folio)) {
-				int ref_count, map_count;
-
-				/*
-				 * Synchronize with gup_pte_range():
-				 * - clear PTE; barrier; read refcount
-				 * - inc refcount; barrier; read PTE
-				 */
-				smp_mb();
-
-				ref_count = folio_ref_count(folio);
-				map_count = folio_mapcount(folio);
-
-				/*
-				 * Order reads for page refcount and dirty flag
-				 * (see comments in __remove_mapping()).
-				 */
-				smp_rmb();
-
-				/*
-				 * The only page refs must be one from isolation
-				 * plus the rmap(s) (dropped by discard:).
-				 */
-				if (ref_count == 1 + map_count &&
-				    !folio_test_dirty(folio)) {
-					/* Invalidate as we cleared the pte */
-					mmu_notifier_invalidate_range(mm,
-						address, address + PAGE_SIZE);
-					dec_mm_counter(mm, MM_ANONPAGES);
-					goto discard;
-				}
-
-				/*
-				 * If the folio was redirtied, it cannot be
-				 * discarded. Remap the page to page table.
-				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				folio_set_swapbacked(folio);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-				swap_free(entry);
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
+		ret = try_to_unmap_one_page(folio, vma,
+						range, pvmw, address, flags);
+		if (!ret)
+			break;
 
-			/* See page_try_share_anon_rmap(): clear PTE first. */
-			if (anon_exclusive &&
-			    page_try_share_anon_rmap(subpage)) {
-				swap_free(entry);
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-			if (list_empty(&mm->mmlist)) {
-				spin_lock(&mmlist_lock);
-				if (list_empty(&mm->mmlist))
-					list_add(&mm->mmlist, &init_mm.mmlist);
-				spin_unlock(&mmlist_lock);
-			}
-			dec_mm_counter(mm, MM_ANONPAGES);
-			inc_mm_counter(mm, MM_SWAPENTS);
-			swp_pte = swp_entry_to_pte(entry);
-			if (anon_exclusive)
-				swp_pte = pte_swp_mkexclusive(swp_pte);
-			if (pte_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
-			/* Invalidate as we cleared the pte */
-			mmu_notifier_invalidate_range(mm, address,
-						      address + PAGE_SIZE);
-		} else {
-			/*
-			 * This is a locked file-backed folio,
-			 * so it cannot be removed from the page
-			 * cache and replaced by a new folio before
-			 * mmu_notifier_invalidate_range_end, so no
-			 * concurrent thread might update its page table
-			 * to point at a new folio while a device is
-			 * still using this folio.
-			 *
-			 * See Documentation/mm/mmu_notifier.rst
-			 */
-			dec_mm_counter(mm, mm_counter_file(&folio->page));
-		}
-discard:
 		/*
 		 * No need to call mmu_notifier_invalidate_range() it has be
 		 * done above for all cases requiring it to happen under page
-- 
2.30.2



  parent reply	other threads:[~2023-02-28 12:22 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-28 12:23 [PATCH v2 0/5] batched remove rmap in try_to_unmap_one() Yin Fengwei
2023-02-28 12:23 ` [PATCH v2 1/5] rmap: move hugetlb try_to_unmap to dedicated function Yin Fengwei
2023-02-28 12:23 ` Yin Fengwei [this message]
2023-02-28 12:23 ` [PATCH v2 3/5] rmap: cleanup exit path of try_to_unmap_one_page() Yin Fengwei
2023-02-28 12:23 ` [PATCH v2 4/5] rmap:addd folio_remove_rmap_range() Yin Fengwei
2023-02-28 12:23 ` [PATCH v2 5/5] try_to_unmap_one: batched remove rmap, update folio refcount Yin Fengwei
2023-02-28 20:28 ` [PATCH v2 0/5] batched remove rmap in try_to_unmap_one() Andrew Morton
2023-03-01  1:44   ` Yin, Fengwei
2023-03-02 10:04     ` David Hildenbrand
2023-03-02 13:32       ` Yin, Fengwei
2023-03-02 14:23         ` David Hildenbrand
2023-03-02 14:33           ` Matthew Wilcox
2023-03-02 14:55             ` David Hildenbrand
2023-03-03  2:44               ` Yin, Fengwei
2023-03-03  2:26           ` Yin, Fengwei
2023-03-06  9:11             ` Yin Fengwei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230228122308.2972219-3-fengwei.yin@intel.com \
    --to=fengwei.yin@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=jane.chu@oracle.com \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=naoya.horiguchi@nec.com \
    --cc=sidhartha.kumar@oracle.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.