From: Yin Fengwei <fengwei.yin@intel.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org, willy@infradead.org
Cc: fengwei.yin@intel.com
Subject: [PATCH 5/5] try_to_unmap_one: batched remove rmap, update folio refcount
Date: Thu, 23 Feb 2023 16:32:00 +0800
Message-ID: <20230223083200.3149015-6-fengwei.yin@intel.com>
In-Reply-To: <20230223083200.3149015-1-fengwei.yin@intel.com>

If unmapping one page fails, or the vma walk will skip the next pte,
or the vma walk will end on the next pte, batch-remove the rmap
entries and update the folio refcount.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
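
A minimal userspace sketch of the batching idea, for illustration only
(struct folio and folio_ref_sub_batched below are hypothetical
stand-ins, not kernel API): rather than one atomic folio_put() per
unmapped page, a count is accumulated over a contiguous run of ptes
and all references are dropped in one atomic update, as the patch does
with folio_ref_sub(folio, count).

#include <stdatomic.h>
#include <stdio.h>

struct folio { atomic_int refcount; };

/* batched stand-in for calling folio_put() once per unmapped page */
static void folio_ref_sub_batched(struct folio *folio, int count)
{
	if (count == 0)
		return;
	atomic_fetch_sub(&folio->refcount, count);
}

int main(void)
{
	struct folio folio = { .refcount = 16 };
	int count = 0;

	for (int pte = 0; pte < 8; pte++) {
		/* ... pretend this pte was unmapped successfully ... */
		count++;
		/*
		 * Flush the batch at the end of the run, analogous to
		 * the patch flushing when the walk would skip or end
		 * on the next pte.
		 */
		if (pte == 7) {
			folio_ref_sub_batched(&folio, count);
			count = 0;
		}
	}
	printf("refcount now %d\n", atomic_load(&folio.refcount));
	return 0;
}
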
 include/linux/rmap.h |  1 +
 mm/page_vma_mapped.c | 30 +++++++++++++++++++++++++++
 mm/rmap.c            | 48 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d7a51b96f379..568801ee8d6a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -424,6 +424,7 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 }
 
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
+bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw);
 
 /*
  * Used by swapoff to help locate where page is expected in vma.
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 4e448cfbc6ef..19e997dfb5c6 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -291,6 +291,36 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	return false;
 }
 
+/**
+ * pvmw_walk_skip_or_end_on_next - check whether the vma walk will skip
+ *                                 or end on the next pte
+ * @pvmw: pointer to struct page_vma_mapped_walk.
+ *
+ * This function can only be called with the correct pte lock held.
+ */
+bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw)
+{
+	unsigned long address = pvmw->address + PAGE_SIZE;
+
+	if (address >= vma_address_end(pvmw))
+		return true;
+
+	if ((address & (PMD_SIZE - PAGE_SIZE)) == 0)
+		return true;
+
+	if (pte_none(*pvmw->pte))
+		return true;
+
+	pvmw->pte++;
+	if (!check_pte(pvmw)) {
+		pvmw->pte--;
+		return true;
+	}
+	pvmw->pte--;
+
+	return false;
+}
+
 /**
  * page_mapped_in_vma - check whether a page is really mapped in a VMA
  * @page: the page to test
diff --git a/mm/rmap.c b/mm/rmap.c
index 3680765b7ec8..7156b804d424 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1748,6 +1748,26 @@ static bool try_to_unmap_one_page(struct folio *folio,
 	return false;
 }
 
+static void folio_remove_rmap_and_update_count(struct folio *folio,
+		struct page *start, struct vm_area_struct *vma, int count)
+{
+	if (count == 0)
+		return;
+
+	/*
+	 * No need to call mmu_notifier_invalidate_range(); it has been
+	 * done above for all cases requiring it to happen under the page
+	 * table lock, before mmu_notifier_invalidate_range_end().
+	 *
+	 * See Documentation/mm/mmu_notifier.rst
+	 */
+	folio_remove_rmap_range(folio, start, count, vma,
+					folio_test_hugetlb(folio));
+	if (vma->vm_flags & VM_LOCKED)
+		mlock_drain_local();
+	folio_ref_sub(folio, count);
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1755,10 +1775,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	struct page *subpage;
+	struct page *start = NULL;
 	bool ret = true;
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
+	int count = 0;
 
 	/*
 	 * When racing against e.g. zap_pte_range() on another cpu,
@@ -1819,26 +1840,31 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			break;
 		}
 
-		subpage = folio_page(folio,
+		if (!start)
+			start = folio_page(folio,
 					pte_pfn(*pvmw.pte) - folio_pfn(folio));
 		ret = try_to_unmap_one_page(folio, vma,
 						range, pvmw, address, flags);
 		if (!ret) {
+			folio_remove_rmap_and_update_count(folio,
+							start, vma, count);
 			page_vma_mapped_walk_done(&pvmw);
 			break;
 		}
+		count++;
 
 		/*
-		 * No need to call mmu_notifier_invalidate_range() it has be
-		 * done above for all cases requiring it to happen under page
-		 * table lock before mmu_notifier_invalidate_range_end()
-		 *
-		 * See Documentation/mm/mmu_notifier.rst
+		 * If the next pte will be skipped in page_vma_mapped_walk()
+		 * or the walk will end at it, batch-remove the rmap entries
+		 * and update the page refcount. We can't do it after the
+		 * walk returns false because the pte lock is no longer held.
 		 */
-		page_remove_rmap(subpage, vma, false);
-		if (vma->vm_flags & VM_LOCKED)
-			mlock_drain_local();
-		folio_put(folio);
+		if (pvmw_walk_skip_or_end_on_next(&pvmw)) {
+			folio_remove_rmap_and_update_count(folio,
+							start, vma, count);
+			count = 0;
+			start = NULL;
+		}
 	}
 
 	mmu_notifier_invalidate_range_end(&range);
-- 
2.30.2
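
One note on the boundary test above: (address & (PMD_SIZE - PAGE_SIZE)) == 0
holds exactly when a page-aligned address is also PMD-aligned, i.e. the
next pte would sit in a different page table, so the batch must be
flushed while the current pte lock is still held. A standalone sketch
of the check, assuming 4 KiB pages and 2 MiB PMDs (the constants and
helper name are illustrative, not kernel definitions):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 0x1000UL	/* assumed: 4 KiB pages */
#define PMD_SIZE  0x200000UL	/* assumed: 2 MiB per PMD entry */

/* true if the pte after 'address' falls in a different page table */
static bool next_pte_crosses_pmd(unsigned long address)
{
	unsigned long next = address + PAGE_SIZE;

	return (next & (PMD_SIZE - PAGE_SIZE)) == 0;
}

int main(void)
{
	/* last pte of a PMD: the next address is PMD-aligned */
	assert(next_pte_crosses_pmd(PMD_SIZE - PAGE_SIZE));
	/* a pte in the middle of a PMD: no crossing */
	assert(!next_pte_crosses_pmd(PMD_SIZE / 2));
	printf("PMD boundary checks behave as expected\n");
	return 0;
}
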



Thread overview: 13+ messages
2023-02-23  8:31 [PATCH 0/5] batched remove rmap in try_to_unmap_one() Yin Fengwei
2023-02-23  8:31 ` [PATCH 1/5] rmap: move hugetlb try_to_unmap to dedicated function Yin Fengwei
2023-02-23 17:28   ` Matthew Wilcox
2023-02-24  0:20     ` Mike Kravetz
2023-02-24  0:52       ` Yin, Fengwei
2023-02-24  2:51     ` HORIGUCHI NAOYA(堀口 直也)
2023-02-24  4:41       ` Yin, Fengwei
2023-02-24 19:21       ` Mike Kravetz
2023-02-26 11:44         ` Yin, Fengwei
2023-02-23  8:31 ` [PATCH 2/5] rmap: move page unmap operation " Yin Fengwei
2023-02-23  8:31 ` [PATCH 3/5] rmap: cleanup exit path of try_to_unmap_one_page() Yin Fengwei
2023-02-23  8:31 ` [PATCH 4/5] rmap: add folio_remove_rmap_range() Yin Fengwei
2023-02-23  8:32 ` Yin Fengwei [this message]
