From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: "Huang, Ying" <ying.huang@intel.com>,
	Chris Li <chrisl@kernel.org>, Minchan Kim <minchan@kernel.org>,
	Barry Song <v-songbaohua@oppo.com>,
	Ryan Roberts <ryan.roberts@arm.com>, Yu Zhao <yuzhao@google.com>,
	SeongJae Park <sj@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Nhat Pham <nphamcs@gmail.com>,
	Chengming Zhou <zhouchengming@bytedance.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [RFC PATCH 02/10] mm/swap: move no readahead swapin code to a stand-alone helper
Date: Wed, 27 Mar 2024 02:50:24 +0800
Message-ID: <20240326185032.72159-3-ryncsn@gmail.com>
In-Reply-To: <20240326185032.72159-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

Simply move the routine to a standalone function, giving a cleaner
split and avoiding helpers being referenced across multiple files.

Basically no feature change, but the error path is very slightly
different. Previously, a mem_cgroup_swapin_charge_folio() failure caused
a direct OOM; now we go through the error checking path in do_swap_page(),
and if the page is already there, just return as the page fault was handled.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
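
Note: with this change the SWP_SYNCHRONOUS_IO fast path in do_swap_page()
reduces to roughly the following (a simplified sketch for review, not the
literal patched code; the surrounding declarations are omitted):

	/* skip swapcache and readahead */
	folio = swapin_direct(entry, GFP_HIGHUSER_MOVABLE, vmf);
	if (PTR_ERR(folio) == -EBUSY)
		goto out;	/* lost the swapcache_prepare() race, retry fault */
	need_clear_cache = true;
	page = &folio->page;	/* a NULL folio is caught by the later !folio check */
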
 mm/memory.c     | 42 +++-------------------------------
 mm/swap.h       |  8 +++++++
 mm/swap_state.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 39 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index f2bc6dd15eb8..e42fadc25268 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3937,7 +3937,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	swp_entry_t entry;
 	pte_t pte;
 	vm_fault_t ret = 0;
-	void *shadow = NULL;
 
 	if (!pte_unmap_same(vmf))
 		goto out;
@@ -4001,47 +4000,12 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	if (!folio) {
 		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
 		    __swap_count(entry) == 1) {
-			/*
-			 * Prevent parallel swapin from proceeding with
-			 * the cache flag. Otherwise, another thread may
-			 * finish swapin first, free the entry, and swapout
-			 * reusing the same entry. It's undetectable as
-			 * pte_same() returns true due to entry reuse.
-			 */
-			if (swapcache_prepare(entry)) {
-				/* Relax a bit to prevent rapid repeated page faults */
-				schedule_timeout_uninterruptible(1);
+			/* skip swapcache and readahead */
+			folio = swapin_direct(entry, GFP_HIGHUSER_MOVABLE, vmf);
+			if (PTR_ERR(folio) == -EBUSY)
 				goto out;
-			}
 			need_clear_cache = true;
-
-			/* skip swapcache */
-			folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
-						vma, vmf->address, false);
 			page = &folio->page;
-			if (folio) {
-				__folio_set_locked(folio);
-				__folio_set_swapbacked(folio);
-
-				if (mem_cgroup_swapin_charge_folio(folio,
-							vma->vm_mm, GFP_KERNEL,
-							entry)) {
-					ret = VM_FAULT_OOM;
-					goto out_page;
-				}
-				mem_cgroup_swapin_uncharge_swap(entry);
-
-				shadow = get_shadow_from_swap_cache(entry);
-				if (shadow)
-					workingset_refault(folio, shadow);
-
-				folio_add_lru(folio);
-
-				/* To provide entry to swap_read_folio() */
-				folio->swap = entry;
-				swap_read_folio(folio, true, NULL);
-				folio->private = NULL;
-			}
 		} else {
 			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
 						vmf);
diff --git a/mm/swap.h b/mm/swap.h
index fc2f6ade7f80..40e902812cc5 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -55,6 +55,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
 		bool skip_if_exists);
 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
 		struct mempolicy *mpol, pgoff_t ilx);
+struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
+			    struct vm_fault *vmf);
 struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
 			      struct vm_fault *vmf);
 
@@ -87,6 +89,12 @@ static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
 	return NULL;
 }
 
+static inline struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
+			struct vm_fault *vmf)
+{
+	return NULL;
+}
+
 static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
 			struct vm_fault *vmf)
 {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bfc7e8c58a6d..0a3fa48b3893 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -879,6 +879,66 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
 	return folio;
 }
 
+/**
+ * swapin_direct - swap in folios skipping swap cache and readahead
+ * @entry: swap entry of this memory
+ * @gfp_mask: memory allocation flags
+ * @vmf: fault information
+ *
+ * Returns the struct folio for entry and addr after the swap entry is read
+ * in.
+ */
+struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
+			    struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct folio *folio;
+	void *shadow = NULL;
+
+	/*
+	 * Prevent parallel swapin from proceeding with
+	 * the cache flag. Otherwise, another thread may
+	 * finish swapin first, free the entry, and swapout
+	 * reusing the same entry. It's undetectable as
+	 * pte_same() returns true due to entry reuse.
+	 */
+	if (swapcache_prepare(entry)) {
+		/* Relax a bit to prevent rapid repeated page faults */
+		schedule_timeout_uninterruptible(1);
+		return ERR_PTR(-EBUSY);
+	}
+
+	/* skip swapcache */
+	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
+				vma, vmf->address, false);
+	if (folio) {
+		__folio_set_locked(folio);
+		__folio_set_swapbacked(folio);
+
+		if (mem_cgroup_swapin_charge_folio(folio,
+					vma->vm_mm, GFP_KERNEL,
+					entry)) {
+			folio_unlock(folio);
+			folio_put(folio);
+			return NULL;
+		}
+		mem_cgroup_swapin_uncharge_swap(entry);
+
+		shadow = get_shadow_from_swap_cache(entry);
+		if (shadow)
+			workingset_refault(folio, shadow);
+
+		folio_add_lru(folio);
+
+		/* To provide entry to swap_read_folio() */
+		folio->swap = entry;
+		swap_read_folio(folio, true, NULL);
+		folio->private = NULL;
+	}
+
+	return folio;
+}
+
 /**
  * swapin_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
-- 
2.43.0
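
Note: swapin_direct() now multiplexes three outcomes through a single
pointer using the kernel's ERR_PTR()/PTR_ERR() convention from
include/linux/err.h. Below is a minimal, self-contained userspace sketch
of how a caller consumes such a contract; the macros and the stub are
stand-ins for illustration only, not the kernel code:

	#include <stdio.h>
	#include <errno.h>
	#include <stdint.h>

	/* Userspace stand-ins for the kernel's ERR_PTR()/PTR_ERR(). */
	#define ERR_PTR(err)	((void *)(intptr_t)(err))
	#define PTR_ERR(ptr)	((intptr_t)(ptr))

	struct folio;	/* opaque here; only pointers are passed around */

	/*
	 * Hypothetical stub mirroring swapin_direct()'s return contract:
	 *   ERR_PTR(-EBUSY): lost the swapcache_prepare() race, retry fault
	 *   NULL:            allocation or memcg charge failed
	 *   valid pointer:   folio was read in, the fault can proceed
	 */
	static struct folio *swapin_direct_stub(int scenario)
	{
		if (scenario == 0)
			return ERR_PTR(-EBUSY);
		if (scenario == 1)
			return NULL;
		return (struct folio *)0x1000;	/* pretend-valid folio */
	}

	int main(void)
	{
		for (int s = 0; s < 3; s++) {
			struct folio *folio = swapin_direct_stub(s);

			if (PTR_ERR(folio) == -EBUSY)
				puts("-EBUSY: parallel swapin, retry the fault");
			else if (!folio)
				puts("NULL: fall through to error checking");
			else
				puts("folio: proceed with the page fault");
		}
		return 0;
	}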

