All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: "Huang, Ying" <ying.huang@intel.com>,
	Chris Li <chrisl@kernel.org>, Minchan Kim <minchan@kernel.org>,
	Barry Song <v-songbaohua@oppo.com>,
	Ryan Roberts <ryan.roberts@arm.com>, Yu Zhao <yuzhao@google.com>,
	SeongJae Park <sj@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Nhat Pham <nphamcs@gmail.com>,
	Chengming Zhou <zhouchengming@bytedance.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [RFC PATCH 01/10] mm/filemap: split filemap storing logic into a standalone helper
Date: Wed, 27 Mar 2024 02:50:23 +0800	[thread overview]
Message-ID: <20240326185032.72159-2-ryncsn@gmail.com> (raw)
In-Reply-To: <20240326185032.72159-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

The swap cache can reuse this part for multi-index support. There is no
performance change on the page cache side beyond noise:

Tested in an 8G memory cgroup with a 16G brd ramdisk.

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap --rw=randread --time_based \
    --ramp_time=30s --runtime=5m --group_reporting

Before:
bw (  MiB/s): min=  493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
iops        : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651

After:
bw (  MiB/s): min=  298, max= 3840, per=100.00%, avg=2614.34, stdev=23.77, samples=8689
iops        : min=76464, max=983045, avg=669270.35, stdev=6084.31, samples=8689

Test results with THP (do a THP randread, then switch to 4K pages in the
hope that this triggers a lot of splitting):

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
      --buffered=1 --ioengine=mmap -thp=1 --readonly \
      --rw=randread --time_based --ramp_time=30s --runtime=10m \
      --group_reporting

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
      --buffered=1 --ioengine=mmap \
      --rw=randread --time_based --runtime=5s --group_reporting

Before:
bw (  KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
iops        : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146

READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec

After:
bw (  KiB/s): min= 4691, max=15666, per=100.00%, avg=8890.30, stdev=104.53, samples=19056
iops        : min= 1167, max= 3913, avg=2218.68, stdev=26.15, samples=19056

READ: bw=4590B/s (4590B/s), 4590B/s-4590B/s (4590B/s-4590B/s), io=64.0KiB (65.5kB), run=14275-14275msec

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/filemap.c | 124 +++++++++++++++++++++++++++------------------------
 1 file changed, 65 insertions(+), 59 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 90b86f22a9df..0ccdc9e92764 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -848,38 +848,23 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_folio);
 
-noinline int __filemap_add_folio(struct address_space *mapping,
-		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+static int __filemap_lock_store(struct xa_state *xas, struct folio *folio,
+				  pgoff_t index, gfp_t gfp, void **shadowp)
 {
-	XA_STATE(xas, &mapping->i_pages, index);
-	void *alloced_shadow = NULL;
-	int alloced_order = 0;
-	bool huge;
-	long nr;
-
-	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
-	mapping_set_update(&xas, mapping);
-
-	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
-	xas_set_order(&xas, index, folio_order(folio));
-	huge = folio_test_hugetlb(folio);
-	nr = folio_nr_pages(folio);
-
+	void *entry, *old, *alloced_shadow = NULL;
+	int order, split_order, alloced_order = 0;
 	gfp &= GFP_RECLAIM_MASK;
-	folio_ref_add(folio, nr);
-	folio->mapping = mapping;
-	folio->index = xas.xa_index;
 
 	for (;;) {
-		int order = -1, split_order = 0;
-		void *entry, *old = NULL;
+		order = -1;
+		split_order = 0;
+		old = NULL;
 
-		xas_lock_irq(&xas);
-		xas_for_each_conflict(&xas, entry) {
+		xas_lock_irq(xas);
+		xas_for_each_conflict(xas, entry) {
 			old = entry;
 			if (!xa_is_value(entry)) {
-				xas_set_err(&xas, -EEXIST);
+				xas_set_err(xas, -EEXIST);
 				goto unlock;
 			}
 			/*
@@ -887,72 +872,93 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 			 * it will be the first and only entry iterated.
 			 */
 			if (order == -1)
-				order = xas_get_order(&xas);
+				order = xas_get_order(xas);
 		}
 
 		/* entry may have changed before we re-acquire the lock */
 		if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
-			xas_destroy(&xas);
+			xas_destroy(xas);
 			alloced_order = 0;
 		}
 
 		if (old) {
 			if (order > 0 && order > folio_order(folio)) {
-				/* How to handle large swap entries? */
-				BUG_ON(shmem_mapping(mapping));
 				if (!alloced_order) {
 					split_order = order;
 					goto unlock;
 				}
-				xas_split(&xas, old, order);
-				xas_reset(&xas);
+				xas_split(xas, old, order);
+				xas_reset(xas);
 			}
 			if (shadowp)
 				*shadowp = old;
 		}
 
-		xas_store(&xas, folio);
-		if (xas_error(&xas))
-			goto unlock;
-
-		mapping->nrpages += nr;
-
-		/* hugetlb pages do not participate in page cache accounting */
-		if (!huge) {
-			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
-			if (folio_test_pmd_mappable(folio))
-				__lruvec_stat_mod_folio(folio,
-						NR_FILE_THPS, nr);
-		}
-
+		xas_store(xas, folio);
+		if (!xas_error(xas))
+			return 0;
 unlock:
-		xas_unlock_irq(&xas);
+		xas_unlock_irq(xas);
 
 		/* split needed, alloc here and retry. */
 		if (split_order) {
-			xas_split_alloc(&xas, old, split_order, gfp);
-			if (xas_error(&xas))
+			xas_split_alloc(xas, old, split_order, gfp);
+			if (xas_error(xas))
 				goto error;
 			alloced_shadow = old;
 			alloced_order = split_order;
-			xas_reset(&xas);
+			xas_reset(xas);
 			continue;
 		}
 
-		if (!xas_nomem(&xas, gfp))
+		if (!xas_nomem(xas, gfp))
 			break;
 	}
 
-	if (xas_error(&xas))
-		goto error;
-
-	trace_mm_filemap_add_to_page_cache(folio);
-	return 0;
 error:
-	folio->mapping = NULL;
-	/* Leave page->index set: truncation relies upon it */
-	folio_put_refs(folio, nr);
-	return xas_error(&xas);
+	return xas_error(xas);
+}
+
+noinline int __filemap_add_folio(struct address_space *mapping,
+		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+{
+	XA_STATE(xas, &mapping->i_pages, index);
+	bool huge;
+	long nr;
+	int ret;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	mapping_set_update(&xas, mapping);
+
+	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+	xas_set_order(&xas, index, folio_order(folio));
+	huge = folio_test_hugetlb(folio);
+	nr = folio_nr_pages(folio);
+
+	folio_ref_add(folio, nr);
+	folio->mapping = mapping;
+	folio->index = xas.xa_index;
+
+	ret = __filemap_lock_store(&xas, folio, index, gfp, shadowp);
+	if (!ret) {
+		mapping->nrpages += nr;
+		/* hugetlb pages do not participate in page cache accounting */
+		if (!huge) {
+			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+			if (folio_test_pmd_mappable(folio))
+				__lruvec_stat_mod_folio(folio,
+						NR_FILE_THPS, nr);
+		}
+		xas_unlock_irq(&xas);
+		trace_mm_filemap_add_to_page_cache(folio);
+	} else {
+		folio->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
+		folio_put_refs(folio, nr);
+	}
+
+	return ret;
 }
 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
-- 
2.43.0


  reply	other threads:[~2024-03-26 19:04 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-26 18:50 [RFC PATCH 00/10] mm/swap: always use swap cache for synchronization Kairui Song
2024-03-26 18:50 ` Kairui Song [this message]
2024-03-26 18:50 ` [RFC PATCH 02/10] mm/swap: move no readahead swapin code to a stand-alone helper Kairui Song
2024-03-26 18:50 ` [RFC PATCH 03/10] mm/swap: convert swapin_readahead to return a folio Kairui Song
2024-03-26 20:03   ` Matthew Wilcox
2024-03-26 18:50 ` [RFC PATCH 04/10] mm/swap: remove cache bypass swapin Kairui Song
2024-03-27  6:30   ` Huang, Ying
2024-03-27  6:55     ` Kairui Song
2024-03-27  7:29       ` Huang, Ying
2024-03-26 18:50 ` [RFC PATCH 05/10] mm/swap: clean shadow only in unmap path Kairui Song
2024-03-26 18:50 ` [RFC PATCH 06/10] mm/swap: switch to use multi index entries Kairui Song
2024-03-26 18:50 ` [RFC PATCH 07/10] mm/swap: rename __read_swap_cache_async to swap_cache_alloc_or_get Kairui Song
2024-03-26 18:50 ` [RFC PATCH 08/10] mm/swap: use swap cache as a synchronization layer Kairui Song
2024-03-26 18:50 ` [RFC PATCH 09/10] mm/swap: delay the swap cache lookup for swapin Kairui Song
2024-03-26 18:50 ` [RFC PATCH 10/10] mm/swap: optimize synchronous swapin Kairui Song
2024-03-27  6:22   ` Huang, Ying
2024-03-27  6:37     ` Kairui Song
2024-03-27  6:47       ` Huang, Ying
2024-03-27  7:14         ` Kairui Song
2024-03-27  8:16           ` Huang, Ying
2024-03-27  8:08   ` Barry Song
2024-03-27  8:44     ` Kairui Song
2024-03-27  2:52 ` [RFC PATCH 00/10] mm/swap: always use swap cache for synchronization Huang, Ying
2024-03-27  3:01   ` Kairui Song
2024-03-27  8:27     ` Ryan Roberts
2024-03-27  8:32       ` Huang, Ying
2024-03-27  9:39         ` Ryan Roberts
2024-03-27 11:04       ` Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240326185032.72159-2-ryncsn@gmail.com \
    --to=ryncsn@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=nphamcs@gmail.com \
    --cc=ryan.roberts@arm.com \
    --cc=sj@kernel.org \
    --cc=v-songbaohua@oppo.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    --cc=yosryahmed@google.com \
    --cc=yuzhao@google.com \
    --cc=zhouchengming@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.