From: Yin Fengwei <fengwei.yin@intel.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
	willy@infradead.org, kirill@shutemov.name, yuzhao@google.com,
	ryan.roberts@arm.com, ying.huang@intel.com
Cc: fengwei.yin@intel.com
Subject: [PATCH v3 2/2] lru: allow batched add of large folios to lru list
Date: Sat, 29 Apr 2023 16:27:59 +0800
Message-ID: <20230429082759.1600796-3-fengwei.yin@intel.com>
In-Reply-To: <20230429082759.1600796-1-fengwei.yin@intel.com>

Currently, large folios are not batch-added to the LRU list, which
causes high LRU lock contention once large folios are enabled for
anonymous mappings.
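
The pattern being extended here stages folios in a small per-CPU
batch so that the LRU lock is taken once per flush rather than once
per folio. A minimal userspace sketch of the idea (all names are
invented for illustration; this is not the kernel code):

	#include <pthread.h>

	#define BATCH_SIZE 15

	struct item { struct item *next; };

	static struct item *list_head;
	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	struct batch {
		int nr;
		struct item *items[BATCH_SIZE];
	};

	/* One lock acquisition covers the whole batch. */
	static void batch_flush(struct batch *b)
	{
		pthread_mutex_lock(&list_lock);
		for (int i = 0; i < b->nr; i++) {
			b->items[i]->next = list_head;
			list_head = b->items[i];
		}
		pthread_mutex_unlock(&list_lock);
		b->nr = 0;
	}

	/* Stage locally; only a full batch touches the shared lock. */
	static void batch_add(struct batch *b, struct item *it)
	{
		b->items[b->nr++] = it;
		if (b->nr == BATCH_SIZE)
			batch_flush(b);
	}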

Running will-it-scale's page_fault1 with order-2 folios and 96
processes on an Ice Lake 48C/96T system, LRU lock contention is
around 64%:
-   64.31%     0.23%  page_fault1_pro  [kernel.kallsyms]           [k] folio_lruvec_lock_irqsave
   - 64.07% folio_lruvec_lock_irqsave
      + 64.01% _raw_spin_lock_irqsave

With this patch, LRU lock contention drops to about 43% in the same
test:
-   42.67%     0.19%  page_fault1_pro  [kernel.kallsyms]           [k] folio_lruvec_lock_irqsave
   - 42.48% folio_lruvec_lock_irqsave
      + 42.42% _raw_spin_lock_irqsave

Reported-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 include/linux/pagevec.h | 46 ++++++++++++++++++++++++++++++++++++++---
 mm/mlock.c              |  7 +++----
 mm/swap.c               |  3 +--
 3 files changed, 47 insertions(+), 9 deletions(-)
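
The core of the change: each batch now tracks how many base pages it
covers and reports itself full once that count exceeds PAGEVEC_SIZE,
so even a batch of large folios is flushed after a bounded amount of
deferred LRU work. Condensed from the hunks below (the tagged-pointer
case used by mlock is handled by passing nr_pages explicitly):

	nr_pages = xa_is_value(folio) ? 1 : folio_nr_pages(folio);
	fbatch->folios[fbatch->nr++] = folio;
	fbatch->nr_pages += nr_pages;
	/* Full once the batch spans more than PAGEVEC_SIZE base pages,
	 * even if folio slots remain; the caller then flushes. */
	if (fbatch->nr_pages > PAGEVEC_SIZE)
		return 0;
	return fbatch_space(fbatch);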

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index f582f7213ea5..9479b7b50bc6 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -10,6 +10,7 @@
 #define _LINUX_PAGEVEC_H
 
 #include <linux/xarray.h>
+#include <linux/mm.h>
 
 /* 15 pointers + header align the pagevec structure to a power of two */
 #define PAGEVEC_SIZE	15
@@ -22,6 +23,7 @@ struct address_space;
 struct pagevec {
 	unsigned char nr;
 	bool percpu_pvec_drained;
+	unsigned short nr_pages;
 	struct page *pages[PAGEVEC_SIZE];
 };
 
@@ -30,12 +32,14 @@ void __pagevec_release(struct pagevec *pvec);
 static inline void pagevec_init(struct pagevec *pvec)
 {
 	pvec->nr = 0;
+	pvec->nr_pages = 0;
 	pvec->percpu_pvec_drained = false;
 }
 
 static inline void pagevec_reinit(struct pagevec *pvec)
 {
 	pvec->nr = 0;
+	pvec->nr_pages = 0;
 }
 
 static inline unsigned pagevec_count(struct pagevec *pvec)
@@ -54,7 +58,12 @@ static inline unsigned pagevec_space(struct pagevec *pvec)
 static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page)
 {
 	pvec->pages[pvec->nr++] = page;
-	return pagevec_space(pvec);
+	pvec->nr_pages += compound_nr(page);
+
+	if (pvec->nr_pages > PAGEVEC_SIZE)
+		return 0;
+	else
+		return pagevec_space(pvec);
 }
 
 static inline void pagevec_release(struct pagevec *pvec)
@@ -75,6 +84,7 @@ static inline void pagevec_release(struct pagevec *pvec)
 struct folio_batch {
 	unsigned char nr;
 	bool percpu_pvec_drained;
+	unsigned short nr_pages;
 	struct folio *folios[PAGEVEC_SIZE];
 };
 
@@ -92,12 +102,14 @@ static_assert(offsetof(struct pagevec, pages) ==
 static inline void folio_batch_init(struct folio_batch *fbatch)
 {
 	fbatch->nr = 0;
+	fbatch->nr_pages = 0;
 	fbatch->percpu_pvec_drained = false;
 }
 
 static inline void folio_batch_reinit(struct folio_batch *fbatch)
 {
 	fbatch->nr = 0;
+	fbatch->nr_pages = 0;
 }
 
 static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
@@ -110,6 +122,32 @@ static inline unsigned int fbatch_space(struct folio_batch *fbatch)
 	return PAGEVEC_SIZE - fbatch->nr;
 }
 
+/**
+ * folio_batch_add_nr_pages() - Add a folio to a batch.
+ * @fbatch: The folio batch.
+ * @folio: The folio to add.
+ * @nr_pages: The number of base pages @folio spans.
+ *
+ * The folio is added to the end of the batch.
+ * The batch must have previously been initialised using folio_batch_init().
+ *
+ * Return: The number of slots still available; 0 once the batch
+ *         spans more than PAGEVEC_SIZE pages.
+ * Note: @folio may be a tagged pointer (e.g. from mlock_lru()), so
+ *       folio_nr_pages() cannot be used here; callers pass @nr_pages.
+ */
+static inline unsigned folio_batch_add_nr_pages(struct folio_batch *fbatch,
+		struct folio *folio, unsigned int nr_pages)
+{
+	fbatch->folios[fbatch->nr++] = folio;
+	fbatch->nr_pages += nr_pages;
+
+	if (fbatch->nr_pages > PAGEVEC_SIZE)
+		return 0;
+	else
+		return fbatch_space(fbatch);
+}
+
 /**
  * folio_batch_add() - Add a folio to a batch.
  * @fbatch: The folio batch.
@@ -123,8 +161,10 @@ static inline unsigned int fbatch_space(struct folio_batch *fbatch)
 static inline unsigned folio_batch_add(struct folio_batch *fbatch,
 		struct folio *folio)
 {
-	fbatch->folios[fbatch->nr++] = folio;
-	return fbatch_space(fbatch);
+	unsigned int nr_pages;
+
+	nr_pages = xa_is_value(folio) ? 1 : folio_nr_pages(folio);
+	return folio_batch_add_nr_pages(fbatch, folio, nr_pages);
 }
 
 static inline void folio_batch_release(struct folio_batch *fbatch)
diff --git a/mm/mlock.c b/mm/mlock.c
index 617469fce96d..6de3e6d4639f 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -243,19 +243,18 @@ bool need_mlock_drain(int cpu)
 void mlock_folio(struct folio *folio)
 {
 	struct folio_batch *fbatch;
+	unsigned int nr_pages = folio_nr_pages(folio);
 
 	local_lock(&mlock_fbatch.lock);
 	fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
 
 	if (!folio_test_set_mlocked(folio)) {
-		int nr_pages = folio_nr_pages(folio);
-
 		zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
 		__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
 	}
 
 	folio_get(folio);
-	if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
+	if (!folio_batch_add_nr_pages(fbatch, mlock_lru(folio), nr_pages) ||
 	    folio_test_large(folio) || lru_cache_disabled())
 		mlock_folio_batch(fbatch);
 	local_unlock(&mlock_fbatch.lock);
@@ -278,7 +277,7 @@ void mlock_new_folio(struct folio *folio)
 	__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
 
 	folio_get(folio);
-	if (!folio_batch_add(fbatch, mlock_new(folio)) ||
+	if (!folio_batch_add_nr_pages(fbatch, mlock_new(folio), nr_pages) ||
 	    folio_test_large(folio) || lru_cache_disabled())
 		mlock_folio_batch(fbatch);
 	local_unlock(&mlock_fbatch.lock);
diff --git a/mm/swap.c b/mm/swap.c
index 57cb01b042f6..0f8554aeb338 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -228,8 +228,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 static void folio_batch_add_and_move(struct folio_batch *fbatch,
 		struct folio *folio, move_fn_t move_fn)
 {
-	if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
-	    !lru_cache_disabled())
+	if (folio_batch_add(fbatch, folio) && !lru_cache_disabled())
 		return;
 	folio_batch_move_lru(fbatch, move_fn);
 }
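
With page-based accounting in the batch, folio_batch_add_and_move()
no longer needs the folio_test_large() escape hatch removed above: a
large folio is staged like any other, and folio_batch_add() reports
the batch full once it spans more than PAGEVEC_SIZE pages. As a rough
worked example with order-2 (4-page) folios, nr_pages grows 4, 8, 12,
16, so the batch flushes on the 4th folio: one LRU lock acquisition
per four folios instead of the previous one per large folio.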
-- 
2.34.1


