From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
To: akpm@linux-foundation.org, frederic@kernel.org
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	tglx@linutronix.de, cl@linux.com, peterz@infradead.org,
	juri.lelli@redhat.com, mingo@redhat.com, mtosatti@redhat.com,
	nilal@redhat.com, mgorman@suse.de, ppandit@redhat.com,
	williams@redhat.com, bigeasy@linutronix.de,
	anna-maria@linutronix.de, linux-rt-users@vger.kernel.org,
	Nicolas Saenz Julienne <nsaenzju@redhat.com>
Subject: [PATCH 4/6] mm/page_alloc: Introduce alternative per-cpu list locking
Date: Tue, 21 Sep 2021 18:13:22 +0200
Message-ID: <20210921161323.607817-5-nsaenzju@redhat.com>
In-Reply-To: <20210921161323.607817-1-nsaenzju@redhat.com>

page_alloc.c's per-cpu page lists are currently protected by local
locks. While fast, local locks don't allow remote access to these
structures: CPUs that need a system-wide per-cpu list drain work around
this by scheduling drain work on all CPUs. Some setups, such as systems
with NOHZ_FULL CPUs, are ill suited to that approach, since their
isolated CPUs can't tolerate interruptions of any sort.
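
To make that cost concrete, today's drain path queues and flushes a
work item on every CPU, roughly along these lines. This is a simplified
sketch of __drain_all_pages(): drain_all_pcplists(), pcp_drain_work and
pcp_drain_fn() are placeholder names, while mm_percpu_wq and
drain_local_pages() are real mainline symbols.

static DEFINE_PER_CPU(struct work_struct, pcp_drain_work);

static void pcp_drain_fn(struct work_struct *work)
{
	/* NULL zone means: drain this CPU's pcplists for all zones. */
	drain_local_pages(NULL);
}

static void drain_all_pcplists(void)
{
	int cpu;

	/* Queuing work interrupts every CPU, NOHZ_FULL ones included. */
	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(&pcp_drain_work, cpu);

		INIT_WORK(work, pcp_drain_fn);
		queue_work_on(cpu, mm_percpu_wq, work);
	}
	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(&pcp_drain_work, cpu));
}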

To mitigate this, introduce an alternative locking scheme based on
per-cpu spinlocks that permits remote access to these per-cpu page
lists. It is disabled by default and gated behind the
'remote_pcpu_cache_access' static key, so there is no functional change
for regular users. Upcoming patches will make use of this static key.
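
For reference, the static-key plumbing this relies on looks roughly as
follows. This is a minimal sketch: the key's declaration is assumed
from earlier in the series, and remote_pcp_access_init() is a made-up
stand-in for what patch 6/6 does on NOHZ_FULL systems.

/* Sketch: the key itself is introduced earlier in this series. */
DEFINE_STATIC_KEY_FALSE(remote_pcpu_cache_access);

/* Sketch: flipped once, early at boot, before any remote access. */
static int __init remote_pcp_access_init(void)
{
	if (tick_nohz_full_enabled())
		static_branch_enable(&remote_pcpu_cache_access);
	return 0;
}
early_initcall(remote_pcp_access_init);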

This is based on previous work by Thomas Gleixner, Anna-Maria Gleixner,
and Sebastian Andrzej Siewior[1].

[1] https://patchwork.kernel.org/project/linux-mm/patch/20190424111208.24459-3-bigeasy@linutronix.de/

Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
---
 mm/page_alloc.c | 87 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 19 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3b610b05d9b8..3244eb2ab51b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -123,10 +123,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
 
 struct pagesets {
-	local_lock_t lock;
+	local_lock_t local;
+	spinlock_t spin;
 };
 static DEFINE_PER_CPU(struct pagesets, pagesets) = {
-	.lock = INIT_LOCAL_LOCK(lock),
+	.local = INIT_LOCAL_LOCK(pagesets.local),
+	.spin = __SPIN_LOCK_UNLOCKED(pagesets.spin),
 };
 
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
@@ -207,6 +209,52 @@ static int __init early_init_on_free(char *buf)
 }
 early_param("init_on_free", early_init_on_free);
 
+static inline void pagesets_lock_irqsave(struct pagesets *locks,
+					 unsigned long *flagsp)
+{
+	if (static_branch_unlikely(&remote_pcpu_cache_access)) {
+		/* Avoid migration between this_cpu_ptr() and spin_lock_irqsave() */
+		migrate_disable();
+		spin_lock_irqsave(this_cpu_ptr(&locks->spin), *flagsp);
+	} else {
+		local_lock_irqsave(&locks->local, *flagsp);
+	}
+}
+
+/*
+ * pagesets_lock_irqsave_cpu() may target a remote CPU only when
+ * 'remote_pcpu_cache_access' is enabled or that CPU is offline. It can
+ * always be used on the local CPU, with migration disabled.
+ */
+static inline void pagesets_lock_irqsave_cpu(struct pagesets *locks,
+					     unsigned long *flagsp, int cpu)
+{
+	if (static_branch_unlikely(&remote_pcpu_cache_access))
+		spin_lock_irqsave(per_cpu_ptr(&locks->spin, cpu), *flagsp);
+	else
+		local_lock_irqsave(&locks->local, *flagsp);
+}
+
+static inline void pagesets_unlock_irqrestore(struct pagesets *locks,
+					      unsigned long flags)
+{
+	if (static_branch_unlikely(&remote_pcpu_cache_access)) {
+		spin_unlock_irqrestore(this_cpu_ptr(&locks->spin), flags);
+		migrate_enable();
+	} else {
+		local_unlock_irqrestore(&locks->local, flags);
+	}
+}
+
+static inline void pagesets_unlock_irqrestore_cpu(struct pagesets *locks,
+						  unsigned long flags, int cpu)
+{
+	if (static_branch_unlikely(&remote_pcpu_cache_access))
+		spin_unlock_irqrestore(per_cpu_ptr(&locks->spin, cpu), flags);
+	else
+		local_unlock_irqrestore(&locks->local, flags);
+}
+
 /*
  * A cached value of the page's pageblock's migratetype, used when the page is
  * put on a pcplist. Used to avoid the pageblock migratetype lookup when
@@ -3064,12 +3112,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	unsigned long flags;
 	int to_drain, batch;
 
-	local_lock_irqsave(&pagesets.lock, flags);
+	pagesets_lock_irqsave(&pagesets, &flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
 		free_pcppages_bulk(zone, to_drain, pcp);
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore(&pagesets, flags);
 }
 #endif
 
@@ -3077,21 +3125,22 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  * Drain pcplists of the indicated processor and zone.
  *
  * The processor must either be the current processor and the
- * thread pinned to the current processor or a processor that
- * is not online.
+ * thread pinned to the current processor, a processor that
+ * is not online, or a remote processor while 'remote_pcpu_cache_access' is
+ * enabled.
  */
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
 	unsigned long flags;
 	struct per_cpu_pages *pcp;
 
-	local_lock_irqsave(&pagesets.lock, flags);
+	pagesets_lock_irqsave_cpu(&pagesets, &flags, cpu);
 
 	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
 	if (pcp->count)
 		free_pcppages_bulk(zone, pcp->count, pcp);
 
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore_cpu(&pagesets, flags, cpu);
 }
 
 /*
@@ -3402,9 +3451,9 @@ void free_unref_page(struct page *page, unsigned int order)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
-	local_lock_irqsave(&pagesets.lock, flags);
+	pagesets_lock_irqsave(&pagesets, &flags);
 	free_unref_page_commit(page, pfn, migratetype, order);
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore(&pagesets, flags);
 }
 
 /*
@@ -3439,7 +3488,7 @@ void free_unref_page_list(struct list_head *list)
 		set_page_private(page, pfn);
 	}
 
-	local_lock_irqsave(&pagesets.lock, flags);
+	pagesets_lock_irqsave(&pagesets, &flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_private(page);
 		set_page_private(page, 0);
@@ -3460,12 +3509,12 @@ void free_unref_page_list(struct list_head *list)
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_unlock_irqrestore(&pagesets.lock, flags);
+			pagesets_unlock_irqrestore(&pagesets, flags);
 			batch_count = 0;
-			local_lock_irqsave(&pagesets.lock, flags);
+			pagesets_lock_irqsave(&pagesets, &flags);
 		}
 	}
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore(&pagesets, flags);
 }
 
 /*
@@ -3639,7 +3688,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct page *page;
 	unsigned long flags;
 
-	local_lock_irqsave(&pagesets.lock, flags);
+	pagesets_lock_irqsave(&pagesets, &flags);
 
 	/*
 	 * On allocation, reduce the number of pages that are batch freed.
@@ -3650,7 +3699,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	pcp->free_factor >>= 1;
 	list = &pcp->lists[order_to_pindex(migratetype, order)];
 	page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore(&pagesets, flags);
 	if (page) {
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone, 1);
@@ -5270,7 +5319,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		goto failed;
 
 	/* Attempt the batch allocation */
-	local_lock_irqsave(&pagesets.lock, flags);
+	pagesets_lock_irqsave(&pagesets, &flags);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
 
@@ -5300,7 +5349,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore(&pagesets, flags);
 
 	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
 	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
@@ -5309,7 +5358,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	return nr_populated;
 
 failed_irq:
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	pagesets_unlock_irqrestore(&pagesets, flags);
 
 failed:
 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
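
For illustration, once 'remote_pcpu_cache_access' is enabled, a remote
drain boils down to taking the target CPU's spinlock instead of
interrupting it. A minimal sketch using the _cpu helpers above;
drain_remote_pages_zone() is hypothetical, the real wiring lands in
patch 5/6 of this series:

/* Sketch: drain @cpu's pcplists for @zone from a housekeeping CPU. */
static void drain_remote_pages_zone(unsigned int cpu, struct zone *zone)
{
	struct per_cpu_pages *pcp;
	unsigned long flags;

	pagesets_lock_irqsave_cpu(&pagesets, &flags, cpu);
	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
	if (pcp->count)
		free_pcppages_bulk(zone, pcp->count, pcp);
	pagesets_unlock_irqrestore_cpu(&pagesets, flags, cpu);
}
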
-- 
2.31.1


Thread overview: 20+ messages
2021-09-21 16:13 [PATCH 0/6] mm: Remote LRU per-cpu pagevec cache/per-cpu page list drain support Nicolas Saenz Julienne
2021-09-21 16:13 ` [PATCH 1/6] mm/swap: Introduce lru_cpu_needs_drain() Nicolas Saenz Julienne
2021-09-21 16:13 ` [PATCH 2/6] mm/swap: Introduce alternative per-cpu LRU cache locking Nicolas Saenz Julienne
2021-09-21 22:03   ` Peter Zijlstra
2021-09-22  8:47     ` nsaenzju
2021-09-22  9:20       ` Sebastian Andrzej Siewior
2021-09-22  9:50         ` nsaenzju
2021-09-22 11:37       ` Peter Zijlstra
2021-09-22 11:43         ` nsaenzju
2021-09-21 16:13 ` [PATCH 3/6] mm/swap: Allow remote LRU cache draining Nicolas Saenz Julienne
2021-09-21 16:13 ` Nicolas Saenz Julienne [this message]
2021-09-21 16:13 ` [PATCH 5/6] mm/page_alloc: Allow remote per-cpu page list draining Nicolas Saenz Julienne
2021-09-21 16:13 ` [PATCH 6/6] sched/isolation: Enable 'remote_pcpu_cache_access' on NOHZ_FULL systems Nicolas Saenz Julienne
2021-09-21 17:51 ` [PATCH 0/6] mm: Remote LRU per-cpu pagevec cache/per-cpu page list drain support Andrew Morton
2021-09-21 17:59 ` Vlastimil Babka
2021-09-22 11:28   ` Peter Zijlstra
2021-09-22 22:09     ` Thomas Gleixner
2021-09-23  7:12       ` Vlastimil Babka
2021-09-23 10:36         ` Thomas Gleixner
2021-09-27  9:30       ` nsaenzju
