linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@techsingularity.net>
To: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Michal Hocko <mhocko@kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Linux-MM <linux-mm@kvack.org>,
	Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 6/6] mm/page_alloc: Remotely drain per-cpu lists
Date: Wed, 20 Apr 2022 10:59:06 +0100	[thread overview]
Message-ID: <20220420095906.27349-7-mgorman@techsingularity.net> (raw)
In-Reply-To: <20220420095906.27349-1-mgorman@techsingularity.net>

From: Nicolas Saenz Julienne <nsaenzju@redhat.com>

Some setups, notably NOHZ_FULL CPUs, are too busy to handle the per-cpu
drain work queued by __drain_all_pages(). So introduce new a mechanism
to remotely drain the per-cpu lists. It is made possible by remotely
locking 'struct per_cpu_pages' new per-cpu spinlocks. A benefit of this
new scheme is that drain operations are now migration safe.

There was no observed performance degradation vs. the previous scheme.
Both netperf and hackbench were run in parallel to triggering the
__drain_all_pages(NULL, true) code path around ~100 times per second.
The new scheme performs a bit better (~5%), although the important point
here is there are no performance regressions vs. the previous mechanism.
Per-cpu lists draining happens only in slow paths.

Link: https://lore.kernel.org/r/20211103170512.2745765-4-nsaenzju@redhat.com
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 mm/page_alloc.c | 66 +++++++++----------------------------------------
 1 file changed, 11 insertions(+), 55 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 813c84b67c65..17d11eb0413e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -161,13 +161,7 @@ DEFINE_PER_CPU(int, _numa_mem_);		/* Kernel "local memory" node */
 EXPORT_PER_CPU_SYMBOL(_numa_mem_);
 #endif
 
-/* work_structs for global per-cpu drains */
-struct pcpu_drain {
-	struct zone *zone;
-	struct work_struct work;
-};
 static DEFINE_MUTEX(pcpu_drain_mutex);
-static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
@@ -3087,9 +3081,6 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  * Called from the vmstat counter updater to drain pagesets of this
  * currently executing processor on remote nodes after they have
  * expired.
- *
- * Note that this function must be called with the thread pinned to
- * a single processor.
  */
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
@@ -3114,10 +3105,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 
 /*
  * Drain pcplists of the indicated processor and zone.
- *
- * The processor must either be the current processor and the
- * thread pinned to the current processor or a processor that
- * is not online.
  */
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
@@ -3140,10 +3127,6 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 
 /*
  * Drain pcplists of all zones on the indicated processor.
- *
- * The processor must either be the current processor and the
- * thread pinned to the current processor or a processor that
- * is not online.
  */
 static void drain_pages(unsigned int cpu)
 {
@@ -3156,9 +3139,6 @@ static void drain_pages(unsigned int cpu)
 
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- *
- * The CPU has to be pinned. When zone parameter is non-NULL, spill just
- * the single zone's pages.
  */
 void drain_local_pages(struct zone *zone)
 {
@@ -3170,24 +3150,6 @@ void drain_local_pages(struct zone *zone)
 		drain_pages(cpu);
 }
 
-static void drain_local_pages_wq(struct work_struct *work)
-{
-	struct pcpu_drain *drain;
-
-	drain = container_of(work, struct pcpu_drain, work);
-
-	/*
-	 * drain_all_pages doesn't use proper cpu hotplug protection so
-	 * we can race with cpu offline when the WQ can move this from
-	 * a cpu pinned worker to an unbound one. We can operate on a different
-	 * cpu which is alright but we also have to make sure to not move to
-	 * a different one.
-	 */
-	migrate_disable();
-	drain_local_pages(drain->zone);
-	migrate_enable();
-}
-
 /*
  * The implementation of drain_all_pages(), exposing an extra parameter to
  * drain on all cpus.
@@ -3208,13 +3170,6 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
 	 */
 	static cpumask_t cpus_with_pcps;
 
-	/*
-	 * Make sure nobody triggers this path before mm_percpu_wq is fully
-	 * initialized.
-	 */
-	if (WARN_ON_ONCE(!mm_percpu_wq))
-		return;
-
 	/*
 	 * Do not drain if one is already in progress unless it's specific to
 	 * a zone. Such callers are primarily CMA and memory hotplug and need
@@ -3264,14 +3219,12 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
 	}
 
 	for_each_cpu(cpu, &cpus_with_pcps) {
-		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
-
-		drain->zone = zone;
-		INIT_WORK(&drain->work, drain_local_pages_wq);
-		queue_work_on(cpu, mm_percpu_wq, &drain->work);
+		if (zone) {
+			drain_pages_zone(cpu, zone);
+		} else {
+			drain_pages(cpu);
+		}
 	}
-	for_each_cpu(cpu, &cpus_with_pcps)
-		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
 
 	mutex_unlock(&pcpu_drain_mutex);
 }
@@ -3280,8 +3233,6 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
  *
  * When zone parameter is non-NULL, spill just the single zone's pages.
- *
- * Note that this can be extremely slow as the draining happens in a workqueue.
  */
 void drain_all_pages(struct zone *zone)
 {
@@ -3559,7 +3510,12 @@ void free_unref_page_list(struct list_head *list)
 
 		trace_mm_page_free_batched(page);
 
-		/* True is dead code at the moment due to local_lock_irqsave. */
+		/*
+		 * If there is a parallel drain in progress, free to the buddy
+		 * allocator directly. This is expensive as the zone lock will
+		 * be acquired multiple times but if a drain is in progress
+		 * then an expensive operation is already taking place.
+		 */
 		if (unlikely(!free_unref_page_commit(page, migratetype, 0, true)))
 			free_one_page(page_zone(page), page, page_to_pfn(page), 0, migratetype, FPI_NONE);
 
-- 
2.34.1


  parent reply	other threads:[~2022-04-20 10:00 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-20  9:59 [RFC PATCH 0/6] Drain remote per-cpu directly Mel Gorman
2022-04-20  9:59 ` [PATCH 1/6] mm/page_alloc: Add page->buddy_list and page->pcp_list Mel Gorman
2022-04-20 20:43   ` Matthew Wilcox
2022-04-21  8:38     ` Mel Gorman
2022-04-20  9:59 ` [PATCH 2/6] mm/page_alloc: Use only one PCP list for THP-sized allocations Mel Gorman
2022-04-20  9:59 ` [PATCH 3/6] mm/page_alloc: Split out buddy removal code from rmqueue into separate helper Mel Gorman
2022-04-20  9:59 ` [PATCH 4/6] mm/page_alloc: Remove unnecessary page == NULL check in rmqueue Mel Gorman
2022-04-20  9:59 ` [PATCH 5/6] mm/page_alloc: Protect PCP lists with a spinlock Mel Gorman
2022-04-26 16:42   ` Nicolas Saenz Julienne
2022-04-26 16:48     ` Vlastimil Babka
2022-04-29  9:13     ` Mel Gorman
2022-04-26 19:24   ` Minchan Kim
2022-04-29  9:05     ` Mel Gorman
2022-04-20  9:59 ` Mel Gorman [this message]
     [not found] ` <20220420140214.2330-1-hdanton@sina.com>
2022-04-20 14:35   ` Nicolas Saenz Julienne
2022-04-25 22:58 ` [RFC PATCH 0/6] Drain remote per-cpu directly Minchan Kim
2022-04-26 11:06   ` Nicolas Saenz Julienne
2022-04-27 15:21     ` Marcelo Tosatti
2022-04-26  2:49 ` Suren Baghdasaryan
2022-04-26  6:30   ` Suren Baghdasaryan
2022-05-09 13:07 [RFC PATCH 0/6] Drain remote per-cpu directly v2 Mel Gorman
2022-05-09 13:08 ` [PATCH 6/6] mm/page_alloc: Remotely drain per-cpu lists Mel Gorman
2022-05-12  8:50 [PATCH 0/6] Drain remote per-cpu directly v3 Mel Gorman
2022-05-12  8:50 ` [PATCH 6/6] mm/page_alloc: Remotely drain per-cpu lists Mel Gorman
2022-05-12 19:37   ` Andrew Morton
2022-05-13 15:04     ` Mel Gorman
2022-05-13 15:19       ` Nicolas Saenz Julienne
2022-05-13 18:23         ` Mel Gorman
2022-05-17 12:57           ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220420095906.27349-7-mgorman@techsingularity.net \
    --to=mgorman@techsingularity.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=nsaenzju@redhat.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).