From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: linux-mm@kvack.org
Cc: tglx@linutronix.de, frederic@kernel.org,
	Christoph Lameter <cl@linux.com>,
	anna-maria@linutronix.de,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: [PATCH 2/4] mm/swap: Add static key dependent pagevec locking
Date: Wed, 24 Apr 2019 13:12:06 +0200	[thread overview]
Message-ID: <20190424111208.24459-3-bigeasy@linutronix.de> (raw)
In-Reply-To: <20190424111208.24459-1-bigeasy@linutronix.de>

From: Thomas Gleixner <tglx@linutronix.de>

The locking of struct pagevec is done by disabling preemption. In case
the struct has to be accessed from interrupt context, interrupts are
disabled as well. This means the struct can only be accessed locally by
the CPU. There is also no lockdep coverage which would scream if it were
accessed from the wrong context.
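
For illustration, the current access pattern (here __lru_cache_add(), as
removed further down in this patch) relies solely on get_cpu_var()
disabling preemption:

	static void __lru_cache_add(struct page *page)
	{
		/* "Locking": get_cpu_var() merely disables preemption */
		struct pagevec *pvec = &get_cpu_var(lru_add_pvec);

		get_page(page);
		if (!pagevec_add(pvec, page) || PageCompound(page))
			__pagevec_lru_add(pvec);
		put_cpu_var(lru_add_pvec);
	}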

Create struct swap_pagevec, which consists of a pagevec member and a
spinlock_t. Introduce a static key which changes the locking behavior
only if the key is set, in the following way: before the struct is
accessed, the spinlock has to be acquired instead of using
preempt_disable(). Since the struct is used CPU-locally there is no
spinning on the lock, but the lock is acquired immediately. If the
struct is accessed from interrupt context, spin_lock_irqsave() is used.
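
With the key enabled, the helpers introduced below take the per-CPU
spinlock; with it disabled, they fall back to get_cpu_var() resp.
local_irq_save() and behave as before. As a rough sketch, the resulting
caller-side pattern (shown here for lru_add_pvec) is:

	struct swap_pagevec *swpvec;
	struct pagevec *pvec;

	/* spin_lock() if use_pvec_lock, else get_cpu_var() */
	swpvec = lock_swap_pvec(&lru_add_pvec);
	pvec = &swpvec->pvec;
	/* ... fill or drain the CPU-local pagevec ... */
	/* spin_unlock() if use_pvec_lock, else put_cpu_var() */
	unlock_swap_pvec(swpvec, &lru_add_pvec);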

No functional change yet because the static key is not enabled.

[anna-maria: introduce static key]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/compaction.c |  14 ++--
 mm/internal.h   |   2 +
 mm/swap.c       | 186 +++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 165 insertions(+), 37 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 3319e0872d014..ec47c96186771 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2224,10 +2224,16 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 				block_start_pfn(cc->migrate_pfn, cc->order);
 
 			if (last_migrated_pfn < current_block_start) {
-				cpu = get_cpu();
-				lru_add_drain_cpu(cpu);
-				drain_local_pages(cc->zone);
-				put_cpu();
+				if (static_branch_unlikely(&use_pvec_lock)) {
+					cpu = get_cpu();
+					lru_add_drain_cpu(cpu);
+					drain_local_pages(cc->zone);
+					put_cpu();
+				} else {
+					cpu = raw_smp_processor_id();
+					lru_add_drain_cpu(cpu);
+					drain_cpu_pages(cpu, cc->zone);
+				}
 				/* No more flushing until we migrate again */
 				last_migrated_pfn = 0;
 			}
diff --git a/mm/internal.h b/mm/internal.h
index 9eeaf2b95166f..ddfa760e61652 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,6 +36,8 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
+extern struct static_key_false use_pvec_lock;
+
 void page_writeback_init(void);
 
 vm_fault_t do_swap_page(struct vm_fault *vmf);
diff --git a/mm/swap.c b/mm/swap.c
index 301ed4e043205..136c80480dbde 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -43,14 +43,107 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
+DEFINE_STATIC_KEY_FALSE(use_pvec_lock);
+
+struct swap_pagevec {
+	spinlock_t	lock;
+	struct pagevec	pvec;
+};
+
+#define DEFINE_PER_CPU_PAGEVEC(lvar)				\
+	DEFINE_PER_CPU(struct swap_pagevec, lvar) = {		\
+		.lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
+
+static DEFINE_PER_CPU_PAGEVEC(lru_add_pvec);
+static DEFINE_PER_CPU_PAGEVEC(lru_rotate_pvecs);
+static DEFINE_PER_CPU_PAGEVEC(lru_deactivate_file_pvecs);
+static DEFINE_PER_CPU_PAGEVEC(lru_lazyfree_pvecs);
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+static DEFINE_PER_CPU_PAGEVEC(activate_page_pvecs);
 #endif
 
+static inline
+struct swap_pagevec *lock_swap_pvec(struct swap_pagevec __percpu *p)
+{
+	struct swap_pagevec *swpvec;
+
+	if (static_branch_likely(&use_pvec_lock)) {
+		swpvec = raw_cpu_ptr(p);
+
+		spin_lock(&swpvec->lock);
+	} else {
+		swpvec = &get_cpu_var(*p);
+	}
+	return swpvec;
+}
+
+static inline struct swap_pagevec *
+lock_swap_pvec_cpu(struct swap_pagevec __percpu *p, int cpu)
+{
+	struct swap_pagevec *swpvec = per_cpu_ptr(p, cpu);
+
+	if (static_branch_likely(&use_pvec_lock))
+		spin_lock(&swpvec->lock);
+
+	return swpvec;
+}
+
+static inline struct swap_pagevec *
+lock_swap_pvec_irqsave(struct swap_pagevec __percpu *p, unsigned long *flags)
+{
+	struct swap_pagevec *swpvec;
+
+	if (static_branch_likely(&use_pvec_lock)) {
+		swpvec = raw_cpu_ptr(p);
+
+		spin_lock_irqsave(&swpvec->lock, (*flags));
+	} else {
+		local_irq_save(*flags);
+
+		swpvec = this_cpu_ptr(p);
+	}
+	return swpvec;
+}
+
+static inline struct swap_pagevec *
+lock_swap_pvec_cpu_irqsave(struct swap_pagevec __percpu *p, int cpu,
+			   unsigned long *flags)
+{
+	struct swap_pagevec *swpvec = per_cpu_ptr(p, cpu);
+
+	if (static_branch_likely(&use_pvec_lock))
+		spin_lock_irqsave(&swpvec->lock, *flags);
+	else
+		local_irq_save(*flags);
+
+	return swpvec;
+}
+
+static inline void unlock_swap_pvec(struct swap_pagevec *swpvec,
+				    struct swap_pagevec __percpu *p)
+{
+	if (static_branch_likely(&use_pvec_lock))
+		spin_unlock(&swpvec->lock);
+	else
+		put_cpu_var(*p);
+
+}
+
+static inline void unlock_swap_pvec_cpu(struct swap_pagevec *swpvec)
+{
+	if (static_branch_likely(&use_pvec_lock))
+		spin_unlock(&swpvec->lock);
+}
+
+static inline void
+unlock_swap_pvec_irqrestore(struct swap_pagevec *swpvec, unsigned long flags)
+{
+	if (static_branch_likely(&use_pvec_lock))
+		spin_unlock_irqrestore(&swpvec->lock, flags);
+	else
+		local_irq_restore(flags);
+}
+
 /*
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs.  But it gets used by networking.
@@ -248,15 +341,17 @@ void rotate_reclaimable_page(struct page *page)
 {
 	if (!PageLocked(page) && !PageDirty(page) &&
 	    !PageUnevictable(page) && PageLRU(page)) {
+		struct swap_pagevec *swpvec;
 		struct pagevec *pvec;
 		unsigned long flags;
 
 		get_page(page);
-		local_irq_save(flags);
-		pvec = this_cpu_ptr(&lru_rotate_pvecs);
+
+		swpvec = lock_swap_pvec_irqsave(&lru_rotate_pvecs, &flags);
+		pvec = &swpvec->pvec;
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+		unlock_swap_pvec_irqrestore(swpvec, flags);
 	}
 }
 
@@ -291,27 +386,32 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
 #ifdef CONFIG_SMP
 static void activate_page_drain(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+	struct swap_pagevec *swpvec = lock_swap_pvec_cpu(&activate_page_pvecs, cpu);
+	struct pagevec *pvec = &swpvec->pvec;
 
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, __activate_page, NULL);
+	unlock_swap_pvec_cpu(swpvec);
 }
 
 static bool need_activate_page_drain(int cpu)
 {
-	return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
+	return pagevec_count(per_cpu_ptr(&activate_page_pvecs.pvec, cpu)) != 0;
 }
 
 void activate_page(struct page *page)
 {
 	page = compound_head(page);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+		struct swap_pagevec *swpvec;
+		struct pagevec *pvec;
 
 		get_page(page);
+		swpvec = lock_swap_pvec(&activate_page_pvecs);
+		pvec = &swpvec->pvec;
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
-		put_cpu_var(activate_page_pvecs);
+		unlock_swap_pvec(swpvec, &activate_page_pvecs);
 	}
 }
 
@@ -333,7 +433,8 @@ void activate_page(struct page *page)
 
 static void __lru_cache_activate_page(struct page *page)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+	struct swap_pagevec *swpvec = lock_swap_pvec(&lru_add_pvec);
+	struct pagevec *pvec = &swpvec->pvec;
 	int i;
 
 	/*
@@ -355,7 +456,7 @@ static void __lru_cache_activate_page(struct page *page)
 		}
 	}
 
-	put_cpu_var(lru_add_pvec);
+	unlock_swap_pvec(swpvec, &lru_add_pvec);
 }
 
 /*
@@ -397,12 +498,13 @@ EXPORT_SYMBOL(mark_page_accessed);
 
 static void __lru_cache_add(struct page *page)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+	struct swap_pagevec *swpvec = lock_swap_pvec(&lru_add_pvec);
+	struct pagevec *pvec = &swpvec->pvec;
 
 	get_page(page);
 	if (!pagevec_add(pvec, page) || PageCompound(page))
 		__pagevec_lru_add(pvec);
-	put_cpu_var(lru_add_pvec);
+	unlock_swap_pvec(swpvec, &lru_add_pvec);
 }
 
 /**
@@ -570,28 +672,34 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
  */
 void lru_add_drain_cpu(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
+	struct swap_pagevec *swpvec = lock_swap_pvec_cpu(&lru_add_pvec, cpu);
+	struct pagevec *pvec = &swpvec->pvec;
+	unsigned long flags;
 
 	if (pagevec_count(pvec))
 		__pagevec_lru_add(pvec);
+	unlock_swap_pvec_cpu(swpvec);
 
-	pvec = &per_cpu(lru_rotate_pvecs, cpu);
+	swpvec = lock_swap_pvec_cpu_irqsave(&lru_rotate_pvecs, cpu, &flags);
+	pvec = &swpvec->pvec;
 	if (pagevec_count(pvec)) {
-		unsigned long flags;
 
 		/* No harm done if a racing interrupt already did this */
-		local_irq_save(flags);
 		pagevec_move_tail(pvec);
-		local_irq_restore(flags);
 	}
+	unlock_swap_pvec_irqrestore(swpvec, flags);
 
-	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
+	swpvec = lock_swap_pvec_cpu(&lru_deactivate_file_pvecs, cpu);
+	pvec = &swpvec->pvec;
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
+	unlock_swap_pvec_cpu(swpvec);
 
-	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
+	swpvec = lock_swap_pvec_cpu(&lru_lazyfree_pvecs, cpu);
+	pvec = &swpvec->pvec;
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
+	unlock_swap_pvec_cpu(swpvec);
 
 	activate_page_drain(cpu);
 }
@@ -606,6 +714,9 @@ void lru_add_drain_cpu(int cpu)
  */
 void deactivate_file_page(struct page *page)
 {
+	struct swap_pagevec *swpvec;
+	struct pagevec *pvec;
+
 	/*
 	 * In a workload with many unevictable page such as mprotect,
 	 * unevictable page deactivation for accelerating reclaim is pointless.
@@ -614,11 +725,12 @@ void deactivate_file_page(struct page *page)
 		return;
 
 	if (likely(get_page_unless_zero(page))) {
-		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
+		swpvec = lock_swap_pvec(&lru_deactivate_file_pvecs);
+		pvec = &swpvec->pvec;
 
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
-		put_cpu_var(lru_deactivate_file_pvecs);
+		unlock_swap_pvec(swpvec, &lru_deactivate_file_pvecs);
 	}
 }
 
@@ -631,21 +743,29 @@ void deactivate_file_page(struct page *page)
  */
 void mark_page_lazyfree(struct page *page)
 {
+	struct swap_pagevec *swpvec;
+	struct pagevec *pvec;
+
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
+		swpvec = lock_swap_pvec(&lru_lazyfree_pvecs);
+		pvec = &swpvec->pvec;
 
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
-		put_cpu_var(lru_lazyfree_pvecs);
+		unlock_swap_pvec(swpvec, &lru_lazyfree_pvecs);
 	}
 }
 
 void lru_add_drain(void)
 {
-	lru_add_drain_cpu(get_cpu());
-	put_cpu();
+	if (static_branch_likely(&use_pvec_lock)) {
+		lru_add_drain_cpu(raw_smp_processor_id());
+	} else {
+		lru_add_drain_cpu(get_cpu());
+		put_cpu();
+	}
 }
 
 #ifdef CONFIG_SMP
@@ -683,10 +803,10 @@ void lru_add_drain_all(void)
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 
-		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
-		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
+		if (pagevec_count(&per_cpu(lru_add_pvec.pvec, cpu)) ||
+		    pagevec_count(&per_cpu(lru_rotate_pvecs.pvec, cpu)) ||
+		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs.pvec, cpu)) ||
+		    pagevec_count(&per_cpu(lru_lazyfree_pvecs.pvec, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
 			queue_work_on(cpu, mm_percpu_wq, work);
-- 
2.20.1



Thread overview: 8+ messages
2019-04-24 11:12 [PATCH 0/4 v2] mm/swap: Add locking for pagevec Sebastian Andrzej Siewior
2019-04-24 11:12 ` [PATCH 1/4] mm/page_alloc: Split drain_local_pages() Sebastian Andrzej Siewior
2019-04-24 11:12 ` Sebastian Andrzej Siewior [this message]
2019-04-24 11:12 ` [PATCH 3/4] mm/swap: Access struct pagevec remotely Sebastian Andrzej Siewior
2019-04-24 11:12 ` [PATCH 4/4] mm/swap: Enable "use_pvec_lock" nohz_full dependent Sebastian Andrzej Siewior
2019-04-24 12:15 ` [PATCH 0/4 v2] mm/swap: Add locking for pagevec Matthew Wilcox
2019-04-26  8:00   ` Sebastian Andrzej Siewior
2020-06-16 16:55   ` Marcelo Tosatti
