* [PATCH v3 3/7] mm/swap: Use local_lock for protection
       [not found] <20200527201119.1692513-1-bigeasy@linutronix.de>
@ 2020-05-27 20:11 ` Sebastian Andrzej Siewior
       [not found] ` <20200527201119.1692513-8-bigeasy@linutronix.de>
  1 sibling, 0 replies; 4+ messages in thread
From: Sebastian Andrzej Siewior @ 2020-05-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Steven Rostedt, Will Deacon,
	Thomas Gleixner, Paul E . McKenney, Linus Torvalds,
	Matthew Wilcox, Andrew Morton, linux-mm,
	Sebastian Andrzej Siewior

From: Ingo Molnar <mingo@kernel.org>

The various per-CPU struct pagevec variables are protected by disabling
either preemption or interrupts across the critical sections. Inside
these sections, spinlocks have to be acquired.

These spinlocks are regular spinlock_t types, which are converted to
"sleeping" spinlocks on PREEMPT_RT enabled kernels. Sleeping locks
obviously cannot be acquired in preemption- or interrupt-disabled
sections.

Local locks provide a straightforward substitute for these preempt and
interrupt disable instances. On a non-PREEMPT_RT kernel, local_lock() maps
to preempt_disable() and local_lock_irq() to local_irq_disable().
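
In condensed form, the pattern looks like this (an illustrative sketch
only; the foo_* names are invented for the example and are not the
structures added by this patch):

        /* Needs <linux/local_lock.h> and <linux/pagevec.h>. */
        struct foo_pvecs {
                local_lock_t lock;
                struct pagevec pvec;
        };
        static DEFINE_PER_CPU(struct foo_pvecs, foo_pvecs) = {
                .lock = INIT_LOCAL_LOCK(lock),
        };

        static void foo_cache_add(struct page *page)
        {
                struct pagevec *pvec;

                /* preempt_disable() on !PREEMPT_RT, a real per-CPU lock on PREEMPT_RT */
                local_lock(&foo_pvecs.lock);
                pvec = this_cpu_ptr(&foo_pvecs.pvec);
                if (!pagevec_add(pvec, page) || PageCompound(page))
                        __pagevec_lru_add(pvec);
                local_unlock(&foo_pvecs.lock);
        }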

Create a per-CPU lru_rotate structure containing its pagevec and the
local lock. Create lru_pvecs containing the remaining pagevecs and their
shared local lock. Add lru_add_drain_cpu_zone(), which is used from
compact_zone() to avoid exporting the pvec structures.

Change the relevant call sites to acquire these locks instead of using
preempt_disable() / get_cpu() / get_cpu_var() and local_irq_disable() /
local_irq_save().

There is neither a functional change nor a change in the generated
binary code for non-PREEMPT_RT enabled, non-debug kernels.

When lockdep is enabled, local locks have lockdep maps embedded. These
allow lockdep to validate the protections, i.e. inappropriate usage of a
section protected only by disabled preemption results in a lockdep
warning, while the same problem would go unnoticed with a plain
preempt_disable() based protection.

Local locks also improve readability, as they provide a named scope for
the protections, while preempt/interrupt disable sections are opaque and
scopeless.

Finally, local locks allow PREEMPT_RT to substitute them with real
locking primitives to ensure correct operation in a fully preemptible
kernel.

[ bigeasy: Adapted to use local_lock ]

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/swap.h |   1 +
 mm/compaction.c      |   6 +--
 mm/swap.c            | 118 +++++++++++++++++++++++++++++--------------
 3 files changed, 82 insertions(+), 43 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index e1bbf7a16b276..25181d2dd0b9f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -337,6 +337,7 @@ extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
+extern void lru_add_drain_cpu_zone(struct zone *zone);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
diff --git a/mm/compaction.c b/mm/compaction.c
index 46f0fcc93081e..c9d659e6a02c5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2243,15 +2243,11 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 		 * would succeed.
 		 */
 		if (cc->order > 0 && last_migrated_pfn) {
-			int cpu;
 			unsigned long current_block_start =
 				block_start_pfn(cc->migrate_pfn, cc->order);
 
 			if (last_migrated_pfn < current_block_start) {
-				cpu = get_cpu();
-				lru_add_drain_cpu(cpu);
-				drain_local_pages(cc->zone);
-				put_cpu();
+				lru_add_drain_cpu_zone(cc->zone);
 				/* No more flushing until we migrate again */
 				last_migrated_pfn = 0;
 			}
diff --git a/mm/swap.c b/mm/swap.c
index bf9a79fed62d7..0ac463d44cff4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -35,6 +35,7 @@
 #include <linux/uio.h>
 #include <linux/hugetlb.h>
 #include <linux/page_idle.h>
+#include <linux/local_lock.h>
 
 #include "internal.h"
 
@@ -44,14 +45,32 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
+/* Protecting only lru_rotate.pvec which requires disabling interrupts */
+struct lru_rotate {
+	local_lock_t lock;
+	struct pagevec pvec;
+};
+static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
+
+/*
+ * The following struct pagevec are grouped together because they are protected
+ * by disabling preemption (and interrupts remain enabled).
+ */
+struct lru_pvecs {
+	local_lock_t lock;
+	struct pagevec lru_add;
+	struct pagevec lru_deactivate_file;
+	struct pagevec lru_deactivate;
+	struct pagevec lru_lazyfree;
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+	struct pagevec activate_page;
 #endif
+};
+static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -254,11 +273,11 @@ void rotate_reclaimable_page(struct page *page)
 		unsigned long flags;
 
 		get_page(page);
-		local_irq_save(flags);
-		pvec = this_cpu_ptr(&lru_rotate_pvecs);
+		local_lock_irqsave(&lru_rotate.lock, flags);
+		pvec = this_cpu_ptr(&lru_rotate.pvec);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 }
 
@@ -293,7 +312,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
 #ifdef CONFIG_SMP
 static void activate_page_drain(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+	struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);
 
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, __activate_page, NULL);
@@ -301,19 +320,21 @@ static void activate_page_drain(int cpu)
 
 static bool need_activate_page_drain(int cpu)
 {
-	return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
+	return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
 }
 
 void activate_page(struct page *page)
 {
 	page = compound_head(page);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+		struct pagevec *pvec;
 
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.activate_page);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
-		put_cpu_var(activate_page_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -335,9 +356,12 @@ void activate_page(struct page *page)
 
 static void __lru_cache_activate_page(struct page *page)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+	struct pagevec *pvec;
 	int i;
 
+	local_lock(&lru_pvecs.lock);
+	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
+
 	/*
 	 * Search backwards on the optimistic assumption that the page being
 	 * activated has just been added to this pagevec. Note that only
@@ -357,7 +381,7 @@ static void __lru_cache_activate_page(struct page *page)
 		}
 	}
 
-	put_cpu_var(lru_add_pvec);
+	local_unlock(&lru_pvecs.lock);
 }
 
 /*
@@ -385,7 +409,7 @@ void mark_page_accessed(struct page *page)
 	} else if (!PageActive(page)) {
 		/*
 		 * If the page is on the LRU, queue it for activation via
-		 * activate_page_pvecs. Otherwise, assume the page is on a
+		 * lru_pvecs.activate_page. Otherwise, assume the page is on a
 		 * pagevec, mark it active and it'll be moved to the active
 		 * LRU on the next drain.
 		 */
@@ -404,12 +428,14 @@ EXPORT_SYMBOL(mark_page_accessed);
 
 static void __lru_cache_add(struct page *page)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+	struct pagevec *pvec;
 
+	local_lock(&lru_pvecs.lock);
+	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
 	get_page(page);
 	if (!pagevec_add(pvec, page) || PageCompound(page))
 		__pagevec_lru_add(pvec);
-	put_cpu_var(lru_add_pvec);
+	local_unlock(&lru_pvecs.lock);
 }
 
 /**
@@ -593,30 +619,30 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
  */
 void lru_add_drain_cpu(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
+	struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu);
 
 	if (pagevec_count(pvec))
 		__pagevec_lru_add(pvec);
 
-	pvec = &per_cpu(lru_rotate_pvecs, cpu);
+	pvec = &per_cpu(lru_rotate.pvec, cpu);
 	if (pagevec_count(pvec)) {
 		unsigned long flags;
 
 		/* No harm done if a racing interrupt already did this */
-		local_irq_save(flags);
+		local_lock_irqsave(&lru_rotate.lock, flags);
 		pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 
-	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
+	pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 
-	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+	pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
 
-	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
+	pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
 
@@ -641,11 +667,14 @@ void deactivate_file_page(struct page *page)
 		return;
 
 	if (likely(get_page_unless_zero(page))) {
-		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
+		struct pagevec *pvec;
+
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
 
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
-		put_cpu_var(lru_deactivate_file_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -660,12 +689,14 @@ void deactivate_file_page(struct page *page)
 void deactivate_page(struct page *page)
 {
 	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+		struct pagevec *pvec;
 
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-		put_cpu_var(lru_deactivate_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -680,19 +711,30 @@ void mark_page_lazyfree(struct page *page)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
+		struct pagevec *pvec;
 
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
-		put_cpu_var(lru_lazyfree_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
 void lru_add_drain(void)
 {
-	lru_add_drain_cpu(get_cpu());
-	put_cpu();
+	local_lock(&lru_pvecs.lock);
+	lru_add_drain_cpu(smp_processor_id());
+	local_unlock(&lru_pvecs.lock);
+}
+
+void lru_add_drain_cpu_zone(struct zone *zone)
+{
+	local_lock(&lru_pvecs.lock);
+	lru_add_drain_cpu(smp_processor_id());
+	drain_local_pages(zone);
+	local_unlock(&lru_pvecs.lock);
 }
 
 #ifdef CONFIG_SMP
@@ -743,11 +785,11 @@ void lru_add_drain_all(void)
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 
-		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
-		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
+		if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+		    pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
+		    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
+		    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
+		    pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
 			queue_work_on(cpu, mm_percpu_wq, work);
-- 
2.27.0.rc0




* Re: [PATCH v3 7/7] zram: Use local lock to protect per-CPU data
       [not found] ` <20200527201119.1692513-8-bigeasy@linutronix.de>
@ 2020-10-19  1:52   ` Yu Zhao
  2020-10-19  2:33     ` Hugh Dickins
  2020-10-19  2:46     ` Mike Galbraith
  0 siblings, 2 replies; 4+ messages in thread
From: Yu Zhao @ 2020-10-19  1:52 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Steven Rostedt,
	Will Deacon, Thomas Gleixner, Paul E . McKenney, Linus Torvalds,
	Matthew Wilcox, Mike Galbraith, Minchan Kim, Nitin Gupta,
	Sergey Senozhatsky, linux-mm

On Wed, May 27, 2020 at 10:11:19PM +0200, Sebastian Andrzej Siewior wrote:
> From: Mike Galbraith <umgwanakikbuti@gmail.com>
> 
> The zcomp driver uses per-CPU compression. The per-CPU data pointer is
> acquired with get_cpu_ptr() which implicitly disables preemption.
> It allocates memory inside the preempt disabled region which conflicts
> with the PREEMPT_RT semantics.
> 
> Replace the implicit preemption control with an explicit local lock.
> This allows RT kernels to substitute it with a real per CPU lock, which
> serializes the access but keeps the code section preemptible. On non RT
> kernels this maps to preempt_disable() as before, i.e. no functional
> change.

Hi,

This change seems to have introduced a potential deadlock. Can you
please take a look?

Thank you.

[   40.030778] ======================================================
[   40.037706] WARNING: possible circular locking dependency detected
[   40.044637] 5.9.0-74216-g5c9472ed6825 #1 Tainted: G        W        
[   40.051759] ------------------------------------------------------
[   40.058685] swapon/586 is trying to acquire lock:
[   40.063950] ffffe8ffffc0ee60 (&zstrm->lock){+.+.}-{2:2}, at: local_lock_acquire+0x5/0x70 [zram]
[   40.073739] 
[   40.073739] but task is already holding lock:
[   40.080277] ffff888101a1f438 (&zspage->lock){.+.+}-{2:2}, at: zs_map_object+0x73/0x28d
[   40.089182] 
[   40.089182] which lock already depends on the new lock.
[   40.089182] 
[   40.098344] 
[   40.098344] the existing dependency chain (in reverse order) is:
[   40.106715] 
[   40.106715] -> #1 (&zspage->lock){.+.+}-{2:2}:
[   40.113386]        lock_acquire+0x1cd/0x2c3
[   40.118083]        _raw_read_lock+0x44/0x78
[   40.122781]        zs_map_object+0x73/0x28d
[   40.127479]        zram_bvec_rw+0x42e/0x75d [zram]
[   40.132855]        zram_submit_bio+0x1fc/0x2d7 [zram]
[   40.138526]        submit_bio_noacct+0x11b/0x372
[   40.143709]        submit_bio+0xfd/0x1b5
[   40.148113]        __block_write_full_page+0x302/0x56f
[   40.153877]        __writepage+0x1e/0x74
[   40.158281]        write_cache_pages+0x404/0x59a
[   40.163461]        generic_writepages+0x53/0x82
[   40.168545]        do_writepages+0x33/0x74
[   40.173145]        __filemap_fdatawrite_range+0x91/0xac
[   40.179005]        file_write_and_wait_range+0x39/0x87
[   40.184769]        blkdev_fsync+0x19/0x3e
[   40.189272]        do_fsync+0x39/0x5c
[   40.193384]        __x64_sys_fsync+0x13/0x17
[   40.198178]        do_syscall_64+0x37/0x45
[   40.202776]        entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   40.209022] 
[   40.209022] -> #0 (&zstrm->lock){+.+.}-{2:2}:
[   40.215589]        validate_chain+0x1966/0x21a8
[   40.220673]        __lock_acquire+0x941/0xbba
[   40.225552]        lock_acquire+0x1cd/0x2c3
[   40.230250]        local_lock_acquire+0x21/0x70 [zram]
[   40.236015]        zcomp_stream_get+0x33/0x4d [zram]
[   40.241585]        zram_bvec_rw+0x476/0x75d [zram]
[   40.246963]        zram_rw_page+0xd8/0x17c [zram]
[   40.252240]        bdev_read_page+0x7a/0x9d
[   40.256933]        do_mpage_readpage+0x6b2/0x860
[   40.262101]        mpage_readahead+0x136/0x245
[   40.267089]        read_pages+0x60/0x1f9
[   40.271492]        page_cache_ra_unbounded+0x211/0x27b
[   40.277251]        generic_file_buffered_read+0x188/0xd4d
[   40.283296]        new_sync_read+0x10c/0x143
[   40.288088]        vfs_read+0xf4/0x1a5
[   40.292285]        ksys_read+0x73/0xd3
[   40.296483]        do_syscall_64+0x37/0x45
[   40.301072]        entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   40.307319] 
[   40.307319] other info that might help us debug this:
[   40.307319] 
[   40.316285]  Possible unsafe locking scenario:
[   40.316285] 
[   40.322907]        CPU0                    CPU1
[   40.327972]        ----                    ----
[   40.333041]   lock(&zspage->lock);
[   40.336874]                                lock(&zstrm->lock);
[   40.343424]                                lock(&zspage->lock);
[   40.350071]   lock(&zstrm->lock);
[   40.353803] 
[   40.353803]  *** DEADLOCK ***
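
(Patch 7/7 itself is not mirrored in this archive. Going by the
description quoted above and the zcomp_stream_get()/local_lock_acquire()
frames in the trace, the conversion is presumably along the lines of the
sketch below, i.e. the implicit get_cpu_ptr()/put_cpu_ptr() pair becomes
an explicit per-CPU local lock embedded in the compression stream; the
structure layout and names here are assumptions, not the actual patch.)

        /*
         * Sketch (assumed): the per-CPU compression stream gains a
         * local_lock_t, and zcomp_stream_get()/put() take it instead of
         * the old get_cpu_ptr()/put_cpu_ptr().  This is the "&zstrm->lock"
         * lockdep complains about above; comp->stream is assumed to be
         * the per-CPU zcomp_strm pointer.
         */
        struct zcomp_strm {
                local_lock_t lock;
                void *buffer;           /* compression/decompression scratch */
        };

        struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
        {
                local_lock(&comp->stream->lock);
                return this_cpu_ptr(comp->stream);
        }

        void zcomp_stream_put(struct zcomp *comp)
        {
                local_unlock(&comp->stream->lock);
        }

With lockdep enabled, the local lock carries its own lock class, which is
what allows the zstrm->lock vs. zspage->lock ordering inversion to be
reported here in the first place.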



* Re: [PATCH v3 7/7] zram: Use local lock to protect per-CPU data
  2020-10-19  1:52   ` [PATCH v3 7/7] zram: Use local lock to protect per-CPU data Yu Zhao
@ 2020-10-19  2:33     ` Hugh Dickins
  2020-10-19  2:46     ` Mike Galbraith
  1 sibling, 0 replies; 4+ messages in thread
From: Hugh Dickins @ 2020-10-19  2:33 UTC (permalink / raw)
  To: Yu Zhao
  Cc: Sebastian Andrzej Siewior, linux-kernel, Peter Zijlstra,
	Ingo Molnar, Steven Rostedt, Will Deacon, Thomas Gleixner,
	Paul E . McKenney, Linus Torvalds, Matthew Wilcox,
	Mike Galbraith, Minchan Kim, Nitin Gupta, Sergey Senozhatsky,
	linux-mm

On Sun, Oct 18, 2020 at 6:53 PM Yu Zhao <yuzhao@google.com> wrote:
>
> On Wed, May 27, 2020 at 10:11:19PM +0200, Sebastian Andrzej Siewior wrote:
> > From: Mike Galbraith <umgwanakikbuti@gmail.com>
> >
> > The zcomp driver uses per-CPU compression. The per-CPU data pointer is
> > acquired with get_cpu_ptr() which implicitly disables preemption.
> > It allocates memory inside the preempt disabled region which conflicts
> > with the PREEMPT_RT semantics.
> >
> > Replace the implicit preemption control with an explicit local lock.
> > This allows RT kernels to substitute it with a real per CPU lock, which
> > serializes the access but keeps the code section preemptible. On non RT
> > kernels this maps to preempt_disable() as before, i.e. no functional
> > change.
>
> Hi,
>
> This change seems to have introduced a potential deadlock. Can you
> please take a look?

Probably needs Peter's fix
https://lore.kernel.org/lkml/20201016124009.GQ2611@hirez.programming.kicks-ass.net/

>
> Thank you.
>
> [   40.030778] ======================================================
> [   40.037706] WARNING: possible circular locking dependency detected
> [   40.044637] 5.9.0-74216-g5c9472ed6825 #1 Tainted: G        W
> [   40.051759] ------------------------------------------------------
> [   40.058685] swapon/586 is trying to acquire lock:
> [   40.063950] ffffe8ffffc0ee60 (&zstrm->lock){+.+.}-{2:2}, at: local_lock_acquire+0x5/0x70 [zram]
> [   40.073739]
> [   40.073739] but task is already holding lock:
> [   40.080277] ffff888101a1f438 (&zspage->lock){.+.+}-{2:2}, at: zs_map_object+0x73/0x28d
> [   40.089182]
> [   40.089182] which lock already depends on the new lock.
> [   40.089182]
> [   40.098344]
> [   40.098344] the existing dependency chain (in reverse order) is:
> [   40.106715]
> [   40.106715] -> #1 (&zspage->lock){.+.+}-{2:2}:
> [   40.113386]        lock_acquire+0x1cd/0x2c3
> [   40.118083]        _raw_read_lock+0x44/0x78
> [   40.122781]        zs_map_object+0x73/0x28d
> [   40.127479]        zram_bvec_rw+0x42e/0x75d [zram]
> [   40.132855]        zram_submit_bio+0x1fc/0x2d7 [zram]
> [   40.138526]        submit_bio_noacct+0x11b/0x372
> [   40.143709]        submit_bio+0xfd/0x1b5
> [   40.148113]        __block_write_full_page+0x302/0x56f
> [   40.153877]        __writepage+0x1e/0x74
> [   40.158281]        write_cache_pages+0x404/0x59a
> [   40.163461]        generic_writepages+0x53/0x82
> [   40.168545]        do_writepages+0x33/0x74
> [   40.173145]        __filemap_fdatawrite_range+0x91/0xac
> [   40.179005]        file_write_and_wait_range+0x39/0x87
> [   40.184769]        blkdev_fsync+0x19/0x3e
> [   40.189272]        do_fsync+0x39/0x5c
> [   40.193384]        __x64_sys_fsync+0x13/0x17
> [   40.198178]        do_syscall_64+0x37/0x45
> [   40.202776]        entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [   40.209022]
> [   40.209022] -> #0 (&zstrm->lock){+.+.}-{2:2}:
> [   40.215589]        validate_chain+0x1966/0x21a8
> [   40.220673]        __lock_acquire+0x941/0xbba
> [   40.225552]        lock_acquire+0x1cd/0x2c3
> [   40.230250]        local_lock_acquire+0x21/0x70 [zram]
> [   40.236015]        zcomp_stream_get+0x33/0x4d [zram]
> [   40.241585]        zram_bvec_rw+0x476/0x75d [zram]
> [   40.246963]        zram_rw_page+0xd8/0x17c [zram]
> [   40.252240]        bdev_read_page+0x7a/0x9d
> [   40.256933]        do_mpage_readpage+0x6b2/0x860
> [   40.262101]        mpage_readahead+0x136/0x245
> [   40.267089]        read_pages+0x60/0x1f9
> [   40.271492]        page_cache_ra_unbounded+0x211/0x27b
> [   40.277251]        generic_file_buffered_read+0x188/0xd4d
> [   40.283296]        new_sync_read+0x10c/0x143
> [   40.288088]        vfs_read+0xf4/0x1a5
> [   40.292285]        ksys_read+0x73/0xd3
> [   40.296483]        do_syscall_64+0x37/0x45
> [   40.301072]        entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [   40.307319]
> [   40.307319] other info that might help us debug this:
> [   40.307319]
> [   40.316285]  Possible unsafe locking scenario:
> [   40.316285]
> [   40.322907]        CPU0                    CPU1
> [   40.327972]        ----                    ----
> [   40.333041]   lock(&zspage->lock);
> [   40.336874]                                lock(&zstrm->lock);
> [   40.343424]                                lock(&zspage->lock);
> [   40.350071]   lock(&zstrm->lock);
> [   40.353803]
> [   40.353803]  *** DEADLOCK ***
>



* Re: [PATCH v3 7/7] zram: Use local lock to protect per-CPU data
  2020-10-19  1:52   ` [PATCH v3 7/7] zram: Use local lock to protect per-CPU data Yu Zhao
  2020-10-19  2:33     ` Hugh Dickins
@ 2020-10-19  2:46     ` Mike Galbraith
  1 sibling, 0 replies; 4+ messages in thread
From: Mike Galbraith @ 2020-10-19  2:46 UTC (permalink / raw)
  To: Yu Zhao, Sebastian Andrzej Siewior
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Steven Rostedt,
	Will Deacon, Thomas Gleixner, Paul E . McKenney, Linus Torvalds,
	Matthew Wilcox, Minchan Kim, Nitin Gupta, Sergey Senozhatsky,
	linux-mm

On Sun, 2020-10-18 at 19:52 -0600, Yu Zhao wrote:
> On Wed, May 27, 2020 at 10:11:19PM +0200, Sebastian Andrzej Siewior wrote:
> > From: Mike Galbraith <umgwanakikbuti@gmail.com>
> > 
> > The zcomp driver uses per-CPU compression. The per-CPU data pointer is
> > acquired with get_cpu_ptr() which implicitly disables preemption.
> > It allocates memory inside the preempt disabled region which conflicts
> > with the PREEMPT_RT semantics.
> > 
> > Replace the implicit preemption control with an explicit local lock.
> > This allows RT kernels to substitute it with a real per CPU lock, which
> > serializes the access but keeps the code section preemptible. On non RT
> > kernels this maps to preempt_disable() as before, i.e. no functional
> > change.
> 
> Hi,
> 
> This change seems to have introduced a potential deadlock. Can you
> please take a look?

Hm, looks like I'm getting undeserved credit for uncovering a locking
bug.  In reality, Sebastian was generous with attribution of derivative
work, so he should get credit... and it looks like peterz fixed it.

Date: Fri, 16 Oct 2020 14:40:09 +0200
From: Peter Zijlstra <peterz@infradead.org>

---

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9100ac36670a..c1e2c2e1cde8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1216,10 +1216,11 @@ static void zram_free_page(struct zram *zram, size_t index)
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 				struct bio *bio, bool partial_io)
 {
-	int ret;
+	struct zcomp_strm *zstrm;
 	unsigned long handle;
 	unsigned int size;
 	void *src, *dst;
+	int ret;
 
 	zram_slot_lock(zram, index);
 	if (zram_test_flag(zram, index, ZRAM_WB)) {
@@ -1250,6 +1251,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 
 	size = zram_get_obj_size(zram, index);
 
+	if (size != PAGE_SIZE)
+		zstrm = zcomp_stream_get(zram->comp);
+
 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
 		dst = kmap_atomic(page);
@@ -1257,8 +1261,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		kunmap_atomic(dst);
 		ret = 0;
 	} else {
-		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
-
 		dst = kmap_atomic(page);
 		ret = zcomp_decompress(zstrm, src, size, dst);
 		kunmap_atomic(dst);




