All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH AUTOSEL 4.4 01/25] locking/spinlock/debug: Fix various data races
@ 2019-12-27 18:15 Sasha Levin
  2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 02/25] netfilter: ctnetlink: netns exit must wait for callbacks Sasha Levin
                   ` (21 more replies)
  0 siblings, 22 replies; 27+ messages in thread
From: Sasha Levin @ 2019-12-27 18:15 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Marco Elver, Qian Cai, Andrew Morton, Linus Torvalds,
	Paul E . McKenney, Peter Zijlstra, Thomas Gleixner, Will Deacon,
	Ingo Molnar, Sasha Levin

From: Marco Elver <elver@google.com>

[ Upstream commit 1a365e822372ba24c9da0822bc583894f6f3d821 ]

This fixes various data races in spinlock_debug. By testing with KCSAN,
it is observable that the console gets spammed with data race reports,
suggesting these are extremely frequent.

Example data race report:

  read to 0xffff8ab24f403c48 of 4 bytes by task 221 on cpu 2:
   debug_spin_lock_before kernel/locking/spinlock_debug.c:85 [inline]
   do_raw_spin_lock+0x9b/0x210 kernel/locking/spinlock_debug.c:112
   __raw_spin_lock include/linux/spinlock_api_smp.h:143 [inline]
   _raw_spin_lock+0x39/0x40 kernel/locking/spinlock.c:151
   spin_lock include/linux/spinlock.h:338 [inline]
   get_partial_node.isra.0.part.0+0x32/0x2f0 mm/slub.c:1873
   get_partial_node mm/slub.c:1870 [inline]
  <snip>

  write to 0xffff8ab24f403c48 of 4 bytes by task 167 on cpu 3:
   debug_spin_unlock kernel/locking/spinlock_debug.c:103 [inline]
   do_raw_spin_unlock+0xc9/0x1a0 kernel/locking/spinlock_debug.c:138
   __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:159 [inline]
   _raw_spin_unlock_irqrestore+0x2d/0x50 kernel/locking/spinlock.c:191
   spin_unlock_irqrestore include/linux/spinlock.h:393 [inline]
   free_debug_processing+0x1b3/0x210 mm/slub.c:1214
   __slab_free+0x292/0x400 mm/slub.c:2864
  <snip>

As a side-effect, with KCSAN, this eventually locks up the console, most
likely due to a deadlock, e.g. ... -> printk lock -> spinlock_debug ->
KCSAN detects data race -> kcsan_print_report() -> printk lock ->
deadlock.

This fix will 1) avoid the data races, and 2) allow using lock debugging
together with KCSAN.

Reported-by: Qian Cai <cai@lca.pw>
Signed-off-by: Marco Elver <elver@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/r/20191120155715.28089-1-elver@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/locking/spinlock_debug.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a596cffa..95e610e3f7ef 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -51,19 +51,19 @@ EXPORT_SYMBOL(__rwlock_init);
 
 static void spin_dump(raw_spinlock_t *lock, const char *msg)
 {
-	struct task_struct *owner = NULL;
+	struct task_struct *owner = READ_ONCE(lock->owner);
 
-	if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
-		owner = lock->owner;
+	if (owner == SPINLOCK_OWNER_INIT)
+		owner = NULL;
 	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
 		msg, raw_smp_processor_id(),
 		current->comm, task_pid_nr(current));
 	printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
 			".owner_cpu: %d\n",
-		lock, lock->magic,
+		lock, READ_ONCE(lock->magic),
 		owner ? owner->comm : "<none>",
 		owner ? task_pid_nr(owner) : -1,
-		lock->owner_cpu);
+		READ_ONCE(lock->owner_cpu));
 	dump_stack();
 }
 
@@ -80,16 +80,16 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
 static inline void
 debug_spin_lock_before(raw_spinlock_t *lock)
 {
-	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
-	SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+	SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic");
+	SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+	SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
 							lock, "cpu recursion");
 }
 
 static inline void debug_spin_lock_after(raw_spinlock_t *lock)
 {
-	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner = current;
+	WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+	WRITE_ONCE(lock->owner, current);
 }
 
 static inline void debug_spin_unlock(raw_spinlock_t *lock)
@@ -99,8 +99,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
 	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
 	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
+	WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+	WRITE_ONCE(lock->owner_cpu, -1);
 }
 
 static void __spin_lock_debug(raw_spinlock_t *lock)
@@ -233,8 +233,8 @@ static inline void debug_write_lock_before(rwlock_t *lock)
 
 static inline void debug_write_lock_after(rwlock_t *lock)
 {
-	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner = current;
+	WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+	WRITE_ONCE(lock->owner, current);
 }
 
 static inline void debug_write_unlock(rwlock_t *lock)
@@ -243,8 +243,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
 	RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
 	RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
+	WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+	WRITE_ONCE(lock->owner_cpu, -1);
 }
 
 #if 0		/* This can cause lockups */
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2019-12-27 18:25 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-27 18:15 [PATCH AUTOSEL 4.4 01/25] locking/spinlock/debug: Fix various data races Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 02/25] netfilter: ctnetlink: netns exit must wait for callbacks Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 03/25] ARM: vexpress: Set-up shared OPP table instead of individual for each CPU Sasha Levin
2019-12-27 18:15   ` Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 04/25] netfilter: uapi: Avoid undefined left-shift in xt_sctp.h Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 05/25] netfilter: bridge: make sure to pull arp header in br_nf_forward_arp() Sasha Levin
2019-12-27 18:15   ` [Bridge] " Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 06/25] af_packet: set defaule value for tmo Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 07/25] fjes: fix missed check in fjes_acpi_add Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 08/25] ARM: dts: am437x-gp/epos-evm: fix panel compatible Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 09/25] tty: serial: msm_serial: Fix lockup for sysrq and oops Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 10/25] net: usb: lan78xx: Fix suspend/resume PHY register access error Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 11/25] powerpc: Ensure that swiotlb buffer is allocated from low memory Sasha Levin
2019-12-27 18:15   ` Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 12/25] 6pack,mkiss: fix possible deadlock Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 13/25] bnx2x: Do not handle requests from VFs after parity Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 14/25] bnx2x: Fix logic to get total no. of PFs per engine Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 15/25] net: usb: lan78xx: Fix error message format specifier Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 16/25] rfkill: Fix incorrect check to avoid NULL pointer dereference Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 17/25] ASoC: wm8962: fix lambda value Sasha Levin
2019-12-27 18:15   ` [alsa-devel] " Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 18/25] regulator: rn5t618: fix module aliases Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 19/25] kconfig: don't crash on NULL expressions in expr_eq() Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 20/25] net: qlogic: Fix error paths in ql_alloc_large_buffers() Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 21/25] net: nfc: nci: fix a possible sleep-in-atomic-context bug in nci_uart_tty_receive() Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 22/25] parisc: Fix compiler warnings in debug_core.c Sasha Levin
2019-12-27 18:15 ` [PATCH AUTOSEL 4.4 23/25] net: hisilicon: Fix a BUG trigered by wrong bytes_compl Sasha Levin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.