All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org, linux-cachefs@redhat.com,
	dhowells@redhat.com, gfs2@lists.linux.dev,
	dm-devel@lists.linux.dev, linux-security-module@vger.kernel.org,
	selinux@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 11/11] hlist-bl: introduced nested locking for dm-snap
Date: Wed,  6 Dec 2023 17:05:40 +1100	[thread overview]
Message-ID: <20231206060629.2827226-12-david@fromorbit.com> (raw)
In-Reply-To: <20231206060629.2827226-1-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

Testing with lockdep enabled threw this warning from generic/081 in
fstests:

[ 2369.724151] ============================================
[ 2369.725805] WARNING: possible recursive locking detected
[ 2369.727125] 6.7.0-rc2-dgc+ #1952 Not tainted
[ 2369.728647] --------------------------------------------
[ 2369.730197] systemd-udevd/389493 is trying to acquire lock:
[ 2369.732378] ffff888116a1a320 (&(et->table + i)->lock){+.+.}-{2:2}, at: snapshot_map+0x13e/0x7f0
[ 2369.736197]
               but task is already holding lock:
[ 2369.738657] ffff8881098a4fd0 (&(et->table + i)->lock){+.+.}-{2:2}, at: snapshot_map+0x136/0x7f0
[ 2369.742118]
               other info that might help us debug this:
[ 2369.744403]  Possible unsafe locking scenario:

[ 2369.746814]        CPU0
[ 2369.747675]        ----
[ 2369.748496]   lock(&(et->table + i)->lock);
[ 2369.749877]   lock(&(et->table + i)->lock);
[ 2369.751241]
                *** DEADLOCK ***

[ 2369.753173]  May be due to missing lock nesting notation

[ 2369.754963] 4 locks held by systemd-udevd/389493:
[ 2369.756124]  #0: ffff88811b3a1f48 (mapping.invalidate_lock#2){++++}-{3:3}, at: page_cache_ra_unbounded+0x69/0x190
[ 2369.758516]  #1: ffff888121ceff10 (&md->io_barrier){.+.+}-{0:0}, at: dm_get_live_table+0x52/0xd0
[ 2369.760888]  #2: ffff888110240078 (&s->lock#2){++++}-{3:3}, at: snapshot_map+0x12e/0x7f0
[ 2369.763254]  #3: ffff8881098a4fd0 (&(et->table + i)->lock){+.+.}-{2:2}, at: snapshot_map+0x136/0x7f0
[ 2369.765896]
               stack backtrace:
[ 2369.767429] CPU: 3 PID: 389493 Comm: systemd-udevd Not tainted 6.7.0-rc2-dgc+ #1952
[ 2369.770203] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 2369.773771] Call Trace:
[ 2369.774657]  <TASK>
[ 2369.775494]  dump_stack_lvl+0x5c/0xc0
[ 2369.776765]  dump_stack+0x10/0x20
[ 2369.778031]  print_deadlock_bug+0x220/0x2f0
[ 2369.779568]  __lock_acquire+0x1255/0x2180
[ 2369.781013]  lock_acquire+0xb9/0x2c0
[ 2369.782456]  ? snapshot_map+0x13e/0x7f0
[ 2369.783927]  ? snapshot_map+0x136/0x7f0
[ 2369.785240]  _raw_spin_lock+0x34/0x70
[ 2369.786413]  ? snapshot_map+0x13e/0x7f0
[ 2369.787482]  snapshot_map+0x13e/0x7f0
[ 2369.788462]  ? lockdep_init_map_type+0x75/0x250
[ 2369.789650]  __map_bio+0x1d7/0x200
[ 2369.790364]  dm_submit_bio+0x17d/0x570
[ 2369.791387]  __submit_bio+0x4a/0x80
[ 2369.792215]  submit_bio_noacct_nocheck+0x108/0x350
[ 2369.793357]  submit_bio_noacct+0x115/0x450
[ 2369.794334]  submit_bio+0x43/0x60
[ 2369.795112]  mpage_readahead+0xf1/0x130
[ 2369.796037]  ? blkdev_write_begin+0x30/0x30
[ 2369.797007]  blkdev_readahead+0x15/0x20
[ 2369.797893]  read_pages+0x5c/0x230
[ 2369.798703]  page_cache_ra_unbounded+0x143/0x190
[ 2369.799810]  force_page_cache_ra+0x9a/0xc0
[ 2369.800754]  page_cache_sync_ra+0x2e/0x50
[ 2369.801704]  filemap_get_pages+0x112/0x630
[ 2369.802696]  ? __lock_acquire+0x413/0x2180
[ 2369.803663]  filemap_read+0xfc/0x3a0
[ 2369.804527]  ? __might_sleep+0x42/0x70
[ 2369.805443]  blkdev_read_iter+0x6d/0x150
[ 2369.806370]  vfs_read+0x1a6/0x2d0
[ 2369.807148]  ksys_read+0x71/0xf0
[ 2369.807936]  __x64_sys_read+0x19/0x20
[ 2369.808810]  do_syscall_64+0x3c/0xe0
[ 2369.809746]  entry_SYSCALL_64_after_hwframe+0x63/0x6b
[ 2369.810914] RIP: 0033:0x7f9f14dbb03d

Turns out that dm-snap holds two hash-bl locks at the same time,
so we need nesting semantics to ensure lockdep understands what is
going on.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 drivers/md/dm-snap.c    |  2 +-
 include/linux/list_bl.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index bf7a574499a3..cd97d5cb295d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -645,7 +645,7 @@ static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
 static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
 {
 	hlist_bl_lock(lock->complete_slot);
-	hlist_bl_lock(lock->pending_slot);
+	hlist_bl_lock_nested(lock->pending_slot, SINGLE_DEPTH_NESTING);
 }
 
 static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 990ad8e24e0b..0e3e60c10563 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -83,6 +83,11 @@ static inline void hlist_bl_lock(struct hlist_bl_head *b)
 	spin_lock(&b->lock);
 }
 
+static inline void hlist_bl_lock_nested(struct hlist_bl_head *b, int subclass)
+{
+	spin_lock_nested(&b->lock, subclass);
+}
+
 static inline void hlist_bl_unlock(struct hlist_bl_head *b)
 {
 	spin_unlock(&b->lock);
@@ -125,6 +130,11 @@ static inline void hlist_bl_lock(struct hlist_bl_head *b)
 	bit_spin_lock(0, (unsigned long *)b);
 }
 
+static inline void hlist_bl_lock_nested(struct hlist_bl_head *b, int subclass)
+{
+	hlist_bl_lock(b);
+}
+
 static inline void hlist_bl_unlock(struct hlist_bl_head *b)
 {
 	__bit_spin_unlock(0, (unsigned long *)b);
-- 
2.42.0


  parent reply	other threads:[~2023-12-06  6:06 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-06  6:05 [PATCH 0/11] vfs: inode cache scalability improvements Dave Chinner
2023-12-06  6:05 ` [PATCH 01/11] lib/dlock-list: Distributed and lock-protected lists Dave Chinner
2023-12-07  2:23   ` Al Viro
2023-12-06  6:05 ` [PATCH 02/11] vfs: Remove unnecessary list_for_each_entry_safe() variants Dave Chinner
2023-12-07  2:26   ` Al Viro
2023-12-07  4:18   ` Kent Overstreet
2023-12-06  6:05 ` [PATCH 03/11] vfs: Use dlock list for superblock's inode list Dave Chinner
2023-12-07  2:40   ` Al Viro
2023-12-07  4:59     ` Dave Chinner
2023-12-07  5:03       ` Kent Overstreet
2023-12-06  6:05 ` [PATCH 04/11] lib/dlock-list: Make sibling CPUs share the same linked list Dave Chinner
2023-12-07  4:31   ` Kent Overstreet
2023-12-07  5:42   ` Kent Overstreet
2023-12-07  6:25     ` Dave Chinner
2023-12-07  6:49   ` Al Viro
2023-12-06  6:05 ` [PATCH 05/11] selinux: use dlist for isec inode list Dave Chinner
2023-12-06 21:52   ` Paul Moore
2023-12-06 23:04     ` Dave Chinner
2023-12-07  0:36       ` Paul Moore
2023-12-06  6:05 ` [PATCH 06/11] vfs: factor out inode hash head calculation Dave Chinner
2023-12-07  3:02   ` Al Viro
2023-12-06  6:05 ` [PATCH 07/11] hlist-bl: add hlist_bl_fake() Dave Chinner
2023-12-07  3:05   ` Al Viro
2023-12-06  6:05 ` [PATCH 08/11] vfs: inode cache conversion to hash-bl Dave Chinner
2023-12-07  4:58   ` Kent Overstreet
2023-12-07  6:03     ` Dave Chinner
2023-12-07  6:42   ` Al Viro
2023-12-06  6:05 ` [PATCH 09/11] hash-bl: explicitly initialise hash-bl heads Dave Chinner
2023-12-07  3:15   ` Al Viro
2023-12-06  6:05 ` [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep Dave Chinner
2023-12-07  4:16   ` Kent Overstreet
2023-12-07  4:41     ` Dave Chinner
2023-12-06  6:05 ` Dave Chinner [this message]
2023-12-07 17:08 ` [PATCH 0/11] vfs: inode cache scalability improvements Kent Overstreet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231206060629.2827226-12-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=dhowells@redhat.com \
    --cc=dm-devel@lists.linux.dev \
    --cc=gfs2@lists.linux.dev \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=selinux@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.