From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org, linux-cachefs@redhat.com,
	dhowells@redhat.com, gfs2@lists.linux.dev,
	dm-devel@lists.linux.dev, linux-security-module@vger.kernel.org,
	selinux@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep
Date: Wed,  6 Dec 2023 17:05:39 +1100
Message-ID: <20231206060629.2827226-11-david@fromorbit.com>
In-Reply-To: <20231206060629.2827226-1-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

The hash-bl conversion nests spinlocks inside the hash chain bit
locks. This causes problems for CONFIG_PREEMPT_RT, which converts
spinlocks to sleeping locks, because we are not allowed to sleep
while holding a spinning lock such as a bit lock.
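
As a rough illustration (not part of this patch), the problematic
nesting looks like the sketch below. The struct obj and
obj_hash_insert() names are hypothetical; the per-object spinlock
stands in for whatever lock a hash-bl user takes under the chain
lock. On PREEMPT_RT the inner spin_lock() may sleep while the outer
bit spinlock is held, which is forbidden.

	#include <linux/list_bl.h>
	#include <linux/spinlock.h>

	/* Illustrative only: a hypothetical hash-bl user. */
	struct obj {
		struct hlist_bl_node	hash;	/* hash chain linkage */
		spinlock_t		lock;	/* per-object lock */
		unsigned long		key;
	};

	static void obj_hash_insert(struct hlist_bl_head *head,
				    struct obj *obj)
	{
		hlist_bl_lock(head);	/* bit spinlock: always spins */
		spin_lock(&obj->lock);	/* sleeping lock on PREEMPT_RT */
		hlist_bl_add_head(&obj->hash, head);
		spin_unlock(&obj->lock);
		hlist_bl_unlock(head);
	}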

Further, lockdep does not support bit locks, so we lose lockdep
coverage of the inode hash table with the hash-bl conversion.

To enable these configs to work, add an external per-chain spinlock
to struct hlist_bl_head and add helpers that use it instead of the
bit spinlock when PREEMPT_RT or lockdep is enabled.

This converts all hlist-bl users to the external spinlock in these
configurations, so we also gain lockdep coverage of things like the
dentry cache hash table.
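
Note that callers are unaffected by the switch: the locking API is
unchanged, only the implementation behind hlist_bl_lock() and
hlist_bl_unlock() differs by config. A minimal caller-side sketch,
reusing the hypothetical struct obj from the earlier example
(obj_hash_lookup() is likewise hypothetical):

	static struct obj *obj_hash_lookup(struct hlist_bl_head *head,
					   unsigned long key)
	{
		struct hlist_bl_node *node;
		struct obj *obj;

		/* Bit lock or per-chain spinlock, depending on config. */
		hlist_bl_lock(head);
		hlist_bl_for_each_entry(obj, node, head, hash) {
			if (obj->key == key) {
				hlist_bl_unlock(head);
				return obj;
			}
		}
		hlist_bl_unlock(head);
		return NULL;
	}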

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/list_bl.h    | 126 ++++++++++++++++++++++++++++---------
 include/linux/rculist_bl.h |  13 ++++
 2 files changed, 110 insertions(+), 29 deletions(-)

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 8ee2bf5af131..990ad8e24e0b 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -4,14 +4,27 @@
 
 #include <linux/list.h>
 #include <linux/bit_spinlock.h>
+#include <linux/spinlock.h>
 
 /*
  * Special version of lists, where head of the list has a lock in the lowest
  * bit. This is useful for scalable hash tables without increasing memory
  * footprint overhead.
  *
- * For modification operations, the 0 bit of hlist_bl_head->first
- * pointer must be set.
+ * Whilst the general use of bit spin locking is considered safe, PREEMPT_RT
+ * introduces a problem with nesting spin locks inside bit locks: spin locks
+ * become sleeping locks, and we can't sleep inside spinning locks such as bit
+ * locks. However, for PREEMPT_RT, performance is less of an issue than
+ * correctness, so we trade off the memory and cache footprint of a spinlock
+ * per list head so that the list locks become sleeping locks and work
+ * correctly with PREEMPT_RT kernels.
+ *
+ * An added advantage of this is that we can use the same trick when lockdep is
+ * enabled (again, performance doesn't matter) and gain lockdep coverage of all
+ * the hash-bl operations.
+ *
+ * For modification operations when using pure bit locking, bit 0 of the
+ * hlist_bl_head->first pointer must be set.
  *
  * With some small modifications, this can easily be adapted to store several
  * arbitrary bits (not just a single lock bit), if the need arises to store
@@ -30,16 +43,21 @@
 #define LIST_BL_BUG_ON(x)
 #endif
 
+#undef LIST_BL_USE_SPINLOCKS
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_LOCKDEP)
+#define LIST_BL_USE_SPINLOCKS	1
+#endif
 
 struct hlist_bl_head {
 	struct hlist_bl_node *first;
+#ifdef LIST_BL_USE_SPINLOCKS
+	spinlock_t lock;
+#endif
 };
 
 struct hlist_bl_node {
 	struct hlist_bl_node *next, **pprev;
 };
-#define INIT_HLIST_BL_HEAD(ptr) \
-	((ptr)->first = NULL)
 
 static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
 {
@@ -54,6 +72,69 @@ static inline bool  hlist_bl_unhashed(const struct hlist_bl_node *h)
 	return !h->pprev;
 }
 
+#ifdef LIST_BL_USE_SPINLOCKS
+#define INIT_HLIST_BL_HEAD(ptr) do { \
+	(ptr)->first = NULL; \
+	spin_lock_init(&(ptr)->lock); \
+} while (0)
+
+static inline void hlist_bl_lock(struct hlist_bl_head *b)
+{
+	spin_lock(&b->lock);
+}
+
+static inline void hlist_bl_unlock(struct hlist_bl_head *b)
+{
+	spin_unlock(&b->lock);
+}
+
+static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
+{
+	return spin_is_locked(&b->lock);
+}
+
+static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
+{
+	return h->first;
+}
+
+static inline void hlist_bl_set_first(struct hlist_bl_head *h,
+					struct hlist_bl_node *n)
+{
+	h->first = n;
+}
+
+static inline void hlist_bl_set_before(struct hlist_bl_node **pprev,
+					struct hlist_bl_node *n)
+{
+	WRITE_ONCE(*pprev, n);
+}
+
+static inline bool hlist_bl_empty(const struct hlist_bl_head *h)
+{
+	return !READ_ONCE(h->first);
+}
+
+#else /* !LIST_BL_USE_SPINLOCKS */
+
+#define INIT_HLIST_BL_HEAD(ptr) \
+	((ptr)->first = NULL)
+
+static inline void hlist_bl_lock(struct hlist_bl_head *b)
+{
+	bit_spin_lock(0, (unsigned long *)b);
+}
+
+static inline void hlist_bl_unlock(struct hlist_bl_head *b)
+{
+	__bit_spin_unlock(0, (unsigned long *)b);
+}
+
+static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
+{
+	return bit_spin_is_locked(0, (unsigned long *)b);
+}
+
 static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
 {
 	return (struct hlist_bl_node *)
@@ -69,11 +150,21 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h,
 	h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
 }
 
+static inline void hlist_bl_set_before(struct hlist_bl_node **pprev,
+					struct hlist_bl_node *n)
+{
+	WRITE_ONCE(*pprev,
+		   (struct hlist_bl_node *)
+			((uintptr_t)n | ((uintptr_t)*pprev & LIST_BL_LOCKMASK)));
+}
+
 static inline bool hlist_bl_empty(const struct hlist_bl_head *h)
 {
 	return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK);
 }
 
+#endif /* LIST_BL_USE_SPINLOCKS */
+
 static inline void hlist_bl_add_head(struct hlist_bl_node *n,
 					struct hlist_bl_head *h)
 {
@@ -94,11 +185,7 @@ static inline void hlist_bl_add_before(struct hlist_bl_node *n,
 	n->pprev = pprev;
 	n->next = next;
 	next->pprev = &n->next;
-
-	/* pprev may be `first`, so be careful not to lose the lock bit */
-	WRITE_ONCE(*pprev,
-		   (struct hlist_bl_node *)
-			((uintptr_t)n | ((uintptr_t)*pprev & LIST_BL_LOCKMASK)));
+	hlist_bl_set_before(pprev, n);
 }
 
 static inline void hlist_bl_add_behind(struct hlist_bl_node *n,
@@ -119,11 +206,7 @@ static inline void __hlist_bl_del(struct hlist_bl_node *n)
 
 	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
 
-	/* pprev may be `first`, so be careful not to lose the lock bit */
-	WRITE_ONCE(*pprev,
-		   (struct hlist_bl_node *)
-			((unsigned long)next |
-			 ((unsigned long)*pprev & LIST_BL_LOCKMASK)));
+	hlist_bl_set_before(pprev, next);
 	if (next)
 		next->pprev = pprev;
 }
@@ -165,21 +248,6 @@ static inline bool hlist_bl_fake(struct hlist_bl_node *n)
 	return n->pprev == &n->next;
 }
 
-static inline void hlist_bl_lock(struct hlist_bl_head *b)
-{
-	bit_spin_lock(0, (unsigned long *)b);
-}
-
-static inline void hlist_bl_unlock(struct hlist_bl_head *b)
-{
-	__bit_spin_unlock(0, (unsigned long *)b);
-}
-
-static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
-{
-	return bit_spin_is_locked(0, (unsigned long *)b);
-}
-
 /**
  * hlist_bl_for_each_entry	- iterate over list of given type
  * @tpos:	the type * to use as a loop cursor.
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
index 0b952d06eb0b..2d5eb5153121 100644
--- a/include/linux/rculist_bl.h
+++ b/include/linux/rculist_bl.h
@@ -8,6 +8,18 @@
 #include <linux/list_bl.h>
 #include <linux/rcupdate.h>
 
+#ifdef LIST_BL_USE_SPINLOCKS
+static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
+					struct hlist_bl_node *n)
+{
+	rcu_assign_pointer(h->first, n);
+}
+
+static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
+{
+	return rcu_dereference_check(h->first, hlist_bl_is_locked(h));
+}
+#else /* !LIST_BL_USE_SPINLOCKS */
 static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
 					struct hlist_bl_node *n)
 {
@@ -23,6 +35,7 @@ static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
 	return (struct hlist_bl_node *)
 		((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK);
 }
+#endif /* LIST_BL_USE_SPINLOCKS */
 
 /**
  * hlist_bl_del_rcu - deletes entry from hash list without re-initialization
-- 
2.42.0


Thread overview: 34+ messages
2023-12-06  6:05 [PATCH 0/11] vfs: inode cache scalability improvements Dave Chinner
2023-12-06  6:05 ` [PATCH 01/11] lib/dlock-list: Distributed and lock-protected lists Dave Chinner
2023-12-07  2:23   ` Al Viro
2023-12-06  6:05 ` [PATCH 02/11] vfs: Remove unnecessary list_for_each_entry_safe() variants Dave Chinner
2023-12-07  2:26   ` Al Viro
2023-12-07  4:18   ` Kent Overstreet
2023-12-06  6:05 ` [PATCH 03/11] vfs: Use dlock list for superblock's inode list Dave Chinner
2023-12-07  2:40   ` Al Viro
2023-12-07  4:59     ` Dave Chinner
2023-12-07  5:03       ` Kent Overstreet
2023-12-06  6:05 ` [PATCH 04/11] lib/dlock-list: Make sibling CPUs share the same linked list Dave Chinner
2023-12-07  4:31   ` Kent Overstreet
2023-12-07  5:42   ` Kent Overstreet
2023-12-07  6:25     ` Dave Chinner
2023-12-07  6:49   ` Al Viro
2023-12-06  6:05 ` [PATCH 05/11] selinux: use dlist for isec inode list Dave Chinner
2023-12-06 21:52   ` Paul Moore
2023-12-06 23:04     ` Dave Chinner
2023-12-07  0:36       ` Paul Moore
2023-12-06  6:05 ` [PATCH 06/11] vfs: factor out inode hash head calculation Dave Chinner
2023-12-07  3:02   ` Al Viro
2023-12-06  6:05 ` [PATCH 07/11] hlist-bl: add hlist_bl_fake() Dave Chinner
2023-12-07  3:05   ` Al Viro
2023-12-06  6:05 ` [PATCH 08/11] vfs: inode cache conversion to hash-bl Dave Chinner
2023-12-07  4:58   ` Kent Overstreet
2023-12-07  6:03     ` Dave Chinner
2023-12-07  6:42   ` Al Viro
2023-12-06  6:05 ` [PATCH 09/11] hash-bl: explicitly initialise hash-bl heads Dave Chinner
2023-12-07  3:15   ` Al Viro
2023-12-06  6:05 ` Dave Chinner [this message]
2023-12-07  4:16   ` [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep Kent Overstreet
2023-12-07  4:41     ` Dave Chinner
2023-12-06  6:05 ` [PATCH 11/11] hlist-bl: introduced nested locking for dm-snap Dave Chinner
2023-12-07 17:08 ` [PATCH 0/11] vfs: inode cache scalability improvements Kent Overstreet
