linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] lockdep cmp fn conversions
@ 2024-01-27  2:08 Kent Overstreet
  2024-01-27  2:08 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:08 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

rationale:
*_lock_nested() is fundamentally broken - in order for lockdep to work
we need to be able to check that we're following some defined ordering,
and it's not possible to define a total ordering of an arbitrary number
of objects with only a small fixed size enum.

so it needs to go. awhile back I added the ability to set a comparison
function for a lock class, and this is the start of hopefully a slow
steady trickle of patches as time allows to convert code to use it.

Kent Overstreet (4):
  fs/pipe: Convert to lockdep_cmp_fn
  pktcdvd: kill mutex_lock_nested() usage
  net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
  af_unix: convert to lock_cmp_fn

 drivers/block/pktcdvd.c  |  8 ++---
 fs/pipe.c                | 77 ++++++++++++++++------------------------
 include/linux/lockdep.h  |  3 ++
 include/net/af_unix.h    |  3 --
 kernel/locking/lockdep.c |  6 ++++
 net/core/sock.c          |  1 +
 net/unix/af_unix.c       | 24 ++++++-------
 net/unix/diag.c          |  2 +-
 8 files changed, 55 insertions(+), 69 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
@ 2024-01-27  2:08 ` Kent Overstreet
  2024-02-02 12:03   ` Jan Kara
  2024-01-27  2:08 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:08 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel
  Cc: Kent Overstreet, peterz, boqun.feng, Alexander Viro,
	Christian Brauner, Jan Kara

*_lock_nested() is fundamentally broken; lockdep needs to check lock
ordering, but we cannot device a total ordering on an unbounded number
of elements with only a few subclasses.

the replacement is to define lock ordering with a proper comparison
function.

fs/pipe.c was already doing everything correctly otherwise, nothing
much changes here.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
 1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index f1adbfe743d4..50c8a8596b52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
  */
 
-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r)		((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+			    const struct lockdep_map *b)
 {
-	if (pipe->files)
-		mutex_lock_nested(&pipe->mutex, subclass);
+	return cmp_int((unsigned long) a, (unsigned long) b);
 }
+#endif
 
 void pipe_lock(struct pipe_inode_info *pipe)
 {
-	/*
-	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
-	 */
-	pipe_lock_nested(pipe, I_MUTEX_PARENT);
+	if (pipe->files)
+		mutex_lock(&pipe->mutex);
 }
 EXPORT_SYMBOL(pipe_lock);
 
@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
 	BUG_ON(pipe1 == pipe2);
 
-	if (pipe1 < pipe2) {
-		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
-	} else {
-		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
-	}
+	if (pipe1 > pipe2)
+		swap(pipe1, pipe2);
+
+	pipe_lock(pipe1);
+	pipe_lock(pipe2);
 }
 
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		return 0;
 
 	ret = 0;
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	/*
 	 * We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			ret = -EAGAIN;
 			break;
 		}
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 
 		/*
 		 * We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 		wake_next_reader = true;
 	}
 	if (pipe_empty(pipe->head, pipe->tail))
 		wake_next_reader = false;
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	if (was_full)
 		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	if (unlikely(total_len == 0))
 		return 0;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		 * after waiting we need to re-check whether the pipe
 		 * become empty while we dropped the lock.
 		 */
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 		if (was_empty)
 			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		was_empty = pipe_empty(pipe->head, pipe->tail);
 		wake_next_writer = true;
 	}
 out:
 	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
 		wake_next_writer = false;
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	/*
 	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case FIONREAD:
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		count = 0;
 		head = pipe->head;
 		tail = pipe->tail;
@@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			count += pipe->bufs[tail & mask].len;
 			tail++;
 		}
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 
 		return put_user(count, (int __user *)arg);
 
 #ifdef CONFIG_WATCH_QUEUE
 	case IOC_WATCH_QUEUE_SET_SIZE: {
 		int ret;
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		ret = watch_queue_set_size(pipe, arg);
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 		return ret;
 	}
 
@@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
 {
 	struct pipe_inode_info *pipe = file->private_data;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
 	if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	put_pipe_info(inode, pipe);
 	return 0;
@@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 			/* this can happen only if on == T */
 			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 	}
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return retval;
 }
 
@@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
 		pipe->nr_accounted = pipe_bufs;
 		pipe->user = user;
 		mutex_init(&pipe->mutex);
+		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
 		return pipe;
 	}
 
@@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	filp->private_data = pipe;
 	/* OK, we have a pipe and it's pinned down */
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	/* We can only do regular read/write on fifos */
 	stream_open(inode, filp);
@@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	}
 
 	/* Ok! */
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return 0;
 
 err_rd:
@@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	goto err;
 
 err:
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	put_pipe_info(inode, pipe);
 	return ret;
@@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 	if (!pipe)
 		return -EBADF;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	switch (cmd) {
 	case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 		break;
 	}
 
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return ret;
 }
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
  2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
  2024-01-27  2:08 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
@ 2024-01-27  2:08 ` Kent Overstreet
  2024-01-28  4:29   ` kernel test robot
  2024-01-28  6:48   ` kernel test robot
  2024-01-27  2:08 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
  2024-01-27  2:08 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
  3 siblings, 2 replies; 19+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:08 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel
  Cc: Kent Overstreet, peterz, boqun.feng, linux-block, Jens Axboe

Unecessary, we're not actually taking nested locks of the same type.

Cc: linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 drivers/block/pktcdvd.c  |  8 ++++----
 fs/pipe.c                | 10 +---------
 include/linux/lockdep.h  |  3 +++
 kernel/locking/lockdep.c |  6 ++++++
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d56d972aadb3..2eb68a624fda 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -332,7 +332,7 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
 {
 	int n = 0;
 	int idx;
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 	for (idx = 0; idx < MAX_WRITERS; idx++) {
 		struct pktcdvd_device *pd = pkt_devs[idx];
 		if (!pd)
@@ -2639,7 +2639,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	struct pktcdvd_device *pd;
 	struct gendisk *disk;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	for (idx = 0; idx < MAX_WRITERS; idx++)
 		if (!pkt_devs[idx])
@@ -2729,7 +2729,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	int idx;
 	int ret = 0;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	for (idx = 0; idx < MAX_WRITERS; idx++) {
 		pd = pkt_devs[idx];
@@ -2780,7 +2780,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
 {
 	struct pktcdvd_device *pd;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
 	if (pd) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 50c8a8596b52..abe171566015 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -78,14 +78,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
 
 #define cmp_int(l, r)		((l > r) - (l < r))
 
-#ifdef CONFIG_PROVE_LOCKING
-static int pipe_lock_cmp_fn(const struct lockdep_map *a,
-			    const struct lockdep_map *b)
-{
-	return cmp_int((unsigned long) a, (unsigned long) b);
-}
-#endif
-
 void pipe_lock(struct pipe_inode_info *pipe)
 {
 	if (pipe->files)
@@ -824,7 +816,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
 		pipe->nr_accounted = pipe_bufs;
 		pipe->user = user;
 		mutex_init(&pipe->mutex);
-		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
+		lock_set_cmp_fn_ptr_order(&pipe->mutex);
 		return pipe;
 	}
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..e0b121f96c80 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -391,6 +391,7 @@ extern int lockdep_is_held(const void *);
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *, const struct lockdep_map *);
 void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
 
 #define lock_set_cmp_fn(lock, ...)	lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__)
@@ -398,6 +399,8 @@ void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
 #define lock_set_cmp_fn(lock, ...)	do { } while (0)
 #endif
 
+#define lock_set_cmp_fn_ptr_order(lock)	lock_set_cmp_fn(lock, lockdep_ptr_order_cmp_fn);
+
 enum xhlock_context_t {
 	XHLOCK_HARD,
 	XHLOCK_SOFT,
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 151bd3de5936..5630be7f5cb2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4919,6 +4919,12 @@ struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
 #ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
+			     const struct lockdep_map *b)
+{
+	return cmp_int((unsigned long) a, (unsigned long) b);
+}
+
 void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
 			     lock_print_fn print_fn)
 {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
  2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
  2024-01-27  2:08 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
  2024-01-27  2:08 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
@ 2024-01-27  2:08 ` Kent Overstreet
  2024-01-28  9:17   ` Kuniyuki Iwashima
  2024-01-27  2:08 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
  3 siblings, 1 reply; 19+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:08 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

Cc: netdev@vger.kernel.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 net/core/sock.c    | 1 +
 net/unix/af_unix.c | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..da7360c0f454 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3474,6 +3474,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
 	sk->sk_peer_pid 	=	NULL;
 	sk->sk_peer_cred	=	NULL;
 	spin_lock_init(&sk->sk_peer_lock);
+	lock_set_cmp_fn_ptr_order(&sk->sk_peer_lock);
 
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..d013de3c5490 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -706,10 +706,10 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
 
 	if (sk < peersk) {
 		spin_lock(&sk->sk_peer_lock);
-		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+		spin_lock(&peersk->sk_peer_lock);
 	} else {
 		spin_lock(&peersk->sk_peer_lock);
-		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+		spin_lock(&sk->sk_peer_lock);
 	}
 	old_pid = sk->sk_peer_pid;
 	old_cred = sk->sk_peer_cred;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
                   ` (2 preceding siblings ...)
  2024-01-27  2:08 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
@ 2024-01-27  2:08 ` Kent Overstreet
  2024-01-28  8:28   ` Kuniyuki Iwashima
  3 siblings, 1 reply; 19+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:08 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

Kill
 - unix_state_lock_nested
 - _nested usage for net->unx.table.locks[].

replace both with lock_set_cmp_fn_ptr_order(&u->lock).

The lock ordering in sk_diag_dump_icons() looks suspicious; this may
turn up a real issue.

Cc: netdev@vger.kernel.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/net/af_unix.h |  3 ---
 net/unix/af_unix.c    | 20 ++++++++------------
 net/unix/diag.c       |  2 +-
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027d8..4eff0a089640 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -48,9 +48,6 @@ struct scm_stat {
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
-				spin_lock_nested(&unix_sk(s)->lock, \
-				SINGLE_DEPTH_NESTING)
 
 /* The AF_UNIX socket */
 struct unix_sock {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d013de3c5490..1a0d273799c1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
 		swap(hash1, hash2);
 
 	spin_lock(&net->unx.table.locks[hash1]);
-	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+	spin_lock(&net->unx.table.locks[hash2]);
 }
 
 static void unix_table_double_unlock(struct net *net,
@@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
 	spin_lock_init(&u->lock);
+	lock_set_cmp_fn_ptr_order(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->iolock); /* single task reading lock */
@@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 {
-	if (unlikely(sk1 == sk2) || !sk2) {
-		unix_state_lock(sk1);
-		return;
-	}
-	if (sk1 < sk2) {
+	if (sk1 > sk2)
+		swap(sk1, sk2);
+	if (sk1 && sk1 != sk2)
 		unix_state_lock(sk1);
-		unix_state_lock_nested(sk2);
-	} else {
-		unix_state_lock(sk2);
-		unix_state_lock_nested(sk1);
-	}
+	unix_state_lock(sk2);
 }
 
 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out_unlock;
 	}
 
-	unix_state_lock_nested(sk);
+	unix_state_lock(sk);
 
 	if (sk->sk_state != st) {
 		unix_state_unlock(sk);
@@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)
 
 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
 		spin_lock_init(&net->unx.table.locks[i]);
+		lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
 	}
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..8ab5e2217e4c 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
 			 * queue lock. With the other's queue locked it's
 			 * OK to lock the state.
 			 */
-			unix_state_lock_nested(req);
+			unix_state_lock(req);
 			peer = unix_sk(req)->peer;
 			buf[i++] = (peer ? sock_i_ino(peer) : 0);
 			unix_state_unlock(req);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
  2024-01-27  2:08 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
@ 2024-01-28  4:29   ` kernel test robot
  2024-01-28  6:48   ` kernel test robot
  1 sibling, 0 replies; 19+ messages in thread
From: kernel test robot @ 2024-01-28  4:29 UTC (permalink / raw)
  To: Kent Overstreet, linux-kernel, netdev, linux-fsdevel
  Cc: oe-kbuild-all, Kent Overstreet, peterz, boqun.feng, linux-block,
	Jens Axboe

Hi Kent,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/locking/core]
[also build test ERROR on net/main net-next/main linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Kent-Overstreet/fs-pipe-Convert-to-lockdep_cmp_fn/20240127-101832
base:   tip/locking/core
patch link:    https://lore.kernel.org/r/20240127020833.487907-3-kent.overstreet%40linux.dev
patch subject: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
config: mips-randconfig-r063-20240127 (https://download.01.org/0day-ci/archive/20240128/202401281210.ZqZ0bZlb-lkp@intel.com/config)
compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project.git 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240128/202401281210.ZqZ0bZlb-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401281210.ZqZ0bZlb-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/locking/lockdep.c:4925:9: error: call to undeclared function 'cmp_int'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration]
           return cmp_int((unsigned long) a, (unsigned long) b);
                  ^
   kernel/locking/lockdep.c:4925:9: note: did you mean 'smp_init'?
   include/linux/smp.h:225:20: note: 'smp_init' declared here
   static inline void smp_init(void) { }
                      ^
   1 error generated.


vim +/cmp_int +4925 kernel/locking/lockdep.c

  4920	
  4921	#ifdef CONFIG_PROVE_LOCKING
  4922	int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
  4923				     const struct lockdep_map *b)
  4924	{
> 4925		return cmp_int((unsigned long) a, (unsigned long) b);
  4926	}
  4927	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
  2024-01-27  2:08 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
  2024-01-28  4:29   ` kernel test robot
@ 2024-01-28  6:48   ` kernel test robot
  1 sibling, 0 replies; 19+ messages in thread
From: kernel test robot @ 2024-01-28  6:48 UTC (permalink / raw)
  To: Kent Overstreet, linux-kernel, netdev, linux-fsdevel
  Cc: oe-kbuild-all, Kent Overstreet, peterz, boqun.feng, linux-block,
	Jens Axboe

Hi Kent,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/locking/core]
[also build test ERROR on net/main net-next/main linus/master v6.8-rc1 next-20240125]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Kent-Overstreet/fs-pipe-Convert-to-lockdep_cmp_fn/20240127-101832
base:   tip/locking/core
patch link:    https://lore.kernel.org/r/20240127020833.487907-3-kent.overstreet%40linux.dev
patch subject: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
config: x86_64-randconfig-161-20240127 (https://download.01.org/0day-ci/archive/20240128/202401281405.72SjSYnP-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240128/202401281405.72SjSYnP-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401281405.72SjSYnP-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/locking/lockdep.c: In function 'lockdep_ptr_order_cmp_fn':
>> kernel/locking/lockdep.c:4925:9: error: implicit declaration of function 'cmp_int'; did you mean 'smp_init'? [-Werror=implicit-function-declaration]
    4925 |  return cmp_int((unsigned long) a, (unsigned long) b);
         |         ^~~~~~~
         |         smp_init
   cc1: some warnings being treated as errors


vim +4925 kernel/locking/lockdep.c

  4920	
  4921	#ifdef CONFIG_PROVE_LOCKING
  4922	int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
  4923				     const struct lockdep_map *b)
  4924	{
> 4925		return cmp_int((unsigned long) a, (unsigned long) b);
  4926	}
  4927	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-27  2:08 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
@ 2024-01-28  8:28   ` Kuniyuki Iwashima
  2024-01-28 19:38     ` Kent Overstreet
  0 siblings, 1 reply; 19+ messages in thread
From: Kuniyuki Iwashima @ 2024-01-28  8:28 UTC (permalink / raw)
  To: kent.overstreet
  Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz, kuniyu

From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 26 Jan 2024 21:08:31 -0500
> Kill
>  - unix_state_lock_nested
>  - _nested usage for net->unx.table.locks[].
> 
> replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> 
> The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> turn up a real issue.

Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().

The lock order in sk_diag_dump_icons() is

  listening socket -> child socket in the listener's queue

, and the inverse order never happens.  ptr comparison does not make
sense in this case, and lockdep will complain about false positive.


> 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>  include/net/af_unix.h |  3 ---
>  net/unix/af_unix.c    | 20 ++++++++------------
>  net/unix/diag.c       |  2 +-
>  3 files changed, 9 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index 49c4640027d8..4eff0a089640 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -48,9 +48,6 @@ struct scm_stat {
>  
>  #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
>  #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
> -#define unix_state_lock_nested(s) \
> -				spin_lock_nested(&unix_sk(s)->lock, \
> -				SINGLE_DEPTH_NESTING)
>  
>  /* The AF_UNIX socket */
>  struct unix_sock {
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index d013de3c5490..1a0d273799c1 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
>  		swap(hash1, hash2);
>  
>  	spin_lock(&net->unx.table.locks[hash1]);
> -	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
> +	spin_lock(&net->unx.table.locks[hash2]);
>  }
>  
>  static void unix_table_double_unlock(struct net *net,
> @@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
>  	u->path.dentry = NULL;
>  	u->path.mnt = NULL;
>  	spin_lock_init(&u->lock);
> +	lock_set_cmp_fn_ptr_order(&u->lock);
>  	atomic_long_set(&u->inflight, 0);
>  	INIT_LIST_HEAD(&u->link);
>  	mutex_init(&u->iolock); /* single task reading lock */
> @@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>  
>  static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
>  {
> -	if (unlikely(sk1 == sk2) || !sk2) {
> -		unix_state_lock(sk1);
> -		return;
> -	}
> -	if (sk1 < sk2) {
> +	if (sk1 > sk2)
> +		swap(sk1, sk2);
> +	if (sk1 && sk1 != sk2)
>  		unix_state_lock(sk1);
> -		unix_state_lock_nested(sk2);
> -	} else {
> -		unix_state_lock(sk2);
> -		unix_state_lock_nested(sk1);
> -	}
> +	unix_state_lock(sk2);
>  }
>  
>  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
> @@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>  		goto out_unlock;
>  	}
>  
> -	unix_state_lock_nested(sk);
> +	unix_state_lock(sk);
>  
>  	if (sk->sk_state != st) {
>  		unix_state_unlock(sk);
> @@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)
>  
>  	for (i = 0; i < UNIX_HASH_SIZE; i++) {
>  		spin_lock_init(&net->unx.table.locks[i]);
> +		lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
>  		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
>  	}
>  
> diff --git a/net/unix/diag.c b/net/unix/diag.c
> index bec09a3a1d44..8ab5e2217e4c 100644
> --- a/net/unix/diag.c
> +++ b/net/unix/diag.c
> @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
>  			 * queue lock. With the other's queue locked it's
>  			 * OK to lock the state.
>  			 */
> -			unix_state_lock_nested(req);
> +			unix_state_lock(req);
>  			peer = unix_sk(req)->peer;
>  			buf[i++] = (peer ? sock_i_ino(peer) : 0);
>  			unix_state_unlock(req);
> -- 
> 2.43.0

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
  2024-01-27  2:08 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
@ 2024-01-28  9:17   ` Kuniyuki Iwashima
  0 siblings, 0 replies; 19+ messages in thread
From: Kuniyuki Iwashima @ 2024-01-28  9:17 UTC (permalink / raw)
  To: kent.overstreet
  Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz, kuniyu

From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 26 Jan 2024 21:08:30 -0500
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>  net/core/sock.c    | 1 +
>  net/unix/af_unix.c | 4 ++--
>  2 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 158dbdebce6a..da7360c0f454 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -3474,6 +3474,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
>  	sk->sk_peer_pid 	=	NULL;
>  	sk->sk_peer_cred	=	NULL;
>  	spin_lock_init(&sk->sk_peer_lock);
> +	lock_set_cmp_fn_ptr_order(&sk->sk_peer_lock);
>  
>  	sk->sk_write_pending	=	0;
>  	sk->sk_rcvlowat		=	1;
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index ac1f2bc18fc9..d013de3c5490 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -706,10 +706,10 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
>  
>  	if (sk < peersk) {
>  		spin_lock(&sk->sk_peer_lock);
> -		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
> +		spin_lock(&peersk->sk_peer_lock);
>  	} else {
>  		spin_lock(&peersk->sk_peer_lock);
> -		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
> +		spin_lock(&sk->sk_peer_lock);
>  	}

hmm.. I think we need not hold two locks here in the first place.
Let me post patches.

Thanks!


>  	old_pid = sk->sk_peer_pid;
>  	old_cred = sk->sk_peer_cred;
> -- 
> 2.43.0

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-28  8:28   ` Kuniyuki Iwashima
@ 2024-01-28 19:38     ` Kent Overstreet
  2024-01-28 20:56       ` Kuniyuki Iwashima
  0 siblings, 1 reply; 19+ messages in thread
From: Kent Overstreet @ 2024-01-28 19:38 UTC (permalink / raw)
  To: Kuniyuki Iwashima; +Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz

On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> From: Kent Overstreet <kent.overstreet@linux.dev>
> Date: Fri, 26 Jan 2024 21:08:31 -0500
> > Kill
> >  - unix_state_lock_nested
> >  - _nested usage for net->unx.table.locks[].
> > 
> > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > 
> > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > turn up a real issue.
> 
> Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> 
> The lock order in sk_diag_dump_icons() is
> 
>   listening socket -> child socket in the listener's queue
> 
> , and the inverse order never happens.  ptr comparison does not make
> sense in this case, and lockdep will complain about false positive.

Is that a real lock ordering? Is this parent -> child relationship well
defined?

If it is, we should be able to write a lock_cmp_fn for it, as long as
it's not some handwavy "this will never happen but _nested won't check
for it" like I saw elsewhere in the net code... :)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-28 19:38     ` Kent Overstreet
@ 2024-01-28 20:56       ` Kuniyuki Iwashima
  2024-01-29  1:34         ` Kent Overstreet
  0 siblings, 1 reply; 19+ messages in thread
From: Kuniyuki Iwashima @ 2024-01-28 20:56 UTC (permalink / raw)
  To: kent.overstreet
  Cc: boqun.feng, kuniyu, linux-fsdevel, linux-kernel, netdev, peterz

From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 28 Jan 2024 14:38:02 -0500
> On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> > From: Kent Overstreet <kent.overstreet@linux.dev>
> > Date: Fri, 26 Jan 2024 21:08:31 -0500
> > > Kill
> > >  - unix_state_lock_nested
> > >  - _nested usage for net->unx.table.locks[].
> > > 
> > > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > > 
> > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > > turn up a real issue.
> > 
> > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> > 
> > The lock order in sk_diag_dump_icons() is
> > 
> >   listening socket -> child socket in the listener's queue
> > 
> > , and the inverse order never happens.  ptr comparison does not make
> > sense in this case, and lockdep will complain about false positive.
> 
> Is that a real lock ordering? Is this parent -> child relationship well
> defined?
> 
> If it is, we should be able to write a lock_cmp_fn for it, as long as
> it's not some handwavy "this will never happen but _nested won't check
> for it" like I saw elsewhere in the net code... :)

The problem would be there's no handy way to detect the relationship
except for iterating the queue again.

---8<---
static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
				  const struct lockdep_map *_b)
{
	const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map);
	const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map);

	if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) {
		/* check if b is a's cihld */
	}

	/* otherwise, ptr comparison here. */
}
---8<---


This can be resolved by a patch like this, which is in my local tree
for another series.

So, after posting the series, I can revisit this and write lock_cmp_fn
for u->lock.

---8<---
commit 12d39766b06068fda5987f4e7b4827e8c3273137
Author: Kuniyuki Iwashima <kuniyu@amazon.com>
Date:   Thu Jan 11 22:36:58 2024 +0000

    af_unix: Save listener for embryo socket.
    
    Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 9ea04653c7c9..d0c0d81bcb1d 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -82,6 +82,7 @@ struct scm_stat {
 struct unix_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
+#define usk_listener		sk.__sk_common.skc_unix_listener
 	struct unix_address	*addr;
 	struct path		path;
 	struct mutex		iolock, bindlock;
diff --git a/include/net/sock.h b/include/net/sock.h
index a9d99a9c583f..3df3d068345a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -142,6 +142,8 @@ typedef __u64 __bitwise __addrpair;
  *		[union with @skc_incoming_cpu]
  *	@skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
  *		[union with @skc_incoming_cpu]
+ *	@skc_unix_listener: connection request listener socket for AF_UNIX
+ *		[union with @skc_rxhash]
  *	@skc_refcnt: reference count
  *
  *	This is the minimal network layer representation of sockets, the header
@@ -227,6 +229,7 @@ struct sock_common {
 		u32		skc_rxhash;
 		u32		skc_window_clamp;
 		u32		skc_tw_snd_nxt; /* struct tcp_timewait_sock */
+		struct sock	*skc_unix_listener; /* struct unix_sock */
 	};
 	/* public: */
 };
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5f9871555ec6..4a41bb727c32 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -991,6 +991,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
 	sk->sk_destruct		= unix_sock_destructor;
 	u = unix_sk(sk);
+	u->usk_listener = NULL;
 	u->inflight = 0;
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
@@ -1612,6 +1613,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	newsk->sk_type		= sk->sk_type;
 	init_peercred(newsk);
 	newu = unix_sk(newsk);
+	newu->usk_listener = other;
 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
 	otheru = unix_sk(other);
 
@@ -1707,8 +1709,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
 		       bool kern)
 {
 	struct sock *sk = sock->sk;
-	struct sock *tsk;
 	struct sk_buff *skb;
+	struct sock *tsk;
 	int err;
 
 	err = -EOPNOTSUPP;
@@ -1733,6 +1735,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
 	}
 
 	tsk = skb->sk;
+	unix_sk(tsk)->usk_listener = NULL;
 	skb_free_datagram(sk, skb);
 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
 
---8<---


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-28 20:56       ` Kuniyuki Iwashima
@ 2024-01-29  1:34         ` Kent Overstreet
  0 siblings, 0 replies; 19+ messages in thread
From: Kent Overstreet @ 2024-01-29  1:34 UTC (permalink / raw)
  To: Kuniyuki Iwashima; +Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz

On Sun, Jan 28, 2024 at 12:56:32PM -0800, Kuniyuki Iwashima wrote:
> From: Kent Overstreet <kent.overstreet@linux.dev>
> Date: Sun, 28 Jan 2024 14:38:02 -0500
> > On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> > > From: Kent Overstreet <kent.overstreet@linux.dev>
> > > Date: Fri, 26 Jan 2024 21:08:31 -0500
> > > > Kill
> > > >  - unix_state_lock_nested
> > > >  - _nested usage for net->unx.table.locks[].
> > > > 
> > > > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > > > 
> > > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > > > turn up a real issue.
> > > 
> > > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> > > 
> > > The lock order in sk_diag_dump_icons() is
> > > 
> > >   listening socket -> child socket in the listener's queue
> > > 
> > > , and the inverse order never happens.  ptr comparison does not make
> > > sense in this case, and lockdep will complain about false positive.
> > 
> > Is that a real lock ordering? Is this parent -> child relationship well
> > defined?
> > 
> > If it is, we should be able to write a lock_cmp_fn for it, as long as
> > it's not some handwavy "this will never happen but _nested won't check
> > for it" like I saw elsewhere in the net code... :)
> 
> The problem would be there's no handy way to detect the relationship
> except for iterating the queue again.
> 
> ---8<---
> static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
> 				  const struct lockdep_map *_b)
> {
> 	const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map);
> 	const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map);
> 
> 	if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) {
> 		/* check if b is a's cihld */
> 	}
> 
> 	/* otherwise, ptr comparison here. */
> }
> ---8<---
> 
> 
> This can be resolved by a patch like this, which is in my local tree
> for another series.
> 
> So, after posting the series, I can revisit this and write lock_cmp_fn
> for u->lock.

Sounds good! Please CC me when you do.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-01-27  2:08 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
@ 2024-02-02 12:03   ` Jan Kara
  2024-02-02 12:25     ` Sedat Dilek
  2024-02-02 12:47     ` Kent Overstreet
  0 siblings, 2 replies; 19+ messages in thread
From: Jan Kara @ 2024-02-02 12:03 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: linux-kernel, netdev, linux-fsdevel, peterz, boqun.feng,
	Alexander Viro, Christian Brauner, Jan Kara

On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot device a total ordering on an unbounded number
> of elements with only a few subclasses.
> 
> the replacement is to define lock ordering with a proper comparison
> function.
> 
> fs/pipe.c was already doing everything correctly otherwise, nothing
> much changes here.
> 
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

I had to digest for a while what this new lockdep lock ordering feature is
about. I have one pending question - what is the motivation of this
conversion of pipe code? AFAIU we don't have any problems with lockdep
annotations on pipe->mutex because there are always only two subclasses?

								Honza

> ---
>  fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
>  1 file changed, 36 insertions(+), 45 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index f1adbfe743d4..50c8a8596b52 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
>   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
>   */
>  
> -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> +#define cmp_int(l, r)		((l > r) - (l < r))
> +
> +#ifdef CONFIG_PROVE_LOCKING
> +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> +			    const struct lockdep_map *b)
>  {
> -	if (pipe->files)
> -		mutex_lock_nested(&pipe->mutex, subclass);
> +	return cmp_int((unsigned long) a, (unsigned long) b);
>  }
> +#endif
>  
>  void pipe_lock(struct pipe_inode_info *pipe)
>  {
> -	/*
> -	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
> -	 */
> -	pipe_lock_nested(pipe, I_MUTEX_PARENT);
> +	if (pipe->files)
> +		mutex_lock(&pipe->mutex);
>  }
>  EXPORT_SYMBOL(pipe_lock);
>  
> @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>  }
>  EXPORT_SYMBOL(pipe_unlock);
>  
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> -	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> -	mutex_unlock(&pipe->mutex);
> -}
> -
>  void pipe_double_lock(struct pipe_inode_info *pipe1,
>  		      struct pipe_inode_info *pipe2)
>  {
>  	BUG_ON(pipe1 == pipe2);
>  
> -	if (pipe1 < pipe2) {
> -		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> -		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> -	} else {
> -		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> -		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> -	}
> +	if (pipe1 > pipe2)
> +		swap(pipe1, pipe2);
> +
> +	pipe_lock(pipe1);
> +	pipe_lock(pipe2);
>  }
>  
>  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		return 0;
>  
>  	ret = 0;
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	/*
>  	 * We only wake up writers if the pipe was full when we started
> @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  			ret = -EAGAIN;
>  			break;
>  		}
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  
>  		/*
>  		 * We only get here if we didn't actually read anything.
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
>  			return -ERESTARTSYS;
>  
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>  		wake_next_reader = true;
>  	}
>  	if (pipe_empty(pipe->head, pipe->tail))
>  		wake_next_reader = false;
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	if (was_full)
>  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  	if (unlikely(total_len == 0))
>  		return 0;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	if (!pipe->readers) {
>  		send_sig(SIGPIPE, current, 0);
> @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  		 * after waiting we need to re-check whether the pipe
>  		 * become empty while we dropped the lock.
>  		 */
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  		if (was_empty)
>  			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		was_empty = pipe_empty(pipe->head, pipe->tail);
>  		wake_next_writer = true;
>  	}
>  out:
>  	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
>  		wake_next_writer = false;
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	/*
>  	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
> @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  
>  	switch (cmd) {
>  	case FIONREAD:
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		count = 0;
>  		head = pipe->head;
>  		tail = pipe->tail;
> @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			count += pipe->bufs[tail & mask].len;
>  			tail++;
>  		}
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  
>  		return put_user(count, (int __user *)arg);
>  
>  #ifdef CONFIG_WATCH_QUEUE
>  	case IOC_WATCH_QUEUE_SET_SIZE: {
>  		int ret;
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		ret = watch_queue_set_size(pipe, arg);
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  		return ret;
>  	}
>  
> @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
>  {
>  	struct pipe_inode_info *pipe = file->private_data;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  	if (file->f_mode & FMODE_READ)
>  		pipe->readers--;
>  	if (file->f_mode & FMODE_WRITE)
> @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
>  	}
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	put_pipe_info(inode, pipe);
>  	return 0;
> @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
>  	struct pipe_inode_info *pipe = filp->private_data;
>  	int retval = 0;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  	if (filp->f_mode & FMODE_READ)
>  		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
>  	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
>  			/* this can happen only if on == T */
>  			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
>  	}
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return retval;
>  }
>  
> @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
>  		pipe->nr_accounted = pipe_bufs;
>  		pipe->user = user;
>  		mutex_init(&pipe->mutex);
> +		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
>  		return pipe;
>  	}
>  
> @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	filp->private_data = pipe;
>  	/* OK, we have a pipe and it's pinned down */
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	/* We can only do regular read/write on fifos */
>  	stream_open(inode, filp);
> @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	}
>  
>  	/* Ok! */
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return 0;
>  
>  err_rd:
> @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	goto err;
>  
>  err:
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	put_pipe_info(inode, pipe);
>  	return ret;
> @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
>  	if (!pipe)
>  		return -EBADF;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	switch (cmd) {
>  	case F_SETPIPE_SZ:
> @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
>  		break;
>  	}
>  
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return ret;
>  }
>  
> -- 
> 2.43.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-02-02 12:03   ` Jan Kara
@ 2024-02-02 12:25     ` Sedat Dilek
  2024-02-05  9:53       ` Jan Kara
  2024-02-02 12:47     ` Kent Overstreet
  1 sibling, 1 reply; 19+ messages in thread
From: Sedat Dilek @ 2024-02-02 12:25 UTC (permalink / raw)
  To: Jan Kara
  Cc: Kent Overstreet, linux-kernel, netdev, linux-fsdevel, peterz,
	boqun.feng, Alexander Viro, Christian Brauner

On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> >
> > the replacement is to define lock ordering with a proper comparison
> > function.
> >
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> >
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
>
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?
>
>                                                                 Honza

Hi,

"Numbers talk - Bullshit walks." (Linus Torvalds)

In things of pipes - I normally benchmark like this (example):

root# cat /dev/sdc | pipebench > /dev/null

Do you have numbers for your patch-series?

Thanks.

BG,
-Sedat-

[1] https://packages.debian.org/pipebench

>
> > ---
> >  fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
> >  1 file changed, 36 insertions(+), 45 deletions(-)
> >
> > diff --git a/fs/pipe.c b/fs/pipe.c
> > index f1adbfe743d4..50c8a8596b52 100644
> > --- a/fs/pipe.c
> > +++ b/fs/pipe.c
> > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
> >   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
> >   */
> >
> > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> > +#define cmp_int(l, r)                ((l > r) - (l < r))
> > +
> > +#ifdef CONFIG_PROVE_LOCKING
> > +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> > +                         const struct lockdep_map *b)
> >  {
> > -     if (pipe->files)
> > -             mutex_lock_nested(&pipe->mutex, subclass);
> > +     return cmp_int((unsigned long) a, (unsigned long) b);
> >  }
> > +#endif
> >
> >  void pipe_lock(struct pipe_inode_info *pipe)
> >  {
> > -     /*
> > -      * pipe_lock() nests non-pipe inode locks (for writing to a file)
> > -      */
> > -     pipe_lock_nested(pipe, I_MUTEX_PARENT);
> > +     if (pipe->files)
> > +             mutex_lock(&pipe->mutex);
> >  }
> >  EXPORT_SYMBOL(pipe_lock);
> >
> > @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> >  }
> >  EXPORT_SYMBOL(pipe_unlock);
> >
> > -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> > -{
> > -     mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> > -}
> > -
> > -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> > -{
> > -     mutex_unlock(&pipe->mutex);
> > -}
> > -
> >  void pipe_double_lock(struct pipe_inode_info *pipe1,
> >                     struct pipe_inode_info *pipe2)
> >  {
> >       BUG_ON(pipe1 == pipe2);
> >
> > -     if (pipe1 < pipe2) {
> > -             pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> > -             pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> > -     } else {
> > -             pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> > -             pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> > -     }
> > +     if (pipe1 > pipe2)
> > +             swap(pipe1, pipe2);
> > +
> > +     pipe_lock(pipe1);
> > +     pipe_lock(pipe2);
> >  }
> >
> >  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >               return 0;
> >
> >       ret = 0;
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       /*
> >        * We only wake up writers if the pipe was full when we started
> > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >                       ret = -EAGAIN;
> >                       break;
> >               }
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >
> >               /*
> >                * We only get here if we didn't actually read anything.
> > @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >               if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
> >                       return -ERESTARTSYS;
> >
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> >               wake_next_reader = true;
> >       }
> >       if (pipe_empty(pipe->head, pipe->tail))
> >               wake_next_reader = false;
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       if (was_full)
> >               wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> >       if (unlikely(total_len == 0))
> >               return 0;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       if (!pipe->readers) {
> >               send_sig(SIGPIPE, current, 0);
> > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> >                * after waiting we need to re-check whether the pipe
> >                * become empty while we dropped the lock.
> >                */
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >               if (was_empty)
> >                       wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> >               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> >               wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               was_empty = pipe_empty(pipe->head, pipe->tail);
> >               wake_next_writer = true;
> >       }
> >  out:
> >       if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
> >               wake_next_writer = false;
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       /*
> >        * If we do do a wakeup event, we do a 'sync' wakeup, because we
> > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >
> >       switch (cmd) {
> >       case FIONREAD:
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               count = 0;
> >               head = pipe->head;
> >               tail = pipe->tail;
> > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >                       count += pipe->bufs[tail & mask].len;
> >                       tail++;
> >               }
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >
> >               return put_user(count, (int __user *)arg);
> >
> >  #ifdef CONFIG_WATCH_QUEUE
> >       case IOC_WATCH_QUEUE_SET_SIZE: {
> >               int ret;
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               ret = watch_queue_set_size(pipe, arg);
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >               return ret;
> >       }
> >
> > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
> >  {
> >       struct pipe_inode_info *pipe = file->private_data;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >       if (file->f_mode & FMODE_READ)
> >               pipe->readers--;
> >       if (file->f_mode & FMODE_WRITE)
> > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
> >               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> >               kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> >       }
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       put_pipe_info(inode, pipe);
> >       return 0;
> > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> >       struct pipe_inode_info *pipe = filp->private_data;
> >       int retval = 0;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >       if (filp->f_mode & FMODE_READ)
> >               retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
> >       if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> >                       /* this can happen only if on == T */
> >                       fasync_helper(-1, filp, 0, &pipe->fasync_readers);
> >       }
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >       return retval;
> >  }
> >
> > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
> >               pipe->nr_accounted = pipe_bufs;
> >               pipe->user = user;
> >               mutex_init(&pipe->mutex);
> > +             lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
> >               return pipe;
> >       }
> >
> > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> >       filp->private_data = pipe;
> >       /* OK, we have a pipe and it's pinned down */
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       /* We can only do regular read/write on fifos */
> >       stream_open(inode, filp);
> > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> >       }
> >
> >       /* Ok! */
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >       return 0;
> >
> >  err_rd:
> > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> >       goto err;
> >
> >  err:
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       put_pipe_info(inode, pipe);
> >       return ret;
> > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> >       if (!pipe)
> >               return -EBADF;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       switch (cmd) {
> >       case F_SETPIPE_SZ:
> > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> >               break;
> >       }
> >
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >       return ret;
> >  }
> >
> > --
> > 2.43.0
> >
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-02-02 12:03   ` Jan Kara
  2024-02-02 12:25     ` Sedat Dilek
@ 2024-02-02 12:47     ` Kent Overstreet
  2024-02-05 10:09       ` Jan Kara
  1 sibling, 1 reply; 19+ messages in thread
From: Kent Overstreet @ 2024-02-02 12:47 UTC (permalink / raw)
  To: Jan Kara
  Cc: linux-kernel, netdev, linux-fsdevel, peterz, boqun.feng,
	Alexander Viro, Christian Brauner

On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> > 
> > the replacement is to define lock ordering with a proper comparison
> > function.
> > 
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> > 
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> 
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?

It's one of the easier conversions to do, and ideally /all/ users of
subclasses would go away.

Start with the easier ones, figure out those patterns, then the
harder...

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-02-02 12:25     ` Sedat Dilek
@ 2024-02-05  9:53       ` Jan Kara
  2024-02-05  9:59         ` Sedat Dilek
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kara @ 2024-02-05  9:53 UTC (permalink / raw)
  To: Sedat Dilek
  Cc: Jan Kara, Kent Overstreet, linux-kernel, netdev, linux-fsdevel,
	peterz, boqun.feng, Alexander Viro, Christian Brauner

On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot device a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > >
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > >
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > >
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> >
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
> >
> >                                                                 Honza
> 
> Hi,
> 
> "Numbers talk - Bullshit walks." (Linus Torvalds)
> 
> In things of pipes - I normally benchmark like this (example):
> 
> root# cat /dev/sdc | pipebench > /dev/null
> 
> Do you have numbers for your patch-series?

Sedat AFAIU this patch is not about performance at all but rather about
lockdep instrumentation... But maybe I'm missing your point?

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-02-05  9:53       ` Jan Kara
@ 2024-02-05  9:59         ` Sedat Dilek
  0 siblings, 0 replies; 19+ messages in thread
From: Sedat Dilek @ 2024-02-05  9:59 UTC (permalink / raw)
  To: Jan Kara
  Cc: Kent Overstreet, linux-kernel, netdev, linux-fsdevel, peterz,
	boqun.feng, Alexander Viro, Christian Brauner

On Mon, Feb 5, 2024 at 10:54 AM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> > On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > > ordering, but we cannot device a total ordering on an unbounded number
> > > > of elements with only a few subclasses.
> > > >
> > > > the replacement is to define lock ordering with a proper comparison
> > > > function.
> > > >
> > > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > > much changes here.
> > > >
> > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > > Cc: Christian Brauner <brauner@kernel.org>
> > > > Cc: Jan Kara <jack@suse.cz>
> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > >
> > > I had to digest for a while what this new lockdep lock ordering feature is
> > > about. I have one pending question - what is the motivation of this
> > > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > > annotations on pipe->mutex because there are always only two subclasses?
> > >
> > >                                                                 Honza
> >
> > Hi,
> >
> > "Numbers talk - Bullshit walks." (Linus Torvalds)
> >
> > In things of pipes - I normally benchmark like this (example):
> >
> > root# cat /dev/sdc | pipebench > /dev/null
> >
> > Do you have numbers for your patch-series?
>
> Sedat AFAIU this patch is not about performance at all but rather about
> lockdep instrumentation... But maybe I'm missing your point?
>

Sorry, I missed the point, Jan.

-Sedat-

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-02-02 12:47     ` Kent Overstreet
@ 2024-02-05 10:09       ` Jan Kara
  0 siblings, 0 replies; 19+ messages in thread
From: Jan Kara @ 2024-02-05 10:09 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: Jan Kara, linux-kernel, netdev, linux-fsdevel, peterz,
	boqun.feng, Alexander Viro, Christian Brauner

On Fri 02-02-24 07:47:50, Kent Overstreet wrote:
> On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot device a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > > 
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > > 
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > > 
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > 
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
> 
> It's one of the easier conversions to do, and ideally /all/ users of
> subclasses would go away.
> 
> Start with the easier ones, figure out those patterns, then the
> harder...

I see, thanks for explanation. So in the pipes case I actually like that
the patch makes the locking less obfuscated with lockdep details (to which
I'm mostly used to but still ;)). So feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

for this patch. I'm not 100% convinced it will be always possible to
replace subclasses with the new ordering mechanism but I guess time will
show.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
  2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
@ 2024-01-27  2:01 ` Kent Overstreet
  0 siblings, 0 replies; 19+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:01 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel
  Cc: Kent Overstreet, peterz, boqun.feng, linux-block, Jens Axboe

Unecessary, we're not actually taking nested locks of the same type.

Cc: linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 drivers/block/pktcdvd.c  |  8 ++++----
 fs/pipe.c                | 10 +---------
 include/linux/lockdep.h  |  3 +++
 kernel/locking/lockdep.c |  6 ++++++
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d56d972aadb3..2eb68a624fda 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -332,7 +332,7 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
 {
 	int n = 0;
 	int idx;
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 	for (idx = 0; idx < MAX_WRITERS; idx++) {
 		struct pktcdvd_device *pd = pkt_devs[idx];
 		if (!pd)
@@ -2639,7 +2639,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	struct pktcdvd_device *pd;
 	struct gendisk *disk;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	for (idx = 0; idx < MAX_WRITERS; idx++)
 		if (!pkt_devs[idx])
@@ -2729,7 +2729,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	int idx;
 	int ret = 0;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	for (idx = 0; idx < MAX_WRITERS; idx++) {
 		pd = pkt_devs[idx];
@@ -2780,7 +2780,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
 {
 	struct pktcdvd_device *pd;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
 	if (pd) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 50c8a8596b52..abe171566015 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -78,14 +78,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
 
 #define cmp_int(l, r)		((l > r) - (l < r))
 
-#ifdef CONFIG_PROVE_LOCKING
-static int pipe_lock_cmp_fn(const struct lockdep_map *a,
-			    const struct lockdep_map *b)
-{
-	return cmp_int((unsigned long) a, (unsigned long) b);
-}
-#endif
-
 void pipe_lock(struct pipe_inode_info *pipe)
 {
 	if (pipe->files)
@@ -824,7 +816,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
 		pipe->nr_accounted = pipe_bufs;
 		pipe->user = user;
 		mutex_init(&pipe->mutex);
-		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
+		lock_set_cmp_fn_ptr_order(&pipe->mutex);
 		return pipe;
 	}
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..e0b121f96c80 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -391,6 +391,7 @@ extern int lockdep_is_held(const void *);
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *, const struct lockdep_map *);
 void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
 
 #define lock_set_cmp_fn(lock, ...)	lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__)
@@ -398,6 +399,8 @@ void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
 #define lock_set_cmp_fn(lock, ...)	do { } while (0)
 #endif
 
+#define lock_set_cmp_fn_ptr_order(lock)	lock_set_cmp_fn(lock, lockdep_ptr_order_cmp_fn);
+
 enum xhlock_context_t {
 	XHLOCK_HARD,
 	XHLOCK_SOFT,
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 151bd3de5936..5630be7f5cb2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4919,6 +4919,12 @@ struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
 #ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
+			     const struct lockdep_map *b)
+{
+	return cmp_int((unsigned long) a, (unsigned long) b);
+}
+
 void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
 			     lock_print_fn print_fn)
 {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2024-02-05 10:09 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
2024-01-27  2:08 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
2024-02-02 12:03   ` Jan Kara
2024-02-02 12:25     ` Sedat Dilek
2024-02-05  9:53       ` Jan Kara
2024-02-05  9:59         ` Sedat Dilek
2024-02-02 12:47     ` Kent Overstreet
2024-02-05 10:09       ` Jan Kara
2024-01-27  2:08 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
2024-01-28  4:29   ` kernel test robot
2024-01-28  6:48   ` kernel test robot
2024-01-27  2:08 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
2024-01-28  9:17   ` Kuniyuki Iwashima
2024-01-27  2:08 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
2024-01-28  8:28   ` Kuniyuki Iwashima
2024-01-28 19:38     ` Kent Overstreet
2024-01-28 20:56       ` Kuniyuki Iwashima
2024-01-29  1:34         ` Kent Overstreet
  -- strict thread matches above, loose matches on Subject: below --
2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
2024-01-27  2:01 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).