* Re: possible circular locking dependency detected
       [not found]                     ` <CA+55aFzxJM4pbS_jySERnCoOvvPbo+FgM7FZEATLJnCseD0j0g@mail.gmail.com>
@ 2016-09-01 22:04                       ` Linus Torvalds
  2016-09-02 14:43                         ` CAI Qian
  2016-09-02 15:18                         ` Rainer Weikusat
  0 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2016-09-01 22:04 UTC (permalink / raw)
  To: Al Viro, CAI Qian
  Cc: Miklos Szeredi, Rainer Weikusat, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development

[-- Attachment #1: Type: text/plain, Size: 668 bytes --]

On Thu, Sep 1, 2016 at 2:43 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Thu, Sep 1, 2016 at 2:01 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>>
>> Outside as in "all fs activity in bind happens under it".  Along with
>> assignment to ->u.addr, etc.  IOW, make it the outermost lock there.
>
> Hah, yes. I misunderstood you.
>
> Yes. In fact that fixes the problem I mentioned, rather than introducing it.

So the easiest approach would seem to be to revert commit c845acb324aa
("af_unix: Fix splice-bind deadlock"), and then apply the lock split.

Like the attached two patches.

This is still *entirely* untested.

Rainer?

                 Linus
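
For reference, a minimal sketch of the lock ordering the two attached
patches establish (simplified; unix_bind() in the patches themselves is
the authoritative version):

	/* bind path: the bind lock is the outermost lock, taken before
	 * any VFS pathname locks (kern_path_create() and friends) */
	mutex_lock(&u->bindlock);
	err = unix_mknod(sun_path, mode, &path);  /* kern_path_create() inside */
	...
	mutex_unlock(&u->bindlock);

	/* IO path: the IO lock nests inside whatever filesystem or pipe
	 * locks the caller already holds (e.g. during splice) */
	mutex_lock(&u->iolock);
	/* ... manipulate skbs on sk_receive_queue ... */
	mutex_unlock(&u->iolock);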

[-- Attachment #2: 0001-Revert-af_unix-Fix-splice-bind-deadlock.patch --]
[-- Type: text/x-patch, Size: 4466 bytes --]

From df486d3b0cf308cc1cf43199710ca47cc6e7535f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 1 Sep 2016 14:56:49 -0700
Subject: [PATCH 1/2] Revert "af_unix: Fix splice-bind deadlock"

This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1.

It turns out that it just replaces one deadlock with another one: we can
still get the wrong lock ordering with the readlock due to overlayfs
calling back into the filesystem layer and still taking the vfs locks
after the readlock.

The proper solution ends up being to just split the readlock into two
pieces: the bind lock (taken *outside* the vfs locks) and the IO lock
(taken *inside* the filesystem locks).  The two locks are independent
anyway.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/unix/af_unix.c | 66 +++++++++++++++++++++---------------------------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f1dffe84f0d5..433ae1bbef97 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -954,20 +954,32 @@ fail:
 	return NULL;
 }
 
-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
-		      struct path *res)
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 {
-	int err;
+	struct dentry *dentry;
+	struct path path;
+	int err = 0;
+	/*
+	 * Get the parent directory, calculate the hash for last
+	 * component.
+	 */
+	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		return err;
 
-	err = security_path_mknod(path, dentry, mode, 0);
+	/*
+	 * All right, let's create it.
+	 */
+	err = security_path_mknod(&path, dentry, mode, 0);
 	if (!err) {
-		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
+		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 		if (!err) {
-			res->mnt = mntget(path->mnt);
+			res->mnt = mntget(path.mnt);
 			res->dentry = dget(dentry);
 		}
 	}
-
+	done_path_create(&path, dentry);
 	return err;
 }
 
@@ -978,12 +990,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	char *sun_path = sunaddr->sun_path;
-	int err, name_err;
+	int err;
 	unsigned int hash;
 	struct unix_address *addr;
 	struct hlist_head *list;
-	struct path path;
-	struct dentry *dentry;
 
 	err = -EINVAL;
 	if (sunaddr->sun_family != AF_UNIX)
@@ -999,34 +1009,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	addr_len = err;
 
-	name_err = 0;
-	dentry = NULL;
-	if (sun_path[0]) {
-		/* Get the parent directory, calculate the hash for last
-		 * component.
-		 */
-		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-
-		if (IS_ERR(dentry)) {
-			/* delay report until after 'already bound' check */
-			name_err = PTR_ERR(dentry);
-			dentry = NULL;
-		}
-	}
-
 	err = mutex_lock_interruptible(&u->readlock);
 	if (err)
-		goto out_path;
+		goto out;
 
 	err = -EINVAL;
 	if (u->addr)
 		goto out_up;
 
-	if (name_err) {
-		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
-		goto out_up;
-	}
-
 	err = -ENOMEM;
 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 	if (!addr)
@@ -1037,11 +1027,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	addr->hash = hash ^ sk->sk_type;
 	atomic_set(&addr->refcnt, 1);
 
-	if (dentry) {
-		struct path u_path;
+	if (sun_path[0]) {
+		struct path path;
 		umode_t mode = S_IFSOCK |
 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(dentry, &path, mode, &u_path);
+		err = unix_mknod(sun_path, mode, &path);
 		if (err) {
 			if (err == -EEXIST)
 				err = -EADDRINUSE;
@@ -1049,9 +1039,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			goto out_up;
 		}
 		addr->hash = UNIX_HASH_SIZE;
-		hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
-		u->path = u_path;
+		u->path = path;
 		list = &unix_socket_table[hash];
 	} else {
 		spin_lock(&unix_table_lock);
@@ -1074,10 +1064,6 @@ out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
 	mutex_unlock(&u->readlock);
-out_path:
-	if (dentry)
-		done_path_create(&path, dentry);
-
 out:
 	return err;
 }
-- 
2.10.0.rc0.2.g0a9fa47


[-- Attachment #3: 0002-af_unix-split-u-readlock-into-two-iolock-and-bindloc.patch --]
[-- Type: text/x-patch, Size: 6998 bytes --]

From 9a76489d81f6d2b1da22906363d28c398d4f7c5c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 1 Sep 2016 14:43:53 -0700
Subject: [PATCH 2/2] af_unix: split 'u->readlock' into two: 'iolock' and
 'bindlock'

Right now we use the 'readlock' both for protecting some of the af_unix
IO path and for making the bind be single-threaded.

The two are independent, but using the same lock makes for a nasty
deadlock due to ordering with regards to filesystem locking.  The bind
locking would want to nest outside the VFS pathname locking, but the IO
locking wants to nest inside some of those same locks.

We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix
splice-bind deadlock") which moved the readlock inside the vfs locks,
but that caused problems with overlayfs that will then call back into
filesystem routines that take the lock in the wrong order anyway.

Splitting the locks means that we can go back to having the bind lock be
the outermost lock, and we don't have any deadlocks with lock ordering.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/af_unix.h |  2 +-
 net/unix/af_unix.c    | 45 +++++++++++++++++++++++----------------------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 9b4c418bebd8..fd60eccb59a6 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -52,7 +52,7 @@ struct unix_sock {
 	struct sock		sk;
 	struct unix_address     *addr;
 	struct path		path;
-	struct mutex		readlock;
+	struct mutex		iolock, bindlock;
 	struct sock		*peer;
 	struct list_head	link;
 	atomic_long_t		inflight;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 433ae1bbef97..8309687a56b0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	if (mutex_lock_interruptible(&u->readlock))
+	if (mutex_lock_interruptible(&u->iolock))
 		return -EINTR;
 
 	sk->sk_peek_off = val;
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 
 	return 0;
 }
@@ -779,7 +779,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	spin_lock_init(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
-	mutex_init(&u->readlock); /* single task reading lock */
+	mutex_init(&u->iolock); /* single task reading lock */
+	mutex_init(&u->bindlock); /* single task binding lock */
 	init_waitqueue_head(&u->peer_wait);
 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
@@ -848,7 +849,7 @@ static int unix_autobind(struct socket *sock)
 	int err;
 	unsigned int retries = 0;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
 		return err;
 
@@ -895,7 +896,7 @@ retry:
 	spin_unlock(&unix_table_lock);
 	err = 0;
 
-out:	mutex_unlock(&u->readlock);
+out:	mutex_unlock(&u->bindlock);
 	return err;
 }
 
@@ -1009,7 +1010,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	addr_len = err;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
 		goto out;
 
@@ -1063,7 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->bindlock);
 out:
 	return err;
 }
@@ -1955,17 +1956,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	if (false) {
 alloc_skb:
 		unix_state_unlock(other);
-		mutex_unlock(&unix_sk(other)->readlock);
+		mutex_unlock(&unix_sk(other)->iolock);
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
 			goto err;
 	}
 
-	/* we must acquire readlock as we modify already present
+	/* we must acquire iolock as we modify already present
 	 * skbs in the sk_receive_queue and mess with skb->len
 	 */
-	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
 		goto err;
@@ -2032,7 +2033,7 @@ alloc_skb:
 	}
 
 	unix_state_unlock(other);
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 
 	other->sk_data_ready(other);
 	scm_destroy(&scm);
@@ -2041,7 +2042,7 @@ alloc_skb:
 err_state_unlock:
 	unix_state_unlock(other);
 err_unlock:
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 err:
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
@@ -2109,7 +2110,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
-		mutex_lock(&u->readlock);
+		mutex_lock(&u->iolock);
 
 		skip = sk_peek_offset(sk, flags);
 		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
@@ -2117,14 +2118,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 		if (skb)
 			break;
 
-		mutex_unlock(&u->readlock);
+		mutex_unlock(&u->iolock);
 
 		if (err != -EAGAIN)
 			break;
 	} while (timeo &&
 		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
 
-	if (!skb) { /* implies readlock unlocked */
+	if (!skb) { /* implies iolock unlocked */
 		unix_state_lock(sk);
 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
@@ -2189,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 
 out_free:
 	skb_free_datagram(sk, skb);
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 out:
 	return err;
 }
@@ -2284,7 +2285,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
 	 */
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	if (flags & MSG_PEEK)
 		skip = sk_peek_offset(sk, flags);
@@ -2326,7 +2327,7 @@ again:
 				break;
 			}
 
-			mutex_unlock(&u->readlock);
+			mutex_unlock(&u->iolock);
 
 			timeo = unix_stream_data_wait(sk, timeo, last,
 						      last_len);
@@ -2337,7 +2338,7 @@ again:
 				goto out;
 			}
 
-			mutex_lock(&u->readlock);
+			mutex_lock(&u->iolock);
 			goto redo;
 unlock:
 			unix_state_unlock(sk);
@@ -2440,7 +2441,7 @@ unlock:
 		}
 	} while (size);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	if (state->msg)
 		scm_recv(sock, state->msg, &scm, flags);
 	else
@@ -2481,9 +2482,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
 	int ret;
 	struct unix_sock *u = unix_sk(sk);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	ret = splice_to_pipe(pipe, spd);
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	return ret;
 }
-- 
2.10.0.rc0.2.g0a9fa47



* Re: possible circular locking dependency detected
  2016-09-01 22:04                       ` possible circular locking dependency detected Linus Torvalds
@ 2016-09-02 14:43                         ` CAI Qian
  2016-09-02 15:51                           ` CAI Qian
  2016-09-02 15:18                         ` Rainer Weikusat
  1 sibling, 1 reply; 18+ messages in thread
From: CAI Qian @ 2016-09-02 14:43 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Al Viro, Miklos Szeredi, Rainer Weikusat, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development



----- Original Message -----
> From: "Linus Torvalds" <torvalds@linux-foundation.org>
> To: "Al Viro" <viro@zeniv.linux.org.uk>, "CAI Qian" <caiqian@redhat.com>
> Cc: "Miklos Szeredi" <miklos@szeredi.hu>, "Rainer Weikusat" <rweikusat@cyberadapt.com>, "Hannes Frederic Sowa"
> <hannes@stressinduktion.org>, "Rainer Weikusat" <rweikusat@mobileactivedefense.com>, "Eric Sandeen"
> <esandeen@redhat.com>, "Network Development" <netdev@vger.kernel.org>
> Sent: Thursday, September 1, 2016 6:04:38 PM
> Subject: Re: possible circular locking dependency detected
> 
> On Thu, Sep 1, 2016 at 2:43 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> > On Thu, Sep 1, 2016 at 2:01 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >>
> >> Outside as in "all fs activity in bind happens under it".  Along with
> >> assignment to ->u.addr, etc.  IOW, make it the outermost lock there.
> >
> > Hah, yes. I misunderstood you.
> >
> > Yes. In fact that fixes the problem I mentioned, rather than introducing
> > it.
> 
> So the easiest approach would seem to be to revert commit c845acb324aa
> ("af_unix: Fix splice-bind deadlock"), and then apply the lock split.
> 
> Like the attached two patches.
> 
> This is still *entirely* untested.
Tested-by: CAI Qian <caiqian@redhat.com>
> 
> Rainer?
> 
>                  Linus
> 


* Re: possible circular locking dependency detected
  2016-09-01 22:04                       ` possible circular locking dependency detected Linus Torvalds
  2016-09-02 14:43                         ` CAI Qian
@ 2016-09-02 15:18                         ` Rainer Weikusat
  2016-09-02 16:00                           ` Al Viro
  1 sibling, 1 reply; 18+ messages in thread
From: Rainer Weikusat @ 2016-09-02 15:18 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Al Viro, CAI Qian, Miklos Szeredi, Rainer Weikusat,
	Hannes Frederic Sowa, Rainer Weikusat, Eric Sandeen,
	Network Development

Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Thu, Sep 1, 2016 at 2:43 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>> On Thu, Sep 1, 2016 at 2:01 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>>>
>>> Outside as in "all fs activity in bind happens under it".  Along with
>>> assignment to ->u.addr, etc.  IOW, make it the outermost lock there.
>>
>> Hah, yes. I misunderstood you.
>>
>> Yes. In fact that fixes the problem I mentioned, rather than introducing it.
>
> So the easiest approach would seem to be to revert commit c845acb324aa
> ("af_unix: Fix splice-bind deadlock"), and then apply the lock split.
>
> Like the attached two patches.
>
> This is still *entirely* untested.

As far as I can tell, this should work, as I can't currently imagine why
a fs operation might end up binding a unix socket. That said, the idea
of making af_unix.c yet more complicated in order to work around the
irregular behaviour of (as far as I can tell) a single filesystem (for
which kern_path_create doesn't really mean kern_path_create, and it has
to work around that once it gets control) goes against all instincts I
have in this area. If filesystems need to do arbitrary stuff when
__sb_start_write is called for 'their' superblock, they should be able
to do so directly.

At present, this is a theoretical concern, as I can't (due to other work
commitments) put any non-cursory work into this before Sunday. There may
also be other reasons why this idea is impractical or even unworkable.


* Re: possible circular locking dependency detected
  2016-09-02 14:43                         ` CAI Qian
@ 2016-09-02 15:51                           ` CAI Qian
  2016-09-02 16:46                             ` CAI Qian
  2016-09-02 17:10                             ` Linus Torvalds
  0 siblings, 2 replies; 18+ messages in thread
From: CAI Qian @ 2016-09-02 15:51 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Al Viro, Miklos Szeredi, Rainer Weikusat, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development



----- Original Message -----
> From: "CAI Qian" <caiqian@redhat.com>
> To: "Linus Torvalds" <torvalds@linux-foundation.org>
> Cc: "Al Viro" <viro@zeniv.linux.org.uk>, "Miklos Szeredi" <miklos@szeredi.hu>, "Rainer Weikusat"
> <rweikusat@cyberadapt.com>, "Hannes Frederic Sowa" <hannes@stressinduktion.org>, "Rainer Weikusat"
> <rweikusat@mobileactivedefense.com>, "Eric Sandeen" <esandeen@redhat.com>, "Network Development"
> <netdev@vger.kernel.org>
> Sent: Friday, September 2, 2016 10:43:20 AM
> Subject: Re: possible circular locking dependency detected
> 
> 
> 
> ----- Original Message -----
> > From: "Linus Torvalds" <torvalds@linux-foundation.org>
> > To: "Al Viro" <viro@zeniv.linux.org.uk>, "CAI Qian" <caiqian@redhat.com>
> > Cc: "Miklos Szeredi" <miklos@szeredi.hu>, "Rainer Weikusat"
> > <rweikusat@cyberadapt.com>, "Hannes Frederic Sowa"
> > <hannes@stressinduktion.org>, "Rainer Weikusat"
> > <rweikusat@mobileactivedefense.com>, "Eric Sandeen"
> > <esandeen@redhat.com>, "Network Development" <netdev@vger.kernel.org>
> > Sent: Thursday, September 1, 2016 6:04:38 PM
> > Subject: Re: possible circular locking dependency detected
> > 
> > On Thu, Sep 1, 2016 at 2:43 PM, Linus Torvalds
> > <torvalds@linux-foundation.org> wrote:
> > > On Thu, Sep 1, 2016 at 2:01 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> > >>
> > >> Outside as in "all fs activity in bind happens under it".  Along with
> > >> assignment to ->u.addr, etc.  IOW, make it the outermost lock there.
> > >
> > > Hah, yes. I misunderstood you.
> > >
> > > Yes. In fact that fixes the problem I mentioned, rather than introducing
> > > it.
> > 
> > So the easiest approach would seem to be to revert commit c845acb324aa
> > ("af_unix: Fix splice-bind deadlock"), and then apply the lock split.
> > 
> > Like the attached two patches.
> > 
> > This is still *entirely* untested.
> Tested-by: CAI Qian <caiqian@redhat.com>
Actually, I take that back: now splice seems to deadlock using the reproducer,

https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/splice/splice01.c

[ 1749.956818] 
[ 1749.958492] ======================================================
[ 1749.965386] [ INFO: possible circular locking dependency detected ]
[ 1749.972381] 4.8.0-rc4+ #34 Not tainted
[ 1749.976560] -------------------------------------------------------
[ 1749.983554] splice01/35921 is trying to acquire lock:
[ 1749.989188]  (&sb->s_type->i_mutex_key#14){+.+.+.}, at: [<ffffffffa083c1f7>] xfs_file_buffered_aio_write+0x127/0x840 [xfs]
[ 1750.001644] 
[ 1750.001644] but task is already holding lock:
[ 1750.008151]  (&pipe->mutex/1){+.+.+.}, at: [<ffffffff8169e7c1>] pipe_lock+0x51/0x60
[ 1750.016753] 
[ 1750.016753] which lock already depends on the new lock.
[ 1750.016753] 
[ 1750.025880] 
[ 1750.025880] the existing dependency chain (in reverse order) is:
[ 1750.034229] 
-> #2 (&pipe->mutex/1){+.+.+.}:
[ 1750.039139]        [<ffffffff812af52a>] lock_acquire+0x1fa/0x440
[ 1750.045857]        [<ffffffff8266448d>] mutex_lock_nested+0xdd/0x850
[ 1750.052963]        [<ffffffff8169e7c1>] pipe_lock+0x51/0x60
[ 1750.059190]        [<ffffffff8171ee25>] splice_to_pipe+0x75/0x9e0
[ 1750.066001]        [<ffffffff81723991>] __generic_file_splice_read+0xa71/0xe90
[ 1750.074071]        [<ffffffff81723e71>] generic_file_splice_read+0xc1/0x1f0
[ 1750.081849]        [<ffffffffa0838628>] xfs_file_splice_read+0x368/0x7b0 [xfs]
[ 1750.089940]        [<ffffffff8171fa7e>] do_splice_to+0xee/0x150
[ 1750.096555]        [<ffffffff817262f4>] SyS_splice+0x1144/0x1c10
[ 1750.103269]        [<ffffffff81007b66>] do_syscall_64+0x1a6/0x500
[ 1750.110084]        [<ffffffff8266ea7f>] return_from_SYSCALL_64+0x0/0x7a
[ 1750.117479] 
-> #1 (&(&ip->i_iolock)->mr_lock#2){++++++}:
[ 1750.123649]        [<ffffffff812af52a>] lock_acquire+0x1fa/0x440
[ 1750.130362]        [<ffffffff8129b93e>] down_write_nested+0x5e/0xe0
[ 1750.137371]        [<ffffffffa086772e>] xfs_ilock+0x2fe/0x550 [xfs]
[ 1750.144397]        [<ffffffffa083c204>] xfs_file_buffered_aio_write+0x134/0x840 [xfs]
[ 1750.153175]        [<ffffffffa083cb7d>] xfs_file_write_iter+0x26d/0x6d0 [xfs]
[ 1750.161177]        [<ffffffff8168374e>] __vfs_write+0x2be/0x640
[ 1750.167799]        [<ffffffff816876e2>] vfs_write+0x152/0x4b0
[ 1750.174220]        [<ffffffff8168b0df>] SyS_write+0xdf/0x1d0
[ 1750.180547]        [<ffffffff8266e9bc>] entry_SYSCALL_64_fastpath+0x1f/0xbd
[ 1750.188328] 
-> #0 (&sb->s_type->i_mutex_key#14){+.+.+.}:
[ 1750.194508]        [<ffffffff812adbc3>] __lock_acquire+0x3043/0x3dd0
[ 1750.201609]        [<ffffffff812af52a>] lock_acquire+0x1fa/0x440
[ 1750.208321]        [<ffffffff82668cda>] down_write+0x5a/0xe0
[ 1750.214645]        [<ffffffffa083c1f7>] xfs_file_buffered_aio_write+0x127/0x840 [xfs]
[ 1750.223421]        [<ffffffffa083cb7d>] xfs_file_write_iter+0x26d/0x6d0 [xfs]
[ 1750.231423]        [<ffffffff816859be>] vfs_iter_write+0x29e/0x550
[ 1750.238330]        [<ffffffff81722729>] iter_file_splice_write+0x529/0xb70
[ 1750.246012]        [<ffffffff817258d4>] SyS_splice+0x724/0x1c10
[ 1750.252627]        [<ffffffff81007b66>] do_syscall_64+0x1a6/0x500
[ 1750.259438]        [<ffffffff8266ea7f>] return_from_SYSCALL_64+0x0/0x7a
[ 1750.266830] 
[ 1750.266830] other info that might help us debug this:
[ 1750.266830] 
[ 1750.275764] Chain exists of:
  &sb->s_type->i_mutex_key#14 --> &(&ip->i_iolock)->mr_lock#2 --> &pipe->mutex/1

[ 1750.287213]  Possible unsafe locking scenario:
[ 1750.287213] 
[ 1750.293817]        CPU0                    CPU1
[ 1750.298871]        ----                    ----
[ 1750.303924]   lock(&pipe->mutex/1);
[ 1750.307845]                                lock(&(&ip->i_iolock)->mr_lock#2);
[ 1750.315836]                                lock(&pipe->mutex/1);
[ 1750.322567]   lock(&sb->s_type->i_mutex_key#14);
[ 1750.327748] 
[ 1750.327748]  *** DEADLOCK ***
[ 1750.327748] 
[ 1750.334355] 2 locks held by splice01/35921:
[ 1750.339019]  #0:  (sb_writers#8){.+.+.+}, at: [<ffffffff8168f444>] __sb_start_write+0xb4/0xf0
[ 1750.348595]  #1:  (&pipe->mutex/1){+.+.+.}, at: [<ffffffff8169e7c1>] pipe_lock+0x51/0x60
[ 1750.357686] 
[ 1750.357686] stack backtrace:
[ 1750.362548] CPU: 50 PID: 35921 Comm: splice01 Not tainted 4.8.0-rc4+ #34
[ 1750.370026] Hardware name: Intel Corporation S2600WTT/S2600WTT, BIOS GRNDSDP1.86B.0044.R00.1501191641 01/19/2015
[ 1750.381382]  0000000000000000 000000003bca9477 ffff88044c4176e0 ffffffff81a3d191
[ 1750.389675]  ffffffff84292880 ffffffff842b9e30 ffff88044c417730 ffffffff812a6aa6
[ 1750.397968]  ffffffff84292880 ffff880414a28cd0 ffff88044c417850 ffff880414a28cd0
[ 1750.406261] Call Trace:
[ 1750.408992]  [<ffffffff81a3d191>] dump_stack+0x85/0xc4
[ 1750.414725]  [<ffffffff812a6aa6>] print_circular_bug+0x356/0x460
[ 1750.421428]  [<ffffffff812adbc3>] __lock_acquire+0x3043/0x3dd0
[ 1750.427942]  [<ffffffff81414fe9>] ? is_ftrace_trampoline+0x99/0xe0
[ 1750.434840]  [<ffffffff812aab80>] ? debug_check_no_locks_freed+0x2c0/0x2c0
[ 1750.442512]  [<ffffffff812a0272>] ? add_lock_to_list.isra.29.constprop.45+0x142/0x1d0
[ 1750.451249]  [<ffffffff812acd9e>] ? __lock_acquire+0x221e/0x3dd0
[ 1750.457952]  [<ffffffff812aa3ce>] ? trace_hardirqs_on_caller+0x3fe/0x580
[ 1750.465430]  [<ffffffff812af52a>] lock_acquire+0x1fa/0x440
[ 1750.471578]  [<ffffffffa083c1f7>] ? xfs_file_buffered_aio_write+0x127/0x840 [xfs]
[ 1750.479929]  [<ffffffff82668cda>] down_write+0x5a/0xe0
[ 1750.485691]  [<ffffffffa083c1f7>] ? xfs_file_buffered_aio_write+0x127/0x840 [xfs]
[ 1750.494070]  [<ffffffffa083c1f7>] xfs_file_buffered_aio_write+0x127/0x840 [xfs]
[ 1750.502226]  [<ffffffff81007b66>] ? do_syscall_64+0x1a6/0x500
[ 1750.508666]  [<ffffffffa083c0d0>] ? xfs_file_dio_aio_write+0xca0/0xca0 [xfs]
[ 1750.516532]  [<ffffffff812a9f72>] ? mark_held_locks+0xd2/0x130
[ 1750.523044]  [<ffffffff812f5887>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 1750.530417]  [<ffffffff82664873>] ? mutex_lock_nested+0x4c3/0x850
[ 1750.537243]  [<ffffffffa083cb7d>] xfs_file_write_iter+0x26d/0x6d0 [xfs]
[ 1750.544625]  [<ffffffff8169e7c1>] ? pipe_lock+0x51/0x60
[ 1750.550456]  [<ffffffff816859be>] vfs_iter_write+0x29e/0x550
[ 1750.556770]  [<ffffffff81685720>] ? vfs_iter_read+0x540/0x540
[ 1750.563181]  [<ffffffff81722729>] iter_file_splice_write+0x529/0xb70
[ 1750.570271]  [<ffffffff81722200>] ? page_cache_pipe_buf_confirm+0x1f0/0x1f0
[ 1750.578041]  [<ffffffff812f5a33>] ? rcu_read_lock_sched_held+0xa3/0x120
[ 1750.585423]  [<ffffffff812f6055>] ? rcu_sync_lockdep_assert+0x75/0xb0
[ 1750.592609]  [<ffffffff8129bd6c>] ? percpu_down_read+0x5c/0xa0
[ 1750.599118]  [<ffffffff8168f444>] ? __sb_start_write+0xb4/0xf0
[ 1750.605627]  [<ffffffff817258d4>] SyS_splice+0x724/0x1c10
[ 1750.611651]  [<ffffffff812f5a33>] ? rcu_read_lock_sched_held+0xa3/0x120
[ 1750.619033]  [<ffffffff817251b0>] ? compat_SyS_vmsplice+0x1f0/0x1f0
[ 1750.626025]  [<ffffffff81007a12>] ? do_syscall_64+0x52/0x500
[ 1750.632338]  [<ffffffff817251b0>] ? compat_SyS_vmsplice+0x1f0/0x1f0
[ 1750.639330]  [<ffffffff81007b66>] do_syscall_64+0x1a6/0x500
[ 1750.645549]  [<ffffffff8100401a>] ? trace_hardirqs_on_thunk+0x1a/0x1c
[ 1750.652737]  [<ffffffff8266ea7f>] entry_SYSCALL64_slow_path+0x25/0x25
> > 
> > Rainer?
> > 
> >                  Linus
> > 
> 


* Re: possible circular locking dependency detected
  2016-09-02 15:18                         ` Rainer Weikusat
@ 2016-09-02 16:00                           ` Al Viro
  2016-09-02 16:10                             ` Rainer Weikusat
  0 siblings, 1 reply; 18+ messages in thread
From: Al Viro @ 2016-09-02 16:00 UTC (permalink / raw)
  To: Rainer Weikusat
  Cc: Linus Torvalds, CAI Qian, Miklos Szeredi, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development

On Fri, Sep 02, 2016 at 04:18:04PM +0100, Rainer Weikusat wrote:

> As far as I can tell, this should work, as I can't currently imagine
> why a fs operation might end up binding a unix socket. That said, the
> idea of making af_unix.c yet more complicated in order to work around
> the irregular behaviour of (as far as I can tell) a single filesystem
> (for which kern_path_create doesn't really mean kern_path_create,

Bullshit.  kern_path_create() *does* mean the same thing in all cases.
Namely, find the parent, lock it and leave the final name component for
the create-type operation.  It sure as hell is not guaranteed to take
*all* locks that are going to be taken in the process of mknod/mkdir/etc.
Never had been.

> and it has to work
> around that once it gets control) goes against all instincts I have in
> this area. If filesystems need to do arbitrary stuff when
> __sb_start_write is called for 'their' superblock, they should be able
> to do so directly.
> 
> At present, this is a theoretical concern, as I can't (due to other work
> commitments) put any non-cursory work into this before Sunday. There
> may also be other reasons why this idea is impractical or even
> unworkable.
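
Schematically, the kern_path_create() contract described above, mirroring
the unix_mknod() shape from the first patch (simplified sketch):

	/* resolve everything but the last component and lock the parent
	 * directory; the final component is left for the create-type op */
	dentry = kern_path_create(AT_FDCWD, name, &path, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	/* any *additional* locks a particular filesystem's ->mknod()
	 * needs (ecryptfs, overlayfs, nfs, ...) are taken inside this
	 * call, not by kern_path_create() itself */
	err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
	done_path_create(&path, dentry);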


* Re: possible circular locking dependency detected
  2016-09-02 16:00                           ` Al Viro
@ 2016-09-02 16:10                             ` Rainer Weikusat
  2016-09-02 17:02                               ` Al Viro
  0 siblings, 1 reply; 18+ messages in thread
From: Rainer Weikusat @ 2016-09-02 16:10 UTC (permalink / raw)
  To: Al Viro
  Cc: Rainer Weikusat, Linus Torvalds, CAI Qian, Miklos Szeredi,
	Hannes Frederic Sowa, Rainer Weikusat, Eric Sandeen,
	Network Development

Al Viro <viro@ZenIV.linux.org.uk> writes:
> On Fri, Sep 02, 2016 at 04:18:04PM +0100, Rainer Weikusat wrote:
>
>> As far as I can tell, this should work, as I can't currently imagine
>> why a fs operation might end up binding a unix socket. That said, the
>> idea of making af_unix.c yet more complicated in order to work around
>> the irregular behaviour of (as far as I can tell) a single filesystem
>> (for which kern_path_create doesn't really mean kern_path_create, and
>> it has to work around that once it gets control) goes against all
>> instincts I have in this area. If filesystems need to do arbitrary
>> stuff when __sb_start_write is called for 'their' superblock, they
>> should be able to do so directly.
>
> Bullshit.  kern_path_create() *does* mean the same thing in all cases.
> Namely, find the parent, lock it and leave the final name component for
> the create-type operation.  It sure as hell is not guaranteed to take
> *all* locks that are going to be taken in the process of mknod/mkdir/etc.
> Never had been.

This isn't about "all locks", it's about the lock in question. No other
mknod operation (that I'm aware of) calls this with a superblock other
than the one already acted upon by kern_path_create. This may be wrong
(if so, feel free to correct it), but it's not "bullshit" (intentional
deception in order to sell something to someone).


* Re: possible circular locking dependency detected
  2016-09-02 15:51                           ` CAI Qian
@ 2016-09-02 16:46                             ` CAI Qian
  2016-09-02 17:10                             ` Linus Torvalds
  1 sibling, 0 replies; 18+ messages in thread
From: CAI Qian @ 2016-09-02 16:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Al Viro, Miklos Szeredi, Rainer Weikusat, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development



----- Original Message -----
> From: "CAI Qian" <caiqian@redhat.com>
> To: "Linus Torvalds" <torvalds@linux-foundation.org>
> Cc: "Al Viro" <viro@zeniv.linux.org.uk>, "Miklos Szeredi" <miklos@szeredi.hu>, "Rainer Weikusat"
> <rweikusat@cyberadapt.com>, "Hannes Frederic Sowa" <hannes@stressinduktion.org>, "Rainer Weikusat"
> <rweikusat@mobileactivedefense.com>, "Eric Sandeen" <esandeen@redhat.com>, "Network Development"
> <netdev@vger.kernel.org>
> Sent: Friday, September 2, 2016 11:51:58 AM
> Subject: Re: possible circular locking dependency detected
> 
> 
> 
> ----- Original Message -----
> > From: "CAI Qian" <caiqian@redhat.com>
> > To: "Linus Torvalds" <torvalds@linux-foundation.org>
> > Cc: "Al Viro" <viro@zeniv.linux.org.uk>, "Miklos Szeredi"
> > <miklos@szeredi.hu>, "Rainer Weikusat"
> > <rweikusat@cyberadapt.com>, "Hannes Frederic Sowa"
> > <hannes@stressinduktion.org>, "Rainer Weikusat"
> > <rweikusat@mobileactivedefense.com>, "Eric Sandeen" <esandeen@redhat.com>,
> > "Network Development"
> > <netdev@vger.kernel.org>
> > Sent: Friday, September 2, 2016 10:43:20 AM
> > Subject: Re: possible circular locking dependency detected
> > 
> > 
> > 
> > ----- Original Message -----
> > > From: "Linus Torvalds" <torvalds@linux-foundation.org>
> > > To: "Al Viro" <viro@zeniv.linux.org.uk>, "CAI Qian" <caiqian@redhat.com>
> > > Cc: "Miklos Szeredi" <miklos@szeredi.hu>, "Rainer Weikusat"
> > > <rweikusat@cyberadapt.com>, "Hannes Frederic Sowa"
> > > <hannes@stressinduktion.org>, "Rainer Weikusat"
> > > <rweikusat@mobileactivedefense.com>, "Eric Sandeen"
> > > <esandeen@redhat.com>, "Network Development" <netdev@vger.kernel.org>
> > > Sent: Thursday, September 1, 2016 6:04:38 PM
> > > Subject: Re: possible circular locking dependency detected
> > > 
> > > On Thu, Sep 1, 2016 at 2:43 PM, Linus Torvalds
> > > <torvalds@linux-foundation.org> wrote:
> > > > On Thu, Sep 1, 2016 at 2:01 PM, Al Viro <viro@zeniv.linux.org.uk>
> > > > wrote:
> > > >>
> > > >> Outside as in "all fs activity in bind happens under it".  Along with
> > > >> assignment to ->u.addr, etc.  IOW, make it the outermost lock there.
> > > >
> > > > Hah, yes. I misunderstood you.
> > > >
> > > > Yes. In fact that fixes the problem I mentioned, rather than
> > > > introducing
> > > > it.
> > > 
> > > So the easiest approach would seem to be to revert commit c845acb324aa
> > > ("af_unix: Fix splice-bind deadlock"), and then apply the lock split.
> > > 
> > > Like the attached two patches.
> > > 
> > > This is still *entirely* untested.
> > Tested-by: CAI Qian <caiqian@redhat.com>
OK, this tag still stands. The issue below also reproduces without those
patches, so it is most likely a separate problem, introduced recently
(after rc3 or rc4), probably by some xfs update.
   CAI Qian
> Actually, I take that back: now splice seems to deadlock using the
> reproducer,
> 
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/splice/splice01.c
> 
> [... full lockdep trace snipped; identical to the one quoted in the previous message ...]
> > > 
> > > Rainer?
> > > 
> > >                  Linus
> > > 
> >


* Re: possible circular locking dependency detected
  2016-09-02 16:10                             ` Rainer Weikusat
@ 2016-09-02 17:02                               ` Al Viro
  2016-09-02 17:12                                 ` Linus Torvalds
  0 siblings, 1 reply; 18+ messages in thread
From: Al Viro @ 2016-09-02 17:02 UTC (permalink / raw)
  To: Rainer Weikusat
  Cc: Linus Torvalds, CAI Qian, Miklos Szeredi, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development

On Fri, Sep 02, 2016 at 05:10:13PM +0100, Rainer Weikusat wrote:

> > Bullshit.  kern_path_create() *does* mean the same thing in all cases.
> > Namely, find the parent, lock it and leave the final name component for
> > the create-type operation.  It sure as hell is not guaranteed to take
> > *all* locks that are going to be taken in process of mknod/mkdir/etc.
> > Never had been.
> 
> This isn't about "all locks", it's about the lock in question. No other
> mknod operation (that I'm aware of) calls this with a superblock other
> than the one already acted upon by kern_path_create. This may be wrong
> (if so, feel free to correct it), but it's not "bullshit" (intentional
> deception in order to sell something to someone).
> 

Never had been promised.  And it's not just this lock - e.g. ->i_rwsem is
taken on the parent by kern_path_create() and on the parent in the
underlying filesystem by ecryptfs ->mknod() (as well as by the overlayfs
one).  Consider a bind/bind deadlock - one bind for a path to ecryptfs,
another for a path on the raw filesystem behind it (which can be mounted
elsewhere/in another namespace/etc.), with those paths ending in the
matching directories (the last components may be the same or different -
it doesn't matter):

A: lock parent in ecryptfs (via kern_path_create())
B: lock the directory behind it in underlying fs (ditto)
A: grab ->readlock
B: block on ->readlock
A: call ecryptfs_mknod() and block trying to lock the directory held by B

Deadlock.  And while we are at it, ecryptfs probably ought to claim
transient write access for the duration of modifications of the
underlying filesystem, similar to overlayfs.  The point is, it had never
been promised that you can stick random locks just outside of
->mknod()/->mkdir()/etc.  The same goes for e.g. an NFS mount of
something exported by localhost; knfsd must lock the parent directory on
the server before creating anything in it.  Suppose you have
/srv/nfs/foo exported and mounted on the same host at /mnt/bar.  bind to
/mnt/bar/barf/a vs. bind to /srv/nfs/foo/barf/b:
A: lock /mnt/bar/barf
B: lock /srv/nfs/foo/barf
A: grab ->readlock
B: block on ->readlock
A: call nfs_mknod(), wait for reply
knfsd: block trying to lock /srv/nfs/foo/barf

It's very much _not_ just overlayfs being pathological - that it certainly is,
but the problem is much wider.  You could try to argue that kern_path_create()
should've known to lock all relevant directories in case of overlayfs and
ecryptfs, but it has absolutely no chance to do that in case of NFS - the
protocol doesn't allow "lock this directory, one of my next requests will
be to create something in it".  Even leases are only for regular files...


* Re: possible circular locking dependency detected
  2016-09-02 15:51                           ` CAI Qian
  2016-09-02 16:46                             ` CAI Qian
@ 2016-09-02 17:10                             ` Linus Torvalds
  1 sibling, 0 replies; 18+ messages in thread
From: Linus Torvalds @ 2016-09-02 17:10 UTC (permalink / raw)
  To: CAI Qian, Dave Chinner, Christoph Hellwig
  Cc: Al Viro, Miklos Szeredi, Rainer Weikusat, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development

On Fri, Sep 2, 2016 at 8:51 AM, CAI Qian <caiqian@redhat.com> wrote:
>
> Actually, I took it back, and now spice seems start to deadlock using the reproducer,
>
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/splice/splice01.c

This is a different deadlock, though. This is a deadlock due to mixed
lock ordering between the pipe mutex, the XFS "mr_lock", and the inode
mutex.

If I read the lockdep trace correctly, we have:

Normally we have write doing inode->i_mutex -> i_iolock.mr_lock for
the regular write path.

But the normal splice "write()" case has

  pipe->mutex -> filesystem write lock (normally i_mutex)

(in iter_file_splice_write() that calls vfs_iter_write() that calls
->write_iter())

and then the XFS splice case has

    mr_lock -> pipe->mutex

in xfs_file_splice_read() calling splice_to_pipe().

So you end up with a A->B->C->A chain.
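
Side by side, the three orderings from the lockdep chains quoted above
(schematic):

	i_mutex     -> mr_lock         /* regular buffered write */
	pipe->mutex -> i_mutex         /* splice write: iter_file_splice_write() */
	mr_lock     -> pipe->mutex     /* splice read: xfs_file_splice_read() */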

I think it's new with the new iomap-based buffered write path in 4.8.

Dave, Christoph?

                 Linus

> [... lockdep trace snipped; quoted in full earlier in the thread ...]


* Re: possible circular locking dependency detected
  2016-09-02 17:02                               ` Al Viro
@ 2016-09-02 17:12                                 ` Linus Torvalds
  2016-09-02 17:40                                   ` Rainer Weikusat
  2016-09-02 17:52                                   ` Al Viro
  0 siblings, 2 replies; 18+ messages in thread
From: Linus Torvalds @ 2016-09-02 17:12 UTC (permalink / raw)
  To: Al Viro
  Cc: Rainer Weikusat, CAI Qian, Miklos Szeredi, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development

On Fri, Sep 2, 2016 at 10:02 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> It's very much _not_ just overlayfs being pathological - that it certainly is,
> but the problem is much wider.

Al, can you take a look at my two patches, and see if you agree that
they fix it, though?

Of course, we now have *another* splice deadlock. That pipe inode is
nasty, it's very easy to deadlock on it in subtle ways.

             Linus


* Re: possible circular locking dependency detected
  2016-09-02 17:12                                 ` Linus Torvalds
@ 2016-09-02 17:40                                   ` Rainer Weikusat
  2016-09-02 17:53                                     ` Al Viro
  2016-09-02 17:52                                   ` Al Viro
  1 sibling, 1 reply; 18+ messages in thread
From: Rainer Weikusat @ 2016-09-02 17:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Al Viro, Rainer Weikusat, CAI Qian, Miklos Szeredi,
	Hannes Frederic Sowa, Eric Sandeen, Network Development

Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Fri, Sep 2, 2016 at 10:02 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>>
>> It's very much _not_ just overlayfs being pathological - that it certainly is,
>> but the problem is much wider.
>
> Al, can you take a look at my two patches, and see if you agree that
> they fix it, though?

The original deadlock occurred because some code path locked the
superblock and then tried to acquire the af_unix readlock, while
unix_bind did the same in the opposite order (by doing kern_path_create
with the readlock held). If unix_bind no longer shares a lock with the
receive routines, this obviously can't happen anymore.

The other problem situation is one where a lock on something can be
acquired both by kern_path_create and by a mknod operation, with the
readlock taken in between. Because the readlock sits between the
kern_path_create and the mknod, it can block the thread which got a
certain lock via kern_path_create, because the one about to acquire the
same lock via mknod got the readlock first. This obviously can't happen
anymore once the original 'acquire readlock (now bindlock) first'
ordering is restored.
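
Schematically, the original splice-bind deadlock described above
(reconstructed from the discussion; not literal call chains):

	/* thread A (unix_bind, old code)   thread B (fs/splice path)
	 *   mutex_lock(&u->readlock);        takes superblock/vfs locks
	 *   kern_path_create()                 ...
	 *     blocks on B's vfs locks        mutex_lock(&u->readlock);
	 *                                      blocks on A => deadlock
	 *
	 * With the split, unix_bind takes u->bindlock before any vfs
	 * locks, so neither inversion can occur.
	 */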


* Re: possible circular locking dependency detected
  2016-09-02 17:12                                 ` Linus Torvalds
  2016-09-02 17:40                                   ` Rainer Weikusat
@ 2016-09-02 17:52                                   ` Al Viro
  1 sibling, 0 replies; 18+ messages in thread
From: Al Viro @ 2016-09-02 17:52 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Rainer Weikusat, CAI Qian, Miklos Szeredi, Hannes Frederic Sowa,
	Rainer Weikusat, Eric Sandeen, Network Development

On Fri, Sep 02, 2016 at 10:12:08AM -0700, Linus Torvalds wrote:
> On Fri, Sep 2, 2016 at 10:02 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > It's very much _not_ just overlayfs being pathological - that it certainly is,
> > but the problem is much wider.
> 
> Al, can you take a look at my two patches, and see if you agree that
> they fix it, though?

AFAICS, they should.  Locking is obviously saner that way and AFAICS the
rest is absolutely straightforward.

Acked-by: Al Viro <viro@zeniv.linux.org.uk>

> Of course, we now have *another* splice deadlock. That pipe inode is
> nasty, it's very easy to deadlock on it in subtle ways.

I'm still digging through iomap.c, but that's better taken to another branch
of this thread...

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: possible circular locking dependency detected
  2016-09-02 17:40                                   ` Rainer Weikusat
@ 2016-09-02 17:53                                     ` Al Viro
  0 siblings, 0 replies; 18+ messages in thread
From: Al Viro @ 2016-09-02 17:53 UTC (permalink / raw)
  To: Rainer Weikusat
  Cc: Linus Torvalds, CAI Qian, Miklos Szeredi, Hannes Frederic Sowa,
	Eric Sandeen, Network Development

On Fri, Sep 02, 2016 at 06:40:59PM +0100, Rainer Weikusat wrote:

> The original deadlock occurred because of some code path locking the
> superblock followed by trying to acquire the af_unix readlock while

Not even that - one code path takes ->readlock under pipe lock, while
another takes pipe lock under sb_start_write...
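
To spell out why lockdep complains, here is a toy reachability check over
the three ordering edges; the edge list is a hand-written model of the
description above (the readlock -> fs edge being the old bind path), not
data extracted from the kernel:

#include <stdio.h>

enum { SB, PIPE, RLOCK, NLOCKS };

static const char *lockname[NLOCKS] = {
        "sb_start_write", "pipe lock", "->readlock"
};

/* edge[a][b] == 1 means "b has been taken while a was held" */
static int edge[NLOCKS][NLOCKS];
static int visited[NLOCKS];

static int reaches(int from, int to)
{
        if (visited[from])
                return 0;
        visited[from] = 1;
        for (int mid = 0; mid < NLOCKS; mid++)
                if (edge[from][mid] && (mid == to || reaches(mid, to)))
                        return 1;
        return 0;
}

int main(void)
{
        edge[SB][PIPE] = 1;     /* pipe lock taken under sb_start_write */
        edge[PIPE][RLOCK] = 1;  /* ->readlock taken under the pipe lock */
        edge[RLOCK][SB] = 1;    /* old bind path: fs write under readlock */

        for (int l = 0; l < NLOCKS; l++) {
                for (int m = 0; m < NLOCKS; m++)
                        visited[m] = 0;
                if (reaches(l, l))
                        printf("cycle through %s => possible deadlock\n",
                               lockname[l]);
        }
        return 0;
}

Any lock on the cycle reaches itself, which is exactly the condition
lockdep reports as a possible circular dependency.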

^ permalink raw reply	[flat|nested] 18+ messages in thread

* possible circular locking dependency detected
@ 2024-05-01 16:47 Chuck Lever III
  0 siblings, 0 replies; 18+ messages in thread
From: Chuck Lever III @ 2024-05-01 16:47 UTC (permalink / raw)
  To: Linux NFS Mailing List, Linux-XFS

Hi-

A lockdep splat showed up during an xfstests run on v6.9-rc6:

[10205.014915] WARNING: possible circular locking dependency detected
[10205.016399] 6.9.0-rc6-00022-g06cd86b25b98 #3 Not tainted
[10205.017724] ------------------------------------------------------
[10205.019194] kswapd0/64 is trying to acquire lock:
[10205.020656] ffff88813f60db18 (&xfs_nondir_ilock_class#3){++++}-{4:4}, at: xfs_ilock+0x14d/0x3c0 [xfs]
[10205.023544] but task is already holding lock:
[10205.024932] ffffffff8deb5420 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x167/0x1530
[10205.027718] which lock already depends on the new lock.
[10205.029842] the existing dependency chain (in reverse order) is:
[10205.031760] -> #1 (fs_reclaim){+.+.}-{0:0}:
[10205.033478]        fs_reclaim_acquire+0x111/0x170
[10205.035002]        __kmalloc+0xa9/0x4b0
[10205.036223]        xfs_attr_list_ilocked+0x6f0/0x1540 [xfs]
[10205.038856]        xfs_attr_list+0x1ce/0x260 [xfs]
[10205.040688]        xfs_vn_listxattr+0x102/0x180 [xfs]
[10205.042753]        vfs_listxattr+0x9e/0xf0
[10205.044100]        nfsd_listxattr+0x134/0x250 [nfsd]
[10205.045890]        nfsd4_listxattrs+0x16/0x20 [nfsd]
[10205.047585]        nfsd4_proc_compound+0xe41/0x24e0 [nfsd]
[10205.049477]        nfsd_dispatch+0x258/0x7d0 [nfsd]
[10205.051196]        svc_process_common+0xa88/0x1db0 [sunrpc]
[10205.052934]        svc_process+0x552/0x800 [sunrpc]
[10205.054315]        svc_recv+0x1958/0x2460 [sunrpc]
[10205.055877]        nfsd+0x23d/0x360 [nfsd]
[10205.057315]        kthread+0x2f3/0x3e0
[10205.058524]        ret_from_fork+0x3d/0x80
[10205.059821]        ret_from_fork_asm+0x1a/0x30
[10205.061222] -> #0 (&xfs_nondir_ilock_class#3){++++}-{4:4}:
[10205.063319]        __lock_acquire+0x3437/0x6e60
[10205.065023]        lock_acquire+0x1ad/0x520
[10205.066444]        down_write_nested+0x96/0x1f0
[10205.067897]        xfs_ilock+0x14d/0x3c0 [xfs]
[10205.071022]        xfs_icwalk_ag+0x885/0x1580 [xfs]
[10205.073093]        xfs_icwalk+0x50/0xd0 [xfs]
[10205.075391]        xfs_reclaim_inodes_nr+0x158/0x210 [xfs]
[10205.077605]        xfs_fs_free_cached_objects+0x5a/0x90 [xfs]
[10205.079657]        super_cache_scan+0x389/0x4e0
[10205.080918]        do_shrink_slab+0x352/0xd20
[10205.082122]        shrink_slab+0x161/0xe40
[10205.083157]        shrink_one+0x3f2/0x6c0
[10205.084218]        shrink_node+0x1f6e/0x35e0
[10205.085322]        balance_pgdat+0x87f/0x1530
[10205.086433]        kswapd+0x559/0xa00
[10205.087399]        kthread+0x2f3/0x3e0
[10205.088673]        ret_from_fork+0x3d/0x80
[10205.090550]        ret_from_fork_asm+0x1a/0x30
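
The inversion being reported is the usual fs_reclaim one: the nfsd
listxattr path allocates memory while holding the inode lock, which can
enter reclaim, while kswapd is already in reclaim when it takes inode
locks. A minimal userspace sketch, with fs_reclaim modelled as an
ordinary mutex (in the kernel it is a lockdep pseudo-lock) and purely
illustrative names:

#include <pthread.h>

static pthread_mutex_t fs_reclaim = PTHREAD_MUTEX_INITIALIZER; /* reclaim */
static pthread_mutex_t xfs_ilock  = PTHREAD_MUTEX_INITIALIZER; /* inode lock */

/* nfsd listxattr path: allocates while holding the inode lock */
static void *listxattr_path(void *arg)
{
        pthread_mutex_lock(&xfs_ilock);         /* xfs_attr_list_ilocked */
        pthread_mutex_lock(&fs_reclaim);        /* __kmalloc may enter reclaim */
        pthread_mutex_unlock(&fs_reclaim);
        pthread_mutex_unlock(&xfs_ilock);
        return NULL;
}

/* kswapd: already in reclaim, takes inode locks to reclaim inodes */
static void *kswapd_path(void *arg)
{
        pthread_mutex_lock(&fs_reclaim);        /* balance_pgdat */
        pthread_mutex_lock(&xfs_ilock);         /* xfs_icwalk_ag -> xfs_ilock */
        pthread_mutex_unlock(&xfs_ilock);
        pthread_mutex_unlock(&fs_reclaim);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, listxattr_path, NULL);
        pthread_create(&b, NULL, kswapd_path, NULL);
        pthread_join(a, NULL);  /* can hang: each side waits for the other */
        pthread_join(b, NULL);
        return 0;
}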

I can leave the test NFS server up for a bit if you'd like me to
collect more information.


--
Chuck Lever



^ permalink raw reply	[flat|nested] 18+ messages in thread

* possible circular locking dependency detected
@ 2015-03-11 12:52 Daniel Wagner
  0 siblings, 0 replies; 18+ messages in thread
From: Daniel Wagner @ 2015-03-11 12:52 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel

Hi, 

I am seeing the splat below when I boot up my KVM guest. I don't think
it has been reported yet; in case I missed an earlier report, sorry
about the noise.

[   92.867888] ======================================================
[   92.868440] [ INFO: possible circular locking dependency detected ]
[   92.868591] 4.0.0-rc3 #1 Not tainted
[   92.868591] -------------------------------------------------------
[   92.868591] sulogin/1617 is trying to acquire lock:
[   92.868591]  (&isec->lock){+.+.+.}, at: [<ffffffff8149e185>] inode_doinit_with_dentry+0xa5/0x680
[   92.868591] 
[   92.868591] but task is already holding lock:
[   92.868591]  (&mm->mmap_sem){++++++}, at: [<ffffffff8118635f>] vm_mmap_pgoff+0x6f/0xc0
[   92.868591] 
[   92.868591] which lock already depends on the new lock.
[   92.868591] 
[   92.868591] 
[   92.868591] the existing dependency chain (in reverse order) is:
[   92.868591] 
-> #2 (&mm->mmap_sem){++++++}:
[   92.868591]        [<ffffffff810a7ae5>] lock_acquire+0xd5/0x2a0
[   92.868591]        [<ffffffff8119879c>] might_fault+0x8c/0xb0
[   92.868591]        [<ffffffff811e6832>] filldir+0x92/0x120
[   92.868591]        [<ffffffff8138880b>] xfs_dir2_block_getdents.isra.12+0x19b/0x1f0
[   92.868591]        [<ffffffff81388994>] xfs_readdir+0x134/0x2f0
[   92.868591]        [<ffffffff8138b78b>] xfs_file_readdir+0x2b/0x30
[   92.868591]        [<ffffffff811e660a>] iterate_dir+0x9a/0x140
[   92.868591]        [<ffffffff811e6af1>] SyS_getdents+0x81/0x100
[   92.868591]        [<ffffffff81b5cfb2>] system_call_fastpath+0x12/0x17
[   92.868591] 
-> #1 (&xfs_dir_ilock_class){++++.+}:
[   92.868591]        [<ffffffff810a7ae5>] lock_acquire+0xd5/0x2a0
[   92.868591]        [<ffffffff8109feb7>] down_read_nested+0x57/0xa0
[   92.868591]        [<ffffffff8139b612>] xfs_ilock+0x92/0x290
[   92.868591]        [<ffffffff8139b888>] xfs_ilock_attr_map_shared+0x38/0x50
[   92.868591]        [<ffffffff8133c081>] xfs_attr_get+0xc1/0x180
[   92.868591]        [<ffffffff813aa9d7>] xfs_xattr_get+0x37/0x50
[   92.868591]        [<ffffffff811fb21f>] generic_getxattr+0x4f/0x70
[   92.868591]        [<ffffffff8149e232>] inode_doinit_with_dentry+0x152/0x680
[   92.868591]        [<ffffffff8149e83b>] sb_finish_set_opts+0xdb/0x260
[   92.868591]        [<ffffffff8149ec84>] selinux_set_mnt_opts+0x2c4/0x600
[   92.868591]        [<ffffffff8149f024>] superblock_doinit+0x64/0xd0
[   92.868591]        [<ffffffff8149f0a0>] delayed_superblock_init+0x10/0x20
[   92.868591]        [<ffffffff811d2d52>] iterate_supers+0xb2/0x110
[   92.868591]        [<ffffffff8149f333>] selinux_complete_init+0x33/0x40
[   92.868591]        [<ffffffff814aea46>] security_load_policy+0xf6/0x560
[   92.868591]        [<ffffffff814a0d42>] sel_write_load+0xa2/0x740
[   92.868591]        [<ffffffff811cf92a>] vfs_write+0xba/0x200
[   92.868591]        [<ffffffff811d00a9>] SyS_write+0x49/0xb0
[   92.868591]        [<ffffffff81b5cfb2>] system_call_fastpath+0x12/0x17
[   92.868591] 
-> #0 (&isec->lock){+.+.+.}:
[   92.868591]        [<ffffffff810a6a4e>] __lock_acquire+0x1ede/0x1ee0
[   92.868591]        [<ffffffff810a7ae5>] lock_acquire+0xd5/0x2a0
[   92.868591]        [<ffffffff81b588be>] mutex_lock_nested+0x6e/0x3f0
[   92.868591]        [<ffffffff8149e185>] inode_doinit_with_dentry+0xa5/0x680
[   92.868591]        [<ffffffff8149f2fc>] selinux_d_instantiate+0x1c/0x20
[   92.868591]        [<ffffffff81491b4b>] security_d_instantiate+0x1b/0x30
[   92.868591]        [<ffffffff811e9f74>] d_instantiate+0x54/0x80
[   92.868591]        [<ffffffff8118215d>] __shmem_file_setup+0xcd/0x230
[   92.868591]        [<ffffffff81185e28>] shmem_zero_setup+0x28/0x70
[   92.868591]        [<ffffffff811a2408>] mmap_region+0x5d8/0x5f0
[   92.868591]        [<ffffffff811a273b>] do_mmap_pgoff+0x31b/0x400
[   92.868591]        [<ffffffff81186380>] vm_mmap_pgoff+0x90/0xc0
[   92.868591]        [<ffffffff811a0ae6>] SyS_mmap_pgoff+0x106/0x290
[   92.868591]        [<ffffffff81008a22>] SyS_mmap+0x22/0x30
[   92.868591]        [<ffffffff81b5cfb2>] system_call_fastpath+0x12/0x17
[   92.868591] 
[   92.868591] other info that might help us debug this:
[   92.868591] 
[   92.868591] Chain exists of:
  &isec->lock --> &xfs_dir_ilock_class --> &mm->mmap_sem

[   92.868591]  Possible unsafe locking scenario:
[   92.868591] 
[   92.868591]        CPU0                    CPU1
[   92.868591]        ----                    ----
[   92.868591]   lock(&mm->mmap_sem);
[   92.868591]                                lock(&xfs_dir_ilock_class);
[   92.868591]                                lock(&mm->mmap_sem);
[   92.868591]   lock(&isec->lock);
[   92.868591] 
[   92.868591]  *** DEADLOCK ***
[   92.868591] 
[   92.868591] 1 lock held by sulogin/1617:
[   92.868591]  #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff8118635f>] vm_mmap_pgoff+0x6f/0xc0
[   92.868591] 
[   92.868591] stack backtrace:
[   92.868591] CPU: 0 PID: 1617 Comm: sulogin Not tainted 4.0.0-rc3 #1
[   92.868591] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
[   92.868591]  ffffffff82e6e980 ffff880078d279f8 ffffffff81b508c5 0000000000000007
[   92.868591]  ffffffff82e31af0 ffff880078d27a48 ffffffff810a30bd ffff880078fd87a0
[   92.868591]  ffff880078d27ac8 ffff880078d27a48 ffff880078fd8000 0000000000000001
[   92.868591] Call Trace:
[   92.868591]  [<ffffffff81b508c5>] dump_stack+0x4c/0x65
[   92.868591]  [<ffffffff810a30bd>] print_circular_bug+0x1cd/0x230
[   92.868591]  [<ffffffff810a6a4e>] __lock_acquire+0x1ede/0x1ee0
[   92.868591]  [<ffffffff810a0be5>] ? __bfs+0x105/0x240
[   92.868591]  [<ffffffff810a7ae5>] lock_acquire+0xd5/0x2a0
[   92.868591]  [<ffffffff8149e185>] ? inode_doinit_with_dentry+0xa5/0x680
[   92.868591]  [<ffffffff81b588be>] mutex_lock_nested+0x6e/0x3f0
[   92.868591]  [<ffffffff8149e185>] ? inode_doinit_with_dentry+0xa5/0x680
[   92.868591]  [<ffffffff811e9ef5>] ? __d_instantiate+0xd5/0x100
[   92.868591]  [<ffffffff8149e185>] ? inode_doinit_with_dentry+0xa5/0x680
[   92.868591]  [<ffffffff811e9f69>] ? d_instantiate+0x49/0x80
[   92.868591]  [<ffffffff8149e185>] inode_doinit_with_dentry+0xa5/0x680
[   92.868591]  [<ffffffff811e9f69>] ? d_instantiate+0x49/0x80
[   92.868591]  [<ffffffff8149f2fc>] selinux_d_instantiate+0x1c/0x20
[   92.868591]  [<ffffffff81491b4b>] security_d_instantiate+0x1b/0x30
[   92.868591]  [<ffffffff811e9f74>] d_instantiate+0x54/0x80
[   92.868591]  [<ffffffff8118215d>] __shmem_file_setup+0xcd/0x230
[   92.868591]  [<ffffffff81185e28>] shmem_zero_setup+0x28/0x70
[   92.868591]  [<ffffffff811a2408>] mmap_region+0x5d8/0x5f0
[   92.868591]  [<ffffffff811a273b>] do_mmap_pgoff+0x31b/0x400
[   92.868591]  [<ffffffff8118635f>] ? vm_mmap_pgoff+0x6f/0xc0
[   92.868591]  [<ffffffff81186380>] vm_mmap_pgoff+0x90/0xc0
[   92.868591]  [<ffffffff811a0ae6>] SyS_mmap_pgoff+0x106/0x290
[   92.868591]  [<ffffffff81507bfb>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[   92.868591]  [<ffffffff81008a22>] SyS_mmap+0x22/0x30
[   92.868591]  [<ffffffff81b5cfb2>] system_call_fastpath+0x12/0x17


cheers,
daniel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: possible circular locking dependency detected
  2010-05-20 16:34 Ciprian Docan
@ 2010-05-21 21:14 ` Andrew Morton
  0 siblings, 0 replies; 18+ messages in thread
From: Andrew Morton @ 2010-05-21 21:14 UTC (permalink / raw)
  To: Ciprian Docan; +Cc: linux-kernel, Al Viro, Tejun Heo

On Thu, 20 May 2010 12:34:00 -0400 (EDT)
Ciprian Docan <docan@eden.rutgers.edu> wrote:

> 
> Hi,
> 
> I got the following in the dmesg:
> 
> --------------------------------------------------------------------
> [ INFO: possible circular locking dependency detected ]
> 2.6.33-rc8 #4
> -------------------------------------------------------
> fdisk/29231 is trying to acquire lock:
>   (&type->s_umount_key#47){++++..}, at: [<ffffffff810fb13c>] get_super+0x5c/0xaf
> 
> but task is already holding lock:
>   (&bdev->bd_mutex){+.+.+.}, at: [<ffffffff811f2df0>] blkdev_ioctl+0x5c5/0x6b1
> 
> which lock already depends on the new lock.
> 
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #1 (&bdev->bd_mutex){+.+.+.}:
>         [<ffffffff8106e65b>] __lock_acquire+0xb5d/0xd05
>         [<ffffffff8106e8cf>] lock_acquire+0xcc/0xe9
>         [<ffffffff81402d09>] __mutex_lock_common+0x4c/0x348
>         [<ffffffff814030c9>] mutex_lock_nested+0x3e/0x43
>         [<ffffffff8111f4a9>] __blkdev_put+0x34/0x16c
>         [<ffffffff8111f5f1>] blkdev_put+0x10/0x12
>         [<ffffffff8112063b>] close_bdev_exclusive+0x24/0x2d
>         [<ffffffff810fbcaa>] get_sb_bdev+0xef/0x1a1
>         [<ffffffffa0114189>] vfat_get_sb+0x18/0x1a [vfat]
>         [<ffffffff810fb8bc>] vfs_kern_mount+0xa9/0x168
>         [<ffffffff810fb9e3>] do_kern_mount+0x4d/0xed
>         [<ffffffff81110f54>] do_mount+0x72f/0x7a6
>         [<ffffffff81111053>] sys_mount+0x88/0xc2
>         [<ffffffff8100236b>] system_call_fastpath+0x16/0x1b

vfs_kern_mount() holds s_umount.  My brain isn't large enough to work
out where that lock was taken, yet it's so obvious that no code
comments were needed.  Sigh.  Might be down under sget().

vfs_kern_mount() ends up calling into __blkdev_put(), which takes
bd_mutex.

> -> #0 (&type->s_umount_key#47){++++..}:
>         [<ffffffff8106e505>] __lock_acquire+0xa07/0xd05
>         [<ffffffff8106e8cf>] lock_acquire+0xcc/0xe9
>         [<ffffffff81403450>] down_read+0x51/0x84
>         [<ffffffff810fb13c>] get_super+0x5c/0xaf
>         [<ffffffff8111facd>] fsync_bdev+0x18/0x48
>         [<ffffffff811f433c>] invalidate_partition+0x25/0x42
>         [<ffffffff8114bda2>] rescan_partitions+0x37/0x3a7
>         [<ffffffff811f2dff>] blkdev_ioctl+0x5d4/0x6b1
>         [<ffffffff8111eca4>] block_ioctl+0x37/0x3b
>         [<ffffffff811060d0>] vfs_ioctl+0x32/0xa6
>         [<ffffffff81106650>] do_vfs_ioctl+0x490/0x4d6
>         [<ffffffff811066ec>] sys_ioctl+0x56/0x79
>         [<ffffffff8100236b>] system_call_fastpath+0x16/0x1b

blkdev_reread_part() takes bd_mutex then does
	rescan_partitions
	->invalidate_partition
	  ->fsync_bdev
	    ->get_super  (takes s_umount for reading)

> other info that might help us debug this:
> 
> 1 lock held by fdisk/29231:
>   #0:  (&bdev->bd_mutex){+.+.+.}, at: [<ffffffff811f2df0>] blkdev_ioctl+0x5c5/0x6b1
> 
> stack backtrace:
> Pid: 29231, comm: fdisk Not tainted 2.6.33-rc8 #4
> Call Trace:
>   [<ffffffff8106d6dc>] print_circular_bug+0xa8/0xb6
>   [<ffffffff8106e505>] __lock_acquire+0xa07/0xd05
>   [<ffffffff81062009>] ? sched_clock_local+0x1c/0x82
>   [<ffffffff8106e8cf>] lock_acquire+0xcc/0xe9
>   [<ffffffff810fb13c>] ? get_super+0x5c/0xaf
>   [<ffffffff8106b936>] ? lock_release_holdtime+0x2c/0xdb
>   [<ffffffff81403450>] down_read+0x51/0x84
>   [<ffffffff810fb13c>] ? get_super+0x5c/0xaf
>   [<ffffffff810fb13c>] get_super+0x5c/0xaf
>   [<ffffffff8111facd>] fsync_bdev+0x18/0x48
>   [<ffffffff811f433c>] invalidate_partition+0x25/0x42
>   [<ffffffff81402c8e>] ? mutex_trylock+0x12a/0x159
>   [<ffffffff8114bda2>] rescan_partitions+0x37/0x3a7
>   [<ffffffff8106d0c9>] ? trace_hardirqs_on+0xd/0xf
>   [<ffffffff811f2df0>] ? blkdev_ioctl+0x5c5/0x6b1
>   [<ffffffff811f2dff>] blkdev_ioctl+0x5d4/0x6b1
>   [<ffffffff8106d098>] ? trace_hardirqs_on_caller+0x118/0x13c
>   [<ffffffff8111eca4>] block_ioctl+0x37/0x3b
>   [<ffffffff811060d0>] vfs_ioctl+0x32/0xa6
>   [<ffffffff81106650>] do_vfs_ioctl+0x490/0x4d6
>   [<ffffffff811066ec>] sys_ioctl+0x56/0x79
>   [<ffffffff8102f9bd>] ? __wake_up+0x22/0x4d
>   [<ffffffff8100236b>] system_call_fastpath+0x16/0x1b
> -------------------------------------------------------------------------
> 
> Kernel version used: 2.6.33-rc8 #4. I do not remember the exact steps, but
> I was trying to format a USB stick using fdisk. Please let me know if
> you need additional information. Thank you.
> 

So yup, that's ab/ba deadlockable.  I cannot immediately see any change
which might have caused that.  Tejun has been mucking with the
partitions code recently but nothing leaps out at me.
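
A compact model of the two chains, with s_umount as an rwlock (get_super()
takes it for reading, the mount path holds it for writing) and bd_mutex as
a plain mutex; this is illustrative userspace code, not the kernel paths
themselves:

#include <pthread.h>

static pthread_rwlock_t s_umount = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t  bd_mutex = PTHREAD_MUTEX_INITIALIZER;

/* mount path: sget() holds s_umount; the error path drops the bdev */
static void *mount_path(void *arg)
{
        pthread_rwlock_wrlock(&s_umount);       /* vfs_kern_mount/sget */
        pthread_mutex_lock(&bd_mutex);          /* __blkdev_put */
        pthread_mutex_unlock(&bd_mutex);
        pthread_rwlock_unlock(&s_umount);
        return NULL;
}

/* BLKRRPART path: the ioctl holds bd_mutex, fsync_bdev wants s_umount */
static void *rescan_path(void *arg)
{
        pthread_mutex_lock(&bd_mutex);          /* blkdev_ioctl */
        pthread_rwlock_rdlock(&s_umount);       /* get_super */
        pthread_rwlock_unlock(&s_umount);
        pthread_mutex_unlock(&bd_mutex);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, mount_path, NULL);
        pthread_create(&b, NULL, rescan_path, NULL);
        pthread_join(a, NULL);  /* can hang: each side waits for the other */
        pthread_join(b, NULL);
        return 0;
}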


^ permalink raw reply	[flat|nested] 18+ messages in thread

* possible circular locking dependency detected
@ 2010-05-20 16:34 Ciprian Docan
  2010-05-21 21:14 ` Andrew Morton
  0 siblings, 1 reply; 18+ messages in thread
From: Ciprian Docan @ 2010-05-20 16:34 UTC (permalink / raw)
  To: linux-kernel


Hi,

I got the following in the dmesg:

--------------------------------------------------------------------
[ INFO: possible circular locking dependency detected ]
2.6.33-rc8 #4
-------------------------------------------------------
fdisk/29231 is trying to acquire lock:
  (&type->s_umount_key#47){++++..}, at: [<ffffffff810fb13c>] get_super+0x5c/0xaf

but task is already holding lock:
  (&bdev->bd_mutex){+.+.+.}, at: [<ffffffff811f2df0>] blkdev_ioctl+0x5c5/0x6b1

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (&bdev->bd_mutex){+.+.+.}:
        [<ffffffff8106e65b>] __lock_acquire+0xb5d/0xd05
        [<ffffffff8106e8cf>] lock_acquire+0xcc/0xe9
        [<ffffffff81402d09>] __mutex_lock_common+0x4c/0x348
        [<ffffffff814030c9>] mutex_lock_nested+0x3e/0x43
        [<ffffffff8111f4a9>] __blkdev_put+0x34/0x16c
        [<ffffffff8111f5f1>] blkdev_put+0x10/0x12
        [<ffffffff8112063b>] close_bdev_exclusive+0x24/0x2d
        [<ffffffff810fbcaa>] get_sb_bdev+0xef/0x1a1
        [<ffffffffa0114189>] vfat_get_sb+0x18/0x1a [vfat]
        [<ffffffff810fb8bc>] vfs_kern_mount+0xa9/0x168
        [<ffffffff810fb9e3>] do_kern_mount+0x4d/0xed
        [<ffffffff81110f54>] do_mount+0x72f/0x7a6
        [<ffffffff81111053>] sys_mount+0x88/0xc2
        [<ffffffff8100236b>] system_call_fastpath+0x16/0x1b

-> #0 (&type->s_umount_key#47){++++..}:
        [<ffffffff8106e505>] __lock_acquire+0xa07/0xd05
        [<ffffffff8106e8cf>] lock_acquire+0xcc/0xe9
        [<ffffffff81403450>] down_read+0x51/0x84
        [<ffffffff810fb13c>] get_super+0x5c/0xaf
        [<ffffffff8111facd>] fsync_bdev+0x18/0x48
        [<ffffffff811f433c>] invalidate_partition+0x25/0x42
        [<ffffffff8114bda2>] rescan_partitions+0x37/0x3a7
        [<ffffffff811f2dff>] blkdev_ioctl+0x5d4/0x6b1
        [<ffffffff8111eca4>] block_ioctl+0x37/0x3b
        [<ffffffff811060d0>] vfs_ioctl+0x32/0xa6
        [<ffffffff81106650>] do_vfs_ioctl+0x490/0x4d6
        [<ffffffff811066ec>] sys_ioctl+0x56/0x79
        [<ffffffff8100236b>] system_call_fastpath+0x16/0x1b

other info that might help us debug this:

1 lock held by fdisk/29231:
  #0:  (&bdev->bd_mutex){+.+.+.}, at: [<ffffffff811f2df0>] blkdev_ioctl+0x5c5/0x6b1

stack backtrace:
Pid: 29231, comm: fdisk Not tainted 2.6.33-rc8 #4
Call Trace:
  [<ffffffff8106d6dc>] print_circular_bug+0xa8/0xb6
  [<ffffffff8106e505>] __lock_acquire+0xa07/0xd05
  [<ffffffff81062009>] ? sched_clock_local+0x1c/0x82
  [<ffffffff8106e8cf>] lock_acquire+0xcc/0xe9
  [<ffffffff810fb13c>] ? get_super+0x5c/0xaf
  [<ffffffff8106b936>] ? lock_release_holdtime+0x2c/0xdb
  [<ffffffff81403450>] down_read+0x51/0x84
  [<ffffffff810fb13c>] ? get_super+0x5c/0xaf
  [<ffffffff810fb13c>] get_super+0x5c/0xaf
  [<ffffffff8111facd>] fsync_bdev+0x18/0x48
  [<ffffffff811f433c>] invalidate_partition+0x25/0x42
  [<ffffffff81402c8e>] ? mutex_trylock+0x12a/0x159
  [<ffffffff8114bda2>] rescan_partitions+0x37/0x3a7
  [<ffffffff8106d0c9>] ? trace_hardirqs_on+0xd/0xf
  [<ffffffff811f2df0>] ? blkdev_ioctl+0x5c5/0x6b1
  [<ffffffff811f2dff>] blkdev_ioctl+0x5d4/0x6b1
  [<ffffffff8106d098>] ? trace_hardirqs_on_caller+0x118/0x13c
  [<ffffffff8111eca4>] block_ioctl+0x37/0x3b
  [<ffffffff811060d0>] vfs_ioctl+0x32/0xa6
  [<ffffffff81106650>] do_vfs_ioctl+0x490/0x4d6
  [<ffffffff811066ec>] sys_ioctl+0x56/0x79
  [<ffffffff8102f9bd>] ? __wake_up+0x22/0x4d
  [<ffffffff8100236b>] system_call_fastpath+0x16/0x1b
-------------------------------------------------------------------------

Kernel version used: 2.6.33-rc8 #4. I do not remember the exact steps, but
I was trying to format a USB stick using fdisk. Please let me know if
you need additional information. Thank you.

Regards,
--
 	Ciprian Docan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* possible circular locking dependency detected
@ 2007-05-13 18:11 Marko Macek
  0 siblings, 0 replies; 18+ messages in thread
From: Marko Macek @ 2007-05-13 18:11 UTC (permalink / raw)
  To: linux-kernel, video4linux-list

Hello!

=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.21.1-cfs-v11 #4
-------------------------------------------------------
tvtime/6360 is trying to acquire lock:
 (&mm->mmap_sem){----}, at: [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
 
but task is already holding lock:
 (&q->lock#2){--..}, at: [<f8a6ac43>] videobuf_qbuf+0x10/0x288 [video_buf]
 
which lock already depends on the new lock.
 
 
the existing dependency chain (in reverse order) is:
 
-> #1 (&q->lock#2){--..}:
       [<c01437eb>] __lock_acquire+0x9de/0xb58
       [<f8a695fe>] videobuf_mmap_mapper+0x12/0x1ff [video_buf]
       [<c0143d19>] lock_acquire+0x56/0x6e
       [<f8a695fe>] videobuf_mmap_mapper+0x12/0x1ff [video_buf]
       [<c032579a>] __mutex_lock_slowpath+0xe3/0x23b
       [<f8a695fe>] videobuf_mmap_mapper+0x12/0x1ff [video_buf]
       [<f8a695fe>] videobuf_mmap_mapper+0x12/0x1ff [video_buf]
       [<c0173559>] kmem_cache_zalloc+0x69/0x97
       [<c0142a07>] trace_hardirqs_on+0x11e/0x141
       [<c0168eea>] do_mmap_pgoff+0x43e/0x714
       [<c0108adb>] sys_mmap2+0x9d/0xb7
       [<c0104d9e>] sysenter_past_esp+0x5f/0x99
       [<ffffffff>] 0xffffffff

-> #0 (&mm->mmap_sem){----}:
       [<c014170f>] print_circular_bug_entry+0x40/0x46
       [<c01436d7>] __lock_acquire+0x8ca/0xb58
       [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
       [<c0143d19>] lock_acquire+0x56/0x6e
       [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
       [<c013d082>] down_read+0x3d/0x4e
       [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
       [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
       [<f8a69ae7>] videobuf_waiton+0xdf/0xe9 [video_buf]
       [<f8a6a836>] videobuf_iolock+0x7f/0xdf [video_buf]
       [<f8a7fdb2>] buffer_prepare+0x174/0x1d8 [saa7134]
       [<c03258ea>] __mutex_lock_slowpath+0x233/0x23b
       [<f8a6ac43>] videobuf_qbuf+0x10/0x288 [video_buf]
       [<f8a6ae23>] videobuf_qbuf+0x1f0/0x288 [video_buf]
       [<c0159626>] find_get_page+0x40/0x45
       [<f8a8144d>] video_do_ioctl+0xd6f/0xec7 [saa7134]
       [<c0326a8a>] _spin_unlock+0x14/0x1c
       [<f8a0e946>] video_usercopy+0x172/0x237 [videodev]
       [<c0328621>] do_page_fault+0x202/0x5df
       [<f8a7ef74>] video_ioctl+0x18/0x1c [saa7134]
       [<f8a806de>] video_do_ioctl+0x0/0xec7 [saa7134]
       [<c0180ca0>] do_ioctl+0x4c/0x62
       [<c0180efa>] vfs_ioctl+0x244/0x256
       [<c0180f58>] sys_ioctl+0x4c/0x64
       [<c0104d9e>] sysenter_past_esp+0x5f/0x99
       [<ffffffff>] 0xffffffff

other info that might help us debug this:

1 lock held by tvtime/6360:
 #0:  (&q->lock#2){--..}, at: [<f8a6ac43>] videobuf_qbuf+0x10/0x288 [video_buf]

stack backtrace:
 [<c0141fa7>] print_circular_bug_tail+0x5f/0x67
 [<c014170f>] print_circular_bug_entry+0x40/0x46
 [<c01436d7>] __lock_acquire+0x8ca/0xb58
 [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
 [<c0143d19>] lock_acquire+0x56/0x6e
 [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
 [<c013d082>] down_read+0x3d/0x4e
 [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
 [<f8a6a50a>] videobuf_dma_init_user+0xb6/0x14e [video_buf]
 [<f8a69ae7>] videobuf_waiton+0xdf/0xe9 [video_buf]
 [<f8a6a836>] videobuf_iolock+0x7f/0xdf [video_buf]
 [<f8a7fdb2>] buffer_prepare+0x174/0x1d8 [saa7134]
 [<c03258ea>] __mutex_lock_slowpath+0x233/0x23b
 [<f8a6ac43>] videobuf_qbuf+0x10/0x288 [video_buf]
 [<f8a6ae23>] videobuf_qbuf+0x1f0/0x288 [video_buf]
 [<c0159626>] find_get_page+0x40/0x45
 [<f8a8144d>] video_do_ioctl+0xd6f/0xec7 [saa7134]
 [<c0326a8a>] _spin_unlock+0x14/0x1c
 [<f8a0e946>] video_usercopy+0x172/0x237 [videodev]
 [<c0328621>] do_page_fault+0x202/0x5df
 [<f8a7ef74>] video_ioctl+0x18/0x1c [saa7134]
 [<f8a806de>] video_do_ioctl+0x0/0xec7 [saa7134]
 [<c0180ca0>] do_ioctl+0x4c/0x62
 [<c0180efa>] vfs_ioctl+0x244/0x256
 [<c0180f58>] sys_ioctl+0x4c/0x64
 [<c0104d9e>] sysenter_past_esp+0x5f/0x99
 =======================


The TV card is an saa7134.
The machine is an AMD Athlon 64 X2 running Fedora Core 6.

I get a lockup while watching TV about every month or two. The machine
responds to ping, but nothing else.

Mark

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2024-05-01 16:47 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CA+55aFy0cQq569m=0umnqZe6HJp8eQX2ed-yi=Fmntuhd2AM=Q@mail.gmail.com>
     [not found] ` <87h99zo4d8.fsf@doppelsaurus.mobileactivedefense.com>
     [not found]   ` <CA+55aFwcPK_wXS8SE5wPitCXUewZ1+OGqVrWxvvU8tVDuCeHWA@mail.gmail.com>
     [not found]     ` <CA+55aFxGW8_DpYa6rZAq0s7zzFCB58U=7Kgy1T+7cY2=TasGrw@mail.gmail.com>
     [not found]       ` <871t13o1n1.fsf@doppelsaurus.mobileactivedefense.com>
     [not found]         ` <CA+55aFzbCDwOTbHiawpY2xSaR_EBkTDbYeJV3CH09OLLtPW3nw@mail.gmail.com>
     [not found]           ` <6f7d587b-3933-7c02-a804-db1732ced1cf@stressinduktion.org>
     [not found]             ` <CA+55aFyNJg_brA4rGF0S2ve0V_2vuhZCFKEMFDNhHKEXoVCRGA@mail.gmail.com>
     [not found]               ` <20160901204746.GR2356@ZenIV.linux.org.uk>
     [not found]                 ` <CA+55aFxzRnLCev6i_ehw9LFf-dw3s0znF+nP_c86i=219OZhfg@mail.gmail.com>
     [not found]                   ` <20160901210142.GS2356@ZenIV.linux.org.uk>
     [not found]                     ` <CA+55aFzxJM4pbS_jySERnCoOvvPbo+FgM7FZEATLJnCseD0j0g@mail.gmail.com>
2016-09-01 22:04                       ` possible circular locking dependency detected Linus Torvalds
2016-09-02 14:43                         ` CAI Qian
2016-09-02 15:51                           ` CAI Qian
2016-09-02 16:46                             ` CAI Qian
2016-09-02 17:10                             ` Linus Torvalds
2016-09-02 15:18                         ` Rainer Weikusat
2016-09-02 16:00                           ` Al Viro
2016-09-02 16:10                             ` Rainer Weikusat
2016-09-02 17:02                               ` Al Viro
2016-09-02 17:12                                 ` Linus Torvalds
2016-09-02 17:40                                   ` Rainer Weikusat
2016-09-02 17:53                                     ` Al Viro
2016-09-02 17:52                                   ` Al Viro
2024-05-01 16:47 Chuck Lever III
  -- strict thread matches above, loose matches on Subject: below --
2015-03-11 12:52 Daniel Wagner
2010-05-20 16:34 Ciprian Docan
2010-05-21 21:14 ` Andrew Morton
2007-05-13 18:11 Marko Macek
