linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH AUTOSEL 5.8 01/12] epoll: do not insert into poll queues until all sanity checks are done
@ 2020-10-05 14:44 Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 02/12] epoll: replace ->visited/visited_list with generation count Sasha Levin
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Sasha Levin @ 2020-10-05 14:44 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Al Viro, Sasha Levin, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

[ Upstream commit f8d4f44df056c5b504b0d49683fb7279218fd207 ]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/eventpoll.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8107e06d7f6f5..5207dfc85b786 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1522,6 +1522,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 		RCU_INIT_POINTER(epi->ws, NULL);
 	}
 
+	/* Add the current item to the list of active epoll hook for this file */
+	spin_lock(&tfile->f_lock);
+	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+	spin_unlock(&tfile->f_lock);
+
+	/*
+	 * Add the current item to the RB tree. All RB tree operations are
+	 * protected by "mtx", and ep_insert() is called with "mtx" held.
+	 */
+	ep_rbtree_insert(ep, epi);
+
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (full_check && reverse_path_check())
+		goto error_remove_epi;
+
 	/* Initialize the poll table using the queue callback */
 	epq.epi = epi;
 	init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1544,22 +1560,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 	if (epi->nwait < 0)
 		goto error_unregister;
 
-	/* Add the current item to the list of active epoll hook for this file */
-	spin_lock(&tfile->f_lock);
-	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
-	spin_unlock(&tfile->f_lock);
-
-	/*
-	 * Add the current item to the RB tree. All RB tree operations are
-	 * protected by "mtx", and ep_insert() is called with "mtx" held.
-	 */
-	ep_rbtree_insert(ep, epi);
-
-	/* now check if we've created too many backpaths */
-	error = -EINVAL;
-	if (full_check && reverse_path_check())
-		goto error_remove_epi;
-
 	/* We have to drop the new item inside our item list to keep track of it */
 	write_lock_irq(&ep->lock);
 
@@ -1588,6 +1588,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	return 0;
 
+error_unregister:
+	ep_unregister_pollwait(ep, epi);
 error_remove_epi:
 	spin_lock(&tfile->f_lock);
 	list_del_rcu(&epi->fllink);
@@ -1595,9 +1597,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	rb_erase_cached(&epi->rbn, &ep->rbr);
 
-error_unregister:
-	ep_unregister_pollwait(ep, epi);
-
 	/*
 	 * We need to do this because an event could have been arrived on some
 	 * allocated wait queue. Note that we don't care about the ep->ovflist
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 5.8 02/12] epoll: replace ->visited/visited_list with generation count
  2020-10-05 14:44 [PATCH AUTOSEL 5.8 01/12] epoll: do not insert into poll queues until all sanity checks are done Sasha Levin
@ 2020-10-05 14:44 ` Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 03/12] epoll: EPOLL_CTL_ADD: close the race in decision to take fast path Sasha Levin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2020-10-05 14:44 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Al Viro, Sasha Levin, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

[ Upstream commit 18306c404abe18a0972587a6266830583c60c928 ]

removes the need to clear it, along with the races.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/eventpoll.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5207dfc85b786..82ab9a25f12f2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,8 +218,7 @@ struct eventpoll {
 	struct file *file;
 
 	/* used to optimize loop detection check */
-	struct list_head visited_list_link;
-	int visited;
+	u64 gen;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	/* used to track busy poll napi_id */
@@ -274,6 +273,8 @@ static long max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
+static u64 loop_check_gen = 0;
+
 /* Used to check for epoll file descriptor inclusion loops */
 static struct nested_calls poll_loop_ncalls;
 
@@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
 /*
  * List of files with newly added links, where we may need to limit the number
  * of emanating paths. Protected by the epmutex.
@@ -1971,13 +1969,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
-	ep->visited = 1;
-	list_add(&ep->visited_list_link, &visited_list);
+	ep->gen = loop_check_gen;
 	for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
 			ep_tovisit = epi->ffd.file->private_data;
-			if (ep_tovisit->visited)
+			if (ep_tovisit->gen == loop_check_gen)
 				continue;
 			error = ep_call_nested(&poll_loop_ncalls,
 					ep_loop_check_proc, epi->ffd.file,
@@ -2018,18 +2015,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	int ret;
-	struct eventpoll *ep_cur, *ep_next;
-
-	ret = ep_call_nested(&poll_loop_ncalls,
+	return ep_call_nested(&poll_loop_ncalls,
 			      ep_loop_check_proc, file, ep, current);
-	/* clear visited list */
-	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
-							visited_list_link) {
-		ep_cur->visited = 0;
-		list_del(&ep_cur->visited_list_link);
-	}
-	return ret;
 }
 
 static void clear_tfile_check_list(void)
@@ -2199,6 +2186,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 			error = epoll_mutex_lock(&epmutex, 0, nonblock);
 			if (error)
 				goto error_tgt_fput;
+			loop_check_gen++;
 			full_check = 1;
 			if (is_file_epoll(tf.file)) {
 				error = -ELOOP;
@@ -2262,6 +2250,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 error_tgt_fput:
 	if (full_check) {
 		clear_tfile_check_list();
+		loop_check_gen++;
 		mutex_unlock(&epmutex);
 	}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 5.8 03/12] epoll: EPOLL_CTL_ADD: close the race in decision to take fast path
  2020-10-05 14:44 [PATCH AUTOSEL 5.8 01/12] epoll: do not insert into poll queues until all sanity checks are done Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 02/12] epoll: replace ->visited/visited_list with generation count Sasha Levin
@ 2020-10-05 14:44 ` Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 05/12] ep_create_wakeup_source(): dentry name can change under you Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 07/12] io_uring: fix potential ABBA deadlock in ->show_fdinfo() Sasha Levin
  3 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2020-10-05 14:44 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Al Viro, Sasha Levin, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

[ Upstream commit fe0a916c1eae8e17e86c3753d13919177d63ed7e ]

Checking for the lack of epitems refering to the epoll we want to insert into
is not enough; we might have an insertion of that epoll into another one that
has already collected the set of files to recheck for excessive reverse paths,
but hasn't gotten to creating/inserting the epitem for it.

However, any such insertion in progress can be detected - it will update the
generation count in our epoll when it's done looking through it for files
to check.  That gets done under ->mtx of our epoll and that allows us to
detect that safely.

We are *not* holding epmutex here, so the generation count is not stable.
However, since both the update of ep->gen by loop check and (later)
insertion into ->f_ep_link are done with ep->mtx held, we are fine -
the sequence is
	grab epmutex
	bump loop_check_gen
	...
	grab tep->mtx		// 1
	tep->gen = loop_check_gen
	...
	drop tep->mtx		// 2
	...
	grab tep->mtx		// 3
	...
	insert into ->f_ep_link
	...
	drop tep->mtx		// 4
	bump loop_check_gen
	drop epmutex
and if the fastpath check in another thread happens for that
eventpoll, it can come
	* before (1) - in that case fastpath is just fine
	* after (4) - we'll see non-empty ->f_ep_link, slow path
taken
	* between (2) and (3) - loop_check_gen is stable,
with ->mtx providing barriers and we end up taking slow path.

Note that ->f_ep_link emptiness check is slightly racy - we are protected
against insertions into that list, but removals can happen right under us.
Not a problem - in the worst case we'll end up taking a slow path for
no good reason.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/eventpoll.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 82ab9a25f12f2..16313180e4c16 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2181,6 +2181,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 		goto error_tgt_fput;
 	if (op == EPOLL_CTL_ADD) {
 		if (!list_empty(&f.file->f_ep_links) ||
+				ep->gen == loop_check_gen ||
 						is_file_epoll(tf.file)) {
 			mutex_unlock(&ep->mtx);
 			error = epoll_mutex_lock(&epmutex, 0, nonblock);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 5.8 05/12] ep_create_wakeup_source(): dentry name can change under you...
  2020-10-05 14:44 [PATCH AUTOSEL 5.8 01/12] epoll: do not insert into poll queues until all sanity checks are done Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 02/12] epoll: replace ->visited/visited_list with generation count Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 03/12] epoll: EPOLL_CTL_ADD: close the race in decision to take fast path Sasha Levin
@ 2020-10-05 14:44 ` Sasha Levin
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 07/12] io_uring: fix potential ABBA deadlock in ->show_fdinfo() Sasha Levin
  3 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2020-10-05 14:44 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Al Viro, Sasha Levin, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

[ Upstream commit 3701cb59d892b88d569427586f01491552f377b1 ]

or get freed, for that matter, if it's a long (separately stored)
name.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/eventpoll.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 16313180e4c16..4df61129566d4 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1448,7 +1448,7 @@ static int reverse_path_check(void)
 
 static int ep_create_wakeup_source(struct epitem *epi)
 {
-	const char *name;
+	struct name_snapshot n;
 	struct wakeup_source *ws;
 
 	if (!epi->ep->ws) {
@@ -1457,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
 			return -ENOMEM;
 	}
 
-	name = epi->ffd.file->f_path.dentry->d_name.name;
-	ws = wakeup_source_register(NULL, name);
+	take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+	ws = wakeup_source_register(NULL, n.name.name);
+	release_dentry_name_snapshot(&n);
 
 	if (!ws)
 		return -ENOMEM;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 5.8 07/12] io_uring: fix potential ABBA deadlock in ->show_fdinfo()
  2020-10-05 14:44 [PATCH AUTOSEL 5.8 01/12] epoll: do not insert into poll queues until all sanity checks are done Sasha Levin
                   ` (2 preceding siblings ...)
  2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 05/12] ep_create_wakeup_source(): dentry name can change under you Sasha Levin
@ 2020-10-05 14:44 ` Sasha Levin
  3 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2020-10-05 14:44 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Jens Axboe, syzbot+2f8fa4e860edc3066aba, Sasha Levin,
	linux-fsdevel, io-uring

From: Jens Axboe <axboe@kernel.dk>

[ Upstream commit fad8e0de4426a776c9bcb060555e7c09e2d08db6 ]

syzbot reports a potential lock deadlock between the normal IO path and
->show_fdinfo():

======================================================
WARNING: possible circular locking dependency detected
5.9.0-rc6-syzkaller #0 Not tainted
------------------------------------------------------
syz-executor.2/19710 is trying to acquire lock:
ffff888098ddc450 (sb_writers#4){.+.+}-{0:0}, at: io_write+0x6b5/0xb30 fs/io_uring.c:3296

but task is already holding lock:
ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&ctx->uring_lock){+.+.}-{3:3}:
       __mutex_lock_common kernel/locking/mutex.c:956 [inline]
       __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
       __io_uring_show_fdinfo fs/io_uring.c:8417 [inline]
       io_uring_show_fdinfo+0x194/0xc70 fs/io_uring.c:8460
       seq_show+0x4a8/0x700 fs/proc/fd.c:65
       seq_read+0x432/0x1070 fs/seq_file.c:208
       do_loop_readv_writev fs/read_write.c:734 [inline]
       do_loop_readv_writev fs/read_write.c:721 [inline]
       do_iter_read+0x48e/0x6e0 fs/read_write.c:955
       vfs_readv+0xe5/0x150 fs/read_write.c:1073
       kernel_readv fs/splice.c:355 [inline]
       default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412
       do_splice_to+0x137/0x170 fs/splice.c:871
       splice_direct_to_actor+0x307/0x980 fs/splice.c:950
       do_splice_direct+0x1b3/0x280 fs/splice.c:1059
       do_sendfile+0x55f/0xd40 fs/read_write.c:1540
       __do_sys_sendfile64 fs/read_write.c:1601 [inline]
       __se_sys_sendfile64 fs/read_write.c:1587 [inline]
       __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #1 (&p->lock){+.+.}-{3:3}:
       __mutex_lock_common kernel/locking/mutex.c:956 [inline]
       __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
       seq_read+0x61/0x1070 fs/seq_file.c:155
       pde_read fs/proc/inode.c:306 [inline]
       proc_reg_read+0x221/0x300 fs/proc/inode.c:318
       do_loop_readv_writev fs/read_write.c:734 [inline]
       do_loop_readv_writev fs/read_write.c:721 [inline]
       do_iter_read+0x48e/0x6e0 fs/read_write.c:955
       vfs_readv+0xe5/0x150 fs/read_write.c:1073
       kernel_readv fs/splice.c:355 [inline]
       default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412
       do_splice_to+0x137/0x170 fs/splice.c:871
       splice_direct_to_actor+0x307/0x980 fs/splice.c:950
       do_splice_direct+0x1b3/0x280 fs/splice.c:1059
       do_sendfile+0x55f/0xd40 fs/read_write.c:1540
       __do_sys_sendfile64 fs/read_write.c:1601 [inline]
       __se_sys_sendfile64 fs/read_write.c:1587 [inline]
       __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #0 (sb_writers#4){.+.+}-{0:0}:
       check_prev_add kernel/locking/lockdep.c:2496 [inline]
       check_prevs_add kernel/locking/lockdep.c:2601 [inline]
       validate_chain kernel/locking/lockdep.c:3218 [inline]
       __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441
       lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029
       percpu_down_read include/linux/percpu-rwsem.h:51 [inline]
       __sb_start_write+0x228/0x450 fs/super.c:1672
       io_write+0x6b5/0xb30 fs/io_uring.c:3296
       io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719
       __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175
       io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254
       io_submit_sqe fs/io_uring.c:6324 [inline]
       io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521
       __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xa9

other info that might help us debug this:

Chain exists of:
  sb_writers#4 --> &p->lock --> &ctx->uring_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&ctx->uring_lock);
                               lock(&p->lock);
                               lock(&ctx->uring_lock);
  lock(sb_writers#4);

 *** DEADLOCK ***

1 lock held by syz-executor.2/19710:
 #0: ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348

stack backtrace:
CPU: 0 PID: 19710 Comm: syz-executor.2 Not tainted 5.9.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827
 check_prev_add kernel/locking/lockdep.c:2496 [inline]
 check_prevs_add kernel/locking/lockdep.c:2601 [inline]
 validate_chain kernel/locking/lockdep.c:3218 [inline]
 __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441
 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029
 percpu_down_read include/linux/percpu-rwsem.h:51 [inline]
 __sb_start_write+0x228/0x450 fs/super.c:1672
 io_write+0x6b5/0xb30 fs/io_uring.c:3296
 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719
 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175
 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254
 io_submit_sqe fs/io_uring.c:6324 [inline]
 io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521
 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45e179
Code: 3d b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 0b b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f1194e74c78 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
RAX: ffffffffffffffda RBX: 00000000000082c0 RCX: 000000000045e179
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000004
RBP: 000000000118cf98 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cf4c
R13: 00007ffd1aa5756f R14: 00007f1194e759c0 R15: 000000000118cf4c

Fix this by just not diving into details if we fail to trylock the
io_uring mutex. We know the ctx isn't going away during this operation,
but we cannot safely iterate buffers/files/personalities if we don't
hold the io_uring mutex.

Reported-by: syzbot+2f8fa4e860edc3066aba@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/io_uring.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1d5640cc2a488..25017418348ca 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7994,11 +7994,19 @@ static int io_uring_show_cred(int id, void *p, void *data)
 
 static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
+	bool has_lock;
 	int i;
 
-	mutex_lock(&ctx->uring_lock);
+	/*
+	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+	 * since fdinfo case grabs it in the opposite direction of normal use
+	 * cases. If we fail to get the lock, we just don't iterate any
+	 * structures that could be going away outside the io_uring mutex.
+	 */
+	has_lock = mutex_trylock(&ctx->uring_lock);
+
 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
-	for (i = 0; i < ctx->nr_user_files; i++) {
+	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
 		struct fixed_file_table *table;
 		struct file *f;
 
@@ -8010,13 +8018,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 			seq_printf(m, "%5u: <none>\n", i);
 	}
 	seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
-	for (i = 0; i < ctx->nr_user_bufs; i++) {
+	for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
 		struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
 
 		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
 						(unsigned int) buf->len);
 	}
-	if (!idr_is_empty(&ctx->personality_idr)) {
+	if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
 		seq_printf(m, "Personalities:\n");
 		idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
 	}
@@ -8031,7 +8039,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 					req->task->task_works != NULL);
 	}
 	spin_unlock_irq(&ctx->completion_lock);
-	mutex_unlock(&ctx->uring_lock);
+	if (has_lock)
+		mutex_unlock(&ctx->uring_lock);
 }
 
 static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-10-05 14:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-05 14:44 [PATCH AUTOSEL 5.8 01/12] epoll: do not insert into poll queues until all sanity checks are done Sasha Levin
2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 02/12] epoll: replace ->visited/visited_list with generation count Sasha Levin
2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 03/12] epoll: EPOLL_CTL_ADD: close the race in decision to take fast path Sasha Levin
2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 05/12] ep_create_wakeup_source(): dentry name can change under you Sasha Levin
2020-10-05 14:44 ` [PATCH AUTOSEL 5.8 07/12] io_uring: fix potential ABBA deadlock in ->show_fdinfo() Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).