linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Kent Overstreet <kent.overstreet@linux.dev>
Cc: torvalds@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-bcachefs@vger.kernel.org,
	Christoph Hellwig <hch@lst.de>
Subject: Re: [GIT PULL] bcachefs
Date: Tue, 27 Jun 2023 11:16:01 -0600	[thread overview]
Message-ID: <23922545-917a-06bd-ec92-ff6aa66118e2@kernel.dk> (raw)
In-Reply-To: <b92ea170-d531-00f3-ca7a-613c05dcbf5f@kernel.dk>

On 6/26/23 8:59?PM, Jens Axboe wrote:
> On 6/26/23 8:05?PM, Kent Overstreet wrote:
>> On Mon, Jun 26, 2023 at 07:13:54PM -0600, Jens Axboe wrote:
>>> Doesn't reproduce for me with XFS. The above ktest doesn't work for me
>>> either:
>>
>> It just popped for me on xfs, but it took half an hour or so of looping
>> vs. 30 seconds on bcachefs.
> 
> OK, I'll try and leave it running overnight and see if I can get it to
> trigger.

I did manage to reproduce it, and also managed to get bcachefs to run
the test. But I had to add:

diff --git a/check b/check
index 5f9f1a6bec88..6d74bd4933bd 100755
--- a/check
+++ b/check
@@ -283,7 +283,7 @@ while [ $# -gt 0 ]; do
 	case "$1" in
 	-\? | -h | --help) usage ;;
 
-	-nfs|-afs|-glusterfs|-cifs|-9p|-fuse|-virtiofs|-pvfs2|-tmpfs|-ubifs)
+	-nfs|-afs|-glusterfs|-cifs|-9p|-fuse|-virtiofs|-pvfs2|-tmpfs|-ubifs|-bcachefs)
 		FSTYP="${1:1}"
 		;;
 	-overlay)

to ktest/tests/xfstests/ and run it with -bcachefs, otherwise it kept
failing because it assumed it was XFS.

I suspected this was just a timing issue, and it looks like that's
exactly what it is. Looking at the test case, it'll randomly kill -9
fsstress, and if that happens while we have io_uring IO pending, then we
process completions inline (for a PF_EXITING current). This means they
get pushed to fallback work, which runs out of line. If we hit that case
AND the timing is such that it hasn't been processed yet, we'll still be
holding a file reference under the mount point and umount will -EBUSY
fail.

As far as I can tell, this can happen with aio as well, it's just harder
to hit. If the fput happens while the task is exiting, then fput will
end up being delayed through a workqueue as well. The test case assumes
that once it's reaped the exit of the killed task that all files are
released, which isn't necessarily true if they are done out-of-line.

For io_uring specifically, it may make sense to wait on the fallback
work. The below patch does this, and should fix the issue. But I'm not
fully convinced that this is really needed, as I do think this can
happen without io_uring as well. It just doesn't right now as the test
does buffered IO, and aio will be fully sync with buffered IO. That
means there's either no gap where aio will hit it without O_DIRECT, or
it's just small enough that it hasn't been hit.


diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3bca7a79efda..7abad5cb2131 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -150,7 +150,6 @@ static void io_clean_op(struct io_kiocb *req);
 static void io_queue_sqe(struct io_kiocb *req);
 static void io_move_task_work_from_local(struct io_ring_ctx *ctx);
 static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
-static __cold void io_fallback_tw(struct io_uring_task *tctx);
 
 struct kmem_cache *req_cachep;
 
@@ -1248,6 +1247,49 @@ static inline struct llist_node *io_llist_cmpxchg(struct llist_head *head,
 	return cmpxchg(&head->first, old, new);
 }
 
+#define NR_FALLBACK_CTX	8
+
+static __cold void io_flush_fallback(struct io_ring_ctx **ctxs, int *nr_ctx)
+{
+	int i;
+
+	for (i = 0; i < *nr_ctx; i++) {
+		struct io_ring_ctx *ctx = ctxs[i];
+
+		flush_delayed_work(&ctx->fallback_work);
+		percpu_ref_put(&ctx->refs);
+	}
+	*nr_ctx = 0;
+}
+
+static __cold void io_flush_fallback_add(struct io_ring_ctx *ctx,
+					 struct io_ring_ctx **ctxs, int *nr_ctx)
+{
+	percpu_ref_get(&ctx->refs);
+	ctxs[(*nr_ctx)++] = ctx;
+	if (*nr_ctx == NR_FALLBACK_CTX)
+		io_flush_fallback(ctxs, nr_ctx);
+}
+
+static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync)
+{
+	struct llist_node *node = llist_del_all(&tctx->task_list);
+	struct io_ring_ctx *ctxs[NR_FALLBACK_CTX];
+	struct io_kiocb *req;
+	int nr_ctx = 0;
+
+	while (node) {
+		req = container_of(node, struct io_kiocb, io_task_work.node);
+		node = node->next;
+		if (sync)
+			io_flush_fallback_add(req->ctx, ctxs, &nr_ctx);
+		if (llist_add(&req->io_task_work.node,
+			      &req->ctx->fallback_llist))
+			schedule_delayed_work(&req->ctx->fallback_work, 1);
+	}
+	io_flush_fallback(ctxs, &nr_ctx);
+}
+
 void tctx_task_work(struct callback_head *cb)
 {
 	struct io_tw_state ts = {};
@@ -1260,7 +1302,7 @@ void tctx_task_work(struct callback_head *cb)
 	unsigned int count = 0;
 
 	if (unlikely(current->flags & PF_EXITING)) {
-		io_fallback_tw(tctx);
+		io_fallback_tw(tctx, true);
 		return;
 	}
 
@@ -1289,20 +1331,6 @@ void tctx_task_work(struct callback_head *cb)
 	trace_io_uring_task_work_run(tctx, count, loops);
 }
 
-static __cold void io_fallback_tw(struct io_uring_task *tctx)
-{
-	struct llist_node *node = llist_del_all(&tctx->task_list);
-	struct io_kiocb *req;
-
-	while (node) {
-		req = container_of(node, struct io_kiocb, io_task_work.node);
-		node = node->next;
-		if (llist_add(&req->io_task_work.node,
-			      &req->ctx->fallback_llist))
-			schedule_delayed_work(&req->ctx->fallback_work, 1);
-	}
-}
-
 static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -1377,7 +1405,7 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
 	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
 		return;
 
-	io_fallback_tw(tctx);
+	io_fallback_tw(tctx, false);
 }
 
 static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)

-- 
Jens Axboe


  parent reply	other threads:[~2023-06-27 17:16 UTC|newest]

Thread overview: 140+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-26 21:46 [GIT PULL] bcachefs Kent Overstreet
2023-06-26 23:11 ` Jens Axboe
2023-06-27  0:06   ` Kent Overstreet
2023-06-27  1:13     ` Jens Axboe
2023-06-27  2:05       ` Kent Overstreet
2023-06-27  2:59         ` Jens Axboe
2023-06-27  3:10           ` Kent Overstreet
2023-06-27 17:16           ` Jens Axboe [this message]
2023-06-27 20:15             ` Kent Overstreet
2023-06-27 22:05               ` Dave Chinner
2023-06-27 22:41                 ` Kent Overstreet
2023-06-28 14:40                 ` Jens Axboe
2023-06-28 14:48                   ` Thomas Weißschuh
2023-06-28 14:58                     ` Jens Axboe
2023-06-28  3:16               ` Jens Axboe
2023-06-28  4:01                 ` Kent Overstreet
2023-06-28 14:58                   ` Jens Axboe
2023-06-28 15:22                     ` Jens Axboe
2023-06-28 17:56                       ` Kent Overstreet
2023-06-28 20:45                         ` Jens Axboe
2023-06-28 16:57                     ` Jens Axboe
2023-06-28 17:33                       ` Christian Brauner
2023-06-28 17:52                       ` Kent Overstreet
2023-06-28 20:44                         ` Jens Axboe
2023-06-28 21:17                           ` Jens Axboe
2023-06-28 22:13                             ` Kent Overstreet
2023-06-28 22:33                               ` Jens Axboe
2023-06-28 22:55                                 ` Kent Overstreet
2023-06-28 23:14                                   ` Jens Axboe
2023-06-28 23:50                                     ` Kent Overstreet
2023-06-29  1:00                                       ` Dave Chinner
2023-06-29  1:33                                         ` Jens Axboe
2023-06-29 11:18                                           ` Christian Brauner
2023-06-29 14:17                                             ` Kent Overstreet
2023-06-29 15:31                                             ` Kent Overstreet
2023-06-30  9:40                                               ` Christian Brauner
2023-07-06 15:20                                                 ` Kent Overstreet
2023-07-06 16:26                                                   ` Jens Axboe
2023-07-06 16:34                                                     ` Kent Overstreet
2023-06-29  1:29                                       ` Jens Axboe
2023-07-06 20:15                             ` Kent Overstreet
2023-06-28 17:54                     ` Kent Overstreet
2023-06-28 20:54                       ` Jens Axboe
2023-06-28 22:14                         ` Jens Axboe
2023-06-28 23:04                           ` Kent Overstreet
2023-06-28 23:11                             ` Jens Axboe
2023-06-27  2:33       ` Kent Overstreet
2023-06-27  2:59         ` Jens Axboe
2023-06-27  3:19           ` Matthew Wilcox
2023-06-27  3:22             ` Kent Overstreet
2023-06-27  3:52 ` Christoph Hellwig
2023-06-27  4:36   ` Kent Overstreet
2023-07-06 15:56 ` Kent Overstreet
2023-07-06 16:40   ` Josef Bacik
2023-07-06 17:38     ` Kent Overstreet
2023-07-06 19:17       ` Eric Sandeen
2023-07-06 19:31         ` Kent Overstreet
2023-07-06 21:19       ` Darrick J. Wong
2023-07-06 22:43         ` Kent Overstreet
2023-07-07 13:13           ` Jan Kara
2023-07-07 13:52             ` Kent Overstreet
2023-07-07  8:48         ` Christian Brauner
2023-07-07  9:18           ` Kent Overstreet
2023-07-07 16:26             ` James Bottomley
2023-07-07 16:48               ` Kent Overstreet
2023-07-07 17:04                 ` James Bottomley
2023-07-07 17:26                   ` Kent Overstreet
2023-07-08  3:54               ` Matthew Wilcox
2023-07-08  4:10                 ` Kent Overstreet
2023-07-08  4:31                 ` Kent Overstreet
2023-07-08 15:02                   ` Theodore Ts'o
2023-07-08 15:23                     ` Kent Overstreet
2023-07-08 16:42                 ` James Bottomley
2023-07-09  1:16                   ` Kent Overstreet
2023-07-07  9:35           ` Kent Overstreet
2023-07-07  2:04       ` Theodore Ts'o
2023-07-07 12:18       ` Brian Foster
2023-07-07 14:49         ` Kent Overstreet
2023-07-12  2:54   ` Kent Overstreet
2023-07-12 19:48     ` Kees Cook
2023-07-12 19:57       ` Kent Overstreet
2023-07-12 22:10     ` Darrick J. Wong
2023-07-12 23:57       ` Kent Overstreet
2023-08-09  1:27     ` Linus Torvalds
2023-08-10 15:54       ` Kent Overstreet
2023-08-10 16:40         ` Linus Torvalds
2023-08-10 18:02           ` Kent Overstreet
2023-08-10 18:09             ` Linus Torvalds
2023-08-10 17:52         ` Jan Kara
2023-08-11  2:47           ` Kent Overstreet
2023-08-11  8:10             ` Jan Kara
2023-08-11  8:13               ` Christian Brauner
2023-08-10 22:39         ` Darrick J. Wong
2023-08-10 23:47           ` Linus Torvalds
2023-08-11  2:40             ` Jens Axboe
2023-08-11  4:03             ` Kent Overstreet
2023-08-11  5:20               ` Linus Torvalds
2023-08-11  5:29                 ` Kent Overstreet
2023-08-11  5:53                   ` Linus Torvalds
2023-08-11  7:52                     ` Christian Brauner
2023-08-11 14:31                     ` Jens Axboe
2023-08-11  3:45           ` Kent Overstreet
2023-08-21  0:09             ` Dave Chinner
2023-08-10 23:07         ` Matthew Wilcox
2023-08-11 10:54         ` Christian Brauner
2023-08-11 12:58           ` Kent Overstreet
2023-08-14  7:25             ` Christian Brauner
2023-08-14 15:23               ` Kent Overstreet
2023-08-11 13:21           ` Kent Overstreet
2023-08-11 22:56             ` Darrick J. Wong
2023-08-14  7:21             ` Christian Brauner
2023-08-14 15:27               ` Kent Overstreet
2023-09-03  3:25 Kent Overstreet
2023-09-05 13:24 ` Christoph Hellwig
2023-09-06  0:00   ` Kent Overstreet
2023-09-06  0:41     ` Matthew Wilcox
2023-09-06 16:10       ` Kent Overstreet
2023-09-06 17:57         ` Darrick J. Wong
2023-09-08  9:37     ` Christoph Hellwig
2023-09-06 19:36 ` Linus Torvalds
2023-09-06 20:02   ` Linus Torvalds
2023-09-06 20:20     ` Linus Torvalds
2023-09-06 21:55       ` Arnaldo Carvalho de Melo
2023-09-06 23:13         ` David Sterba
2023-09-06 23:34           ` Linus Torvalds
2023-09-06 23:46             ` Arnaldo Carvalho de Melo
2023-09-06 23:53               ` Arnaldo Carvalho de Melo
2023-09-06 23:16         ` Linus Torvalds
2023-09-10  0:53       ` Kent Overstreet
2023-09-07 20:37   ` Kent Overstreet
2023-09-07 20:51     ` Linus Torvalds
2023-09-07 23:40   ` Kent Overstreet
2023-09-08  6:29     ` Martin Steigerwald
2023-09-08  9:11     ` Joshua Ashton
2023-09-06 22:28 ` Nathan Chancellor
2023-09-07  0:03   ` Kees Cook
2023-09-07 14:29     ` Chris Mason
2023-09-07 20:39     ` Kent Overstreet
2023-09-08 10:50       ` Brian Foster
2023-09-08 23:05     ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=23922545-917a-06bd-ec92-ff6aa66118e2@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-bcachefs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).