* [PATCH 5.13 00/16] random 5.13 patches
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

1-3: task vs files cancellation unification, sheds some lines
8-9: inlines fill_event(), gives some performance
10-13: more registered buffer / resource preparation patches
the others are separate, unconnected cleanups/improvements

Pavel Begunkov (16):
  io_uring: unify task and files cancel loops
  io_uring: track inflight requests through counter
  io_uring: unify files and task cancel
  io_uring: refactor io_close
  io_uring: enable inline completion for more cases
  io_uring: refactor compat_msghdr import
  io_uring: optimise non-eventfd post-event
  io_uring: always pass cflags into fill_event()
  io_uring: optimise fill_event() by inlining
  io_uring: simplify io_rsrc_data refcounting
  io_uring: add buffer unmap helper
  io_uring: cleanup buffer register
  io_uring: split file table from rsrc nodes
  io_uring: improve sqo stop
  io_uring: improve hardlink code generation
  io_uring: return back safer resurrect

 fs/io_uring.c            | 386 ++++++++++++++++-----------------------
 include/linux/io_uring.h |  12 +-
 2 files changed, 163 insertions(+), 235 deletions(-)

-- 
2.24.0


* [PATCH 01/16] io_uring: unify task and files cancel loops
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Move the tracked inflight number check up the stack into
__io_uring_files_cancel() so it's similar to task cancellation. Will be
used for further cleanups.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 74 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 33 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f1fcb32f8e0b..5c2364ceb6e1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8869,28 +8869,6 @@ static int io_uring_count_inflight(struct io_ring_ctx *ctx,
 	return cnt;
 }
 
-static void io_uring_cancel_files(struct io_ring_ctx *ctx,
-				  struct task_struct *task,
-				  struct files_struct *files)
-{
-	while (!list_empty_careful(&ctx->inflight_list)) {
-		DEFINE_WAIT(wait);
-		int inflight;
-
-		inflight = io_uring_count_inflight(ctx, task, files);
-		if (!inflight)
-			break;
-
-		io_uring_try_cancel_requests(ctx, task, files);
-
-		prepare_to_wait(&task->io_uring->wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (inflight == io_uring_count_inflight(ctx, task, files))
-			schedule();
-		finish_wait(&task->io_uring->wait, &wait);
-	}
-}
-
 static int __io_uring_add_task_file(struct io_ring_ctx *ctx)
 {
 	struct io_uring_task *tctx = current->io_uring;
@@ -8976,6 +8954,19 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
 	}
 }
 
+static s64 tctx_inflight_tracked(struct task_struct *task,
+				 struct files_struct *files)
+{
+	struct io_uring_task *tctx = task->io_uring;
+	struct io_tctx_node *node;
+	unsigned long index;
+	s64 cnt = 0;
+
+	xa_for_each(&tctx->xa, index, node)
+		cnt += io_uring_count_inflight(node->ctx, task, files);
+	return cnt;
+}
+
 static s64 tctx_inflight(struct io_uring_task *tctx)
 {
 	return percpu_counter_sum(&tctx->inflight);
@@ -9014,14 +9005,12 @@ static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx)
 		wait_for_completion(&work.completion);
 }
 
-void __io_uring_files_cancel(struct files_struct *files)
+static void io_uring_try_cancel(struct files_struct *files)
 {
 	struct io_uring_task *tctx = current->io_uring;
 	struct io_tctx_node *node;
 	unsigned long index;
 
-	/* make sure overflow events are dropped */
-	atomic_inc(&tctx->in_idle);
 	xa_for_each(&tctx->xa, index, node) {
 		struct io_ring_ctx *ctx = node->ctx;
 
@@ -9029,14 +9018,8 @@ void __io_uring_files_cancel(struct files_struct *files)
 			io_sqpoll_cancel_sync(ctx);
 			continue;
 		}
-		io_uring_cancel_files(ctx, current, files);
-		if (!files)
-			io_uring_try_cancel_requests(ctx, current, NULL);
+		io_uring_try_cancel_requests(ctx, current, files);
 	}
-	atomic_dec(&tctx->in_idle);
-
-	if (files)
-		io_uring_clean_tctx(tctx);
 }
 
 /* should only be called by SQPOLL task */
@@ -9070,6 +9053,31 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 	atomic_dec(&tctx->in_idle);
 }
 
+void __io_uring_files_cancel(struct files_struct *files)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	DEFINE_WAIT(wait);
+	s64 inflight;
+
+	/* make sure overflow events are dropped */
+	atomic_inc(&tctx->in_idle);
+	do {
+		/* read completions before cancelations */
+		inflight = tctx_inflight_tracked(current, files);
+		if (!inflight)
+			break;
+		io_uring_try_cancel(files);
+
+		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (inflight == tctx_inflight_tracked(current, files))
+			schedule();
+		finish_wait(&tctx->wait, &wait);
+	} while (1);
+	atomic_dec(&tctx->in_idle);
+
+	io_uring_clean_tctx(tctx);
+}
+
 /*
  * Find any io_uring fd that this task has registered or done IO on, and cancel
  * requests.
@@ -9089,7 +9097,7 @@ void __io_uring_task_cancel(void)
 		inflight = tctx_inflight(tctx);
 		if (!inflight)
 			break;
-		__io_uring_files_cancel(NULL);
+		io_uring_try_cancel(NULL);
 
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-- 
2.24.0


* [PATCH 02/16] io_uring: track inflight requests through counter
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Instead of keeping requests in an inflight_list, just track them with a
per-tctx atomic counter. Apart from being much simpler and more
consistent with task cancellation, it frees ->inflight_entry from being
shared between iopoll and cancellation tracking, so less headache for us.
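
For clarity, the tracking side boils down to this pair (condensed from
the diff below):

  static void io_req_track_inflight(struct io_kiocb *req)
  {
  	if (!(req->flags & REQ_F_INFLIGHT)) {
  		req->flags |= REQ_F_INFLIGHT;
  		atomic_inc(&current->io_uring->inflight_tracked);
  	}
  }

  /* and the matching decrement in io_dismantle_req(): */
  	atomic_dec(&req->task->io_uring->inflight_tracked);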

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 56 +++++++++------------------------------------------
 1 file changed, 10 insertions(+), 46 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5c2364ceb6e1..3353a0b1032a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -444,9 +444,6 @@ struct io_ring_ctx {
 		struct hlist_head	*cancel_hash;
 		unsigned		cancel_hash_bits;
 		bool			poll_multi_file;
-
-		spinlock_t		inflight_lock;
-		struct list_head	inflight_list;
 	} ____cacheline_aligned_in_smp;
 
 	struct delayed_work		rsrc_put_work;
@@ -473,6 +470,7 @@ struct io_uring_task {
 	const struct io_ring_ctx *last;
 	struct io_wq		*io_wq;
 	struct percpu_counter	inflight;
+	atomic_t		inflight_tracked;
 	atomic_t		in_idle;
 
 	spinlock_t		task_lock;
@@ -833,10 +831,7 @@ struct io_kiocb {
 	struct io_kiocb			*link;
 	struct percpu_ref		*fixed_rsrc_refs;
 
-	/*
-	 * 1. used with ctx->iopoll_list with reads/writes
-	 * 2. to track reqs with ->files (see io_op_def::file_table)
-	 */
+	/* used with ctx->iopoll_list with reads/writes */
 	struct list_head		inflight_entry;
 	union {
 		struct io_task_work	io_task_work;
@@ -1164,8 +1159,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->iopoll_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
-	spin_lock_init(&ctx->inflight_lock);
-	INIT_LIST_HEAD(&ctx->inflight_list);
 	spin_lock_init(&ctx->rsrc_ref_lock);
 	INIT_LIST_HEAD(&ctx->rsrc_ref_list);
 	INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
@@ -1194,14 +1187,9 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
 
 static void io_req_track_inflight(struct io_kiocb *req)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
 	if (!(req->flags & REQ_F_INFLIGHT)) {
 		req->flags |= REQ_F_INFLIGHT;
-
-		spin_lock_irq(&ctx->inflight_lock);
-		list_add(&req->inflight_entry, &ctx->inflight_list);
-		spin_unlock_irq(&ctx->inflight_lock);
+		atomic_inc(&current->io_uring->inflight_tracked);
 	}
 }
 
@@ -1719,12 +1707,9 @@ static void io_dismantle_req(struct io_kiocb *req)
 		io_clean_op(req);
 
 		if (req->flags & REQ_F_INFLIGHT) {
-			struct io_ring_ctx *ctx = req->ctx;
-			unsigned long flags;
+			struct io_uring_task *tctx = req->task->io_uring;
 
-			spin_lock_irqsave(&ctx->inflight_lock, flags);
-			list_del(&req->inflight_entry);
-			spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+			atomic_dec(&tctx->inflight_tracked);
 			req->flags &= ~REQ_F_INFLIGHT;
 		}
 	}
@@ -7917,6 +7902,7 @@ static int io_uring_alloc_task_context(struct task_struct *task,
 	init_waitqueue_head(&tctx->wait);
 	tctx->last = NULL;
 	atomic_set(&tctx->in_idle, 0);
+	atomic_set(&tctx->inflight_tracked, 0);
 	task->io_uring = tctx;
 	spin_lock_init(&tctx->task_lock);
 	INIT_WQ_LIST(&tctx->task_list);
@@ -8855,20 +8841,6 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	}
 }
 
-static int io_uring_count_inflight(struct io_ring_ctx *ctx,
-				   struct task_struct *task,
-				   struct files_struct *files)
-{
-	struct io_kiocb *req;
-	int cnt = 0;
-
-	spin_lock_irq(&ctx->inflight_lock);
-	list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
-		cnt += io_match_task(req, task, files);
-	spin_unlock_irq(&ctx->inflight_lock);
-	return cnt;
-}
-
 static int __io_uring_add_task_file(struct io_ring_ctx *ctx)
 {
 	struct io_uring_task *tctx = current->io_uring;
@@ -8954,17 +8926,9 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
 	}
 }
 
-static s64 tctx_inflight_tracked(struct task_struct *task,
-				 struct files_struct *files)
+static s64 tctx_inflight_tracked(struct io_uring_task *tctx)
 {
-	struct io_uring_task *tctx = task->io_uring;
-	struct io_tctx_node *node;
-	unsigned long index;
-	s64 cnt = 0;
-
-	xa_for_each(&tctx->xa, index, node)
-		cnt += io_uring_count_inflight(node->ctx, task, files);
-	return cnt;
+	return atomic_read(&tctx->inflight_tracked);
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx)
@@ -9063,13 +9027,13 @@ void __io_uring_files_cancel(struct files_struct *files)
 	atomic_inc(&tctx->in_idle);
 	do {
 		/* read completions before cancelations */
-		inflight = tctx_inflight_tracked(current, files);
+		inflight = tctx_inflight_tracked(tctx);
 		if (!inflight)
 			break;
 		io_uring_try_cancel(files);
 
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-		if (inflight == tctx_inflight_tracked(current, files))
+		if (inflight == tctx_inflight_tracked(tctx))
 			schedule();
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
-- 
2.24.0


* [PATCH 03/16] io_uring: unify files and task cancel
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Now __io_uring_cancel() and __io_uring_files_cancel() are very similar
and mostly differ in how we count requests; merge them and let
tctx_inflight() handle the counting.
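
For reference, the resulting entry points look like this (condensed from
the include/linux/io_uring.h hunk below):

  void __io_uring_cancel(struct files_struct *files);

  static inline void io_uring_files_cancel(struct files_struct *files)
  {
  	if (current->io_uring)
  		__io_uring_cancel(files);
  }

  static inline void io_uring_task_cancel(void)
  {
  	return io_uring_files_cancel(NULL);
  }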

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c            | 56 ++++++++++------------------------------
 include/linux/io_uring.h | 12 ++++-----
 2 files changed, 19 insertions(+), 49 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3353a0b1032a..0dcf9b7a7423 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8926,13 +8926,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
 	}
 }
 
-static s64 tctx_inflight_tracked(struct io_uring_task *tctx)
-{
-	return atomic_read(&tctx->inflight_tracked);
-}
-
-static s64 tctx_inflight(struct io_uring_task *tctx)
+static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
 {
+	if (tracked)
+		return atomic_read(&tctx->inflight_tracked);
 	return percpu_counter_sum(&tctx->inflight);
 }
 
@@ -8999,7 +8996,7 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 	atomic_inc(&tctx->in_idle);
 	do {
 		/* read completions before cancelations */
-		inflight = tctx_inflight(tctx);
+		inflight = tctx_inflight(tctx, false);
 		if (!inflight)
 			break;
 		io_uring_try_cancel_requests(ctx, current, NULL);
@@ -9010,43 +9007,18 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 		 * avoids a race where a completion comes in before we did
 		 * prepare_to_wait().
 		 */
-		if (inflight == tctx_inflight(tctx))
+		if (inflight == tctx_inflight(tctx, false))
 			schedule();
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
 	atomic_dec(&tctx->in_idle);
 }
 
-void __io_uring_files_cancel(struct files_struct *files)
-{
-	struct io_uring_task *tctx = current->io_uring;
-	DEFINE_WAIT(wait);
-	s64 inflight;
-
-	/* make sure overflow events are dropped */
-	atomic_inc(&tctx->in_idle);
-	do {
-		/* read completions before cancelations */
-		inflight = tctx_inflight_tracked(tctx);
-		if (!inflight)
-			break;
-		io_uring_try_cancel(files);
-
-		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-		if (inflight == tctx_inflight_tracked(tctx))
-			schedule();
-		finish_wait(&tctx->wait, &wait);
-	} while (1);
-	atomic_dec(&tctx->in_idle);
-
-	io_uring_clean_tctx(tctx);
-}
-
 /*
  * Find any io_uring fd that this task has registered or done IO on, and cancel
  * requests.
  */
-void __io_uring_task_cancel(void)
+void __io_uring_cancel(struct files_struct *files)
 {
 	struct io_uring_task *tctx = current->io_uring;
 	DEFINE_WAIT(wait);
@@ -9054,15 +9026,14 @@ void __io_uring_task_cancel(void)
 
 	/* make sure overflow events are dropped */
 	atomic_inc(&tctx->in_idle);
-	__io_uring_files_cancel(NULL);
+	io_uring_try_cancel(files);
 
 	do {
 		/* read completions before cancelations */
-		inflight = tctx_inflight(tctx);
+		inflight = tctx_inflight(tctx, !!files);
 		if (!inflight)
 			break;
-		io_uring_try_cancel(NULL);
-
+		io_uring_try_cancel(files);
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 
 		/*
@@ -9070,16 +9041,17 @@ void __io_uring_task_cancel(void)
 		 * avoids a race where a completion comes in before we did
 		 * prepare_to_wait().
 		 */
-		if (inflight == tctx_inflight(tctx))
+		if (inflight == tctx_inflight(tctx, !!files))
 			schedule();
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
-
 	atomic_dec(&tctx->in_idle);
 
 	io_uring_clean_tctx(tctx);
-	/* all current's requests should be gone, we can kill tctx */
-	__io_uring_free(current);
+	if (!files) {
+		/* for exec all current's requests should be gone, kill tctx */
+		__io_uring_free(current);
+	}
 }
 
 static void *io_uring_validate_mmap_request(struct file *file,
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 79cde9906be0..04b650bcbbe5 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -7,19 +7,17 @@
 
 #if defined(CONFIG_IO_URING)
 struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_task_cancel(void);
-void __io_uring_files_cancel(struct files_struct *files);
+void __io_uring_cancel(struct files_struct *files);
 void __io_uring_free(struct task_struct *tsk);
 
-static inline void io_uring_task_cancel(void)
+static inline void io_uring_files_cancel(struct files_struct *files)
 {
 	if (current->io_uring)
-		__io_uring_task_cancel();
+		__io_uring_cancel(files);
 }
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_task_cancel(void)
 {
-	if (current->io_uring)
-		__io_uring_files_cancel(files);
+	return io_uring_files_cancel(NULL);
 }
 static inline void io_uring_free(struct task_struct *tsk)
 {
-- 
2.24.0


* [PATCH 04/16] io_uring: refactor io_close
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A small refactoring of io_close() that shrinks it and makes it easier to
read.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0dcf9b7a7423..2b6177c63b50 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4184,11 +4184,9 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
 	struct files_struct *files = current->files;
 	struct io_close *close = &req->close;
 	struct fdtable *fdt;
-	struct file *file;
-	int ret;
+	struct file *file = NULL;
+	int ret = -EBADF;
 
-	file = NULL;
-	ret = -EBADF;
 	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	if (close->fd >= fdt->max_fds) {
@@ -4196,12 +4194,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
 		goto err;
 	}
 	file = fdt->fd[close->fd];
-	if (!file) {
-		spin_unlock(&files->file_lock);
-		goto err;
-	}
-
-	if (file->f_op == &io_uring_fops) {
+	if (!file || file->f_op == &io_uring_fops) {
 		spin_unlock(&files->file_lock);
 		file = NULL;
 		goto err;
-- 
2.24.0


* [PATCH 05/16] io_uring: enable inline completion for more cases
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Take advantage of delayed/inline completion flushing and pass the right
issue flags for completion of the open, open2, fadvise and poll remove
opcodes. All others either already use them or are always punted and
never executed inline.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2b6177c63b50..cfd77500e16c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3848,7 +3848,7 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
 
@@ -4126,7 +4126,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
 	ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
 
@@ -5322,7 +5322,7 @@ static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
 
-- 
2.24.0


* [PATCH 06/16] io_uring: refactor compat_msghdr import
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add an entry for a user pointer to compat_msghdr into struct io_sr_msg,
so it's explicit that we may use it this way, and remove the annoying
casts.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cfd77500e16c..dd9dffbd4da6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -572,8 +572,9 @@ struct io_connect {
 struct io_sr_msg {
 	struct file			*file;
 	union {
-		struct user_msghdr __user *umsg;
-		void __user		*buf;
+		struct compat_msghdr __user	*umsg_compat;
+		struct user_msghdr __user	*umsg;
+		void __user			*buf;
 	};
 	int				msg_flags;
 	int				bgid;
@@ -4438,16 +4439,14 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
 static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 					struct io_async_msghdr *iomsg)
 {
-	struct compat_msghdr __user *msg_compat;
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct compat_iovec __user *uiov;
 	compat_uptr_t ptr;
 	compat_size_t len;
 	int ret;
 
-	msg_compat = (struct compat_msghdr __user *) sr->umsg;
-	ret = __get_compat_msghdr(&iomsg->msg, msg_compat, &iomsg->uaddr,
-					&ptr, &len);
+	ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
+				  &ptr, &len);
 	if (ret)
 		return ret;
 
-- 
2.24.0


* [PATCH 07/16] io_uring: optimise non-eventfd post-event
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Eventfd is not the canonical way of using io_uring, so annotate
io_should_trigger_evfd() with likely() to improve code generation for
the non-eventfd branch.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index dd9dffbd4da6..87c1c614411c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1357,13 +1357,11 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 
 static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
 {
-	if (!ctx->cq_ev_fd)
+	if (likely(!ctx->cq_ev_fd))
 		return false;
 	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
 		return false;
-	if (!ctx->eventfd_async)
-		return true;
-	return io_wq_current_is_worker();
+	return !ctx->eventfd_async || io_wq_current_is_worker();
 }
 
 static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
-- 
2.24.0


* [PATCH 08/16] io_uring: always pass cflags into fill_event()
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A simple preparation patch inlining io_cqring_fill_event(), whose only
role was to pass cflags=0 into the actual fill-event helper. It helps to
keep the number of related helpers sane in the following patches.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 87c1c614411c..1a7bfb10d2b2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1030,7 +1030,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
 static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
 
-static void io_cqring_fill_event(struct io_kiocb *req, long res);
+static bool io_cqring_fill_event(struct io_kiocb *req, long res, unsigned cflags);
 static void io_put_req(struct io_kiocb *req);
 static void io_put_req_deferred(struct io_kiocb *req, int nr);
 static void io_dismantle_req(struct io_kiocb *req);
@@ -1260,7 +1260,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
-		io_cqring_fill_event(req, status);
+		io_cqring_fill_event(req, status, 0);
 		io_put_req_deferred(req, 1);
 	}
 }
@@ -1494,8 +1494,8 @@ static inline void req_ref_get(struct io_kiocb *req)
 	atomic_inc(&req->refs);
 }
 
-static bool __io_cqring_fill_event(struct io_kiocb *req, long res,
-				   unsigned int cflags)
+static bool io_cqring_fill_event(struct io_kiocb *req, long res,
+				 unsigned int cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_uring_cqe *cqe;
@@ -1541,11 +1541,6 @@ static bool __io_cqring_fill_event(struct io_kiocb *req, long res,
 	return false;
 }
 
-static void io_cqring_fill_event(struct io_kiocb *req, long res)
-{
-	__io_cqring_fill_event(req, res, 0);
-}
-
 static void io_req_complete_post(struct io_kiocb *req, long res,
 				 unsigned int cflags)
 {
@@ -1553,7 +1548,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	__io_cqring_fill_event(req, res, cflags);
+	io_cqring_fill_event(req, res, cflags);
 	/*
 	 * If we're the last reference to this request, add to our locked
 	 * free_list cache.
@@ -1769,7 +1764,7 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
 		link->timeout.head = NULL;
 		ret = hrtimer_try_to_cancel(&io->timer);
 		if (ret != -1) {
-			io_cqring_fill_event(link, -ECANCELED);
+			io_cqring_fill_event(link, -ECANCELED, 0);
 			io_put_req_deferred(link, 1);
 			return true;
 		}
@@ -1788,7 +1783,7 @@ static void io_fail_links(struct io_kiocb *req)
 		link->link = NULL;
 
 		trace_io_uring_fail_link(req, link);
-		io_cqring_fill_event(link, -ECANCELED);
+		io_cqring_fill_event(link, -ECANCELED, 0);
 		io_put_req_deferred(link, 2);
 		link = nxt;
 	}
@@ -2108,7 +2103,7 @@ static void io_submit_flush_completions(struct io_comp_state *cs,
 	spin_lock_irq(&ctx->completion_lock);
 	for (i = 0; i < nr; i++) {
 		req = cs->reqs[i];
-		__io_cqring_fill_event(req, req->result, req->compl.cflags);
+		io_cqring_fill_event(req, req->result, req->compl.cflags);
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
@@ -2248,7 +2243,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		if (req->flags & REQ_F_BUFFER_SELECTED)
 			cflags = io_put_rw_kbuf(req);
 
-		__io_cqring_fill_event(req, req->result, cflags);
+		io_cqring_fill_event(req, req->result, cflags);
 		(*nr_events)++;
 
 		if (req_ref_put_and_test(req))
@@ -4887,7 +4882,7 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	}
 	if (req->poll.events & EPOLLONESHOT)
 		flags = 0;
-	if (!__io_cqring_fill_event(req, error, flags)) {
+	if (!io_cqring_fill_event(req, error, flags)) {
 		io_poll_remove_waitqs(req);
 		req->poll.done = true;
 		flags = 0;
@@ -5224,7 +5219,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 
 	do_complete = io_poll_remove_waitqs(req);
 	if (do_complete) {
-		io_cqring_fill_event(req, -ECANCELED);
+		io_cqring_fill_event(req, -ECANCELED, 0);
 		io_commit_cqring(req->ctx);
 		req_set_fail_links(req);
 		io_put_req_deferred(req, 1);
@@ -5483,7 +5478,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 	atomic_set(&req->ctx->cq_timeouts,
 		atomic_read(&req->ctx->cq_timeouts) + 1);
 
-	io_cqring_fill_event(req, -ETIME);
+	io_cqring_fill_event(req, -ETIME, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -5528,7 +5523,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 		return PTR_ERR(req);
 
 	req_set_fail_links(req);
-	io_cqring_fill_event(req, -ECANCELED);
+	io_cqring_fill_event(req, -ECANCELED, 0);
 	io_put_req_deferred(req, 1);
 	return 0;
 }
@@ -5601,7 +5596,7 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
 		ret = io_timeout_update(ctx, tr->addr, &tr->ts,
 					io_translate_timeout_mode(tr->flags));
 
-	io_cqring_fill_event(req, ret);
+	io_cqring_fill_event(req, ret, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
@@ -5753,7 +5748,7 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
 done:
 	if (!ret)
 		ret = success_ret;
-	io_cqring_fill_event(req, ret);
+	io_cqring_fill_event(req, ret, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);
@@ -5810,7 +5805,7 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 
 	spin_lock_irq(&ctx->completion_lock);
 done:
-	io_cqring_fill_event(req, ret);
+	io_cqring_fill_event(req, ret, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
-- 
2.24.0


* [PATCH 09/16] io_uring: optimise fill_event() by inlining
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

There are three cases where we really care about the performance of
io_cqring_fill_event() -- flushing inline completions, iopoll and
io_req_complete_post(). Inline the hot part of fill_event() into them.

All others are not as important and we don't want to bloat the binary
for them, so add a noinline version of the function for all other use
cases.

nops test (batch=32): 16.932 vs 17.822 KIOPS
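
The split is roughly the following (condensed from the diff below;
tracing omitted):

  /* hot path, inlined into flush/iopoll/complete_post callers */
  static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
  					  unsigned int cflags)
  {
  	struct io_uring_cqe *cqe = io_get_cqring(req->ctx);

  	if (likely(cqe)) {
  		WRITE_ONCE(cqe->user_data, req->user_data);
  		WRITE_ONCE(cqe->res, res);
  		WRITE_ONCE(cqe->flags, cflags);
  		return true;
  	}
  	return io_cqring_event_overflow(req, res, cflags);
  }

  /* everyone else goes through a noinline wrapper */
  static noinline bool io_cqring_fill_event(struct io_kiocb *req, long res,
  					  unsigned int cflags)
  {
  	return __io_cqring_fill_event(req, res, cflags);
  }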

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 57 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1a7bfb10d2b2..a8d6ea1ecd2d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1338,7 +1338,7 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
-static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
+static inline struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
 	unsigned tail;
@@ -1494,26 +1494,11 @@ static inline void req_ref_get(struct io_kiocb *req)
 	atomic_inc(&req->refs);
 }
 
-static bool io_cqring_fill_event(struct io_kiocb *req, long res,
-				 unsigned int cflags)
+static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
+				     unsigned int cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_uring_cqe *cqe;
 
-	trace_io_uring_complete(ctx, req->user_data, res, cflags);
-
-	/*
-	 * If we can't get a cq entry, userspace overflowed the
-	 * submission (by quite a lot). Increment the overflow count in
-	 * the ring.
-	 */
-	cqe = io_get_cqring(ctx);
-	if (likely(cqe)) {
-		WRITE_ONCE(cqe->user_data, req->user_data);
-		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, cflags);
-		return true;
-	}
 	if (!atomic_read(&req->task->io_uring->in_idle)) {
 		struct io_overflow_cqe *ocqe;
 
@@ -1541,6 +1526,36 @@ static bool io_cqring_fill_event(struct io_kiocb *req, long res,
 	return false;
 }
 
+static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
+					     unsigned int cflags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_uring_cqe *cqe;
+
+	trace_io_uring_complete(ctx, req->user_data, res, cflags);
+
+	/*
+	 * If we can't get a cq entry, userspace overflowed the
+	 * submission (by quite a lot). Increment the overflow count in
+	 * the ring.
+	 */
+	cqe = io_get_cqring(ctx);
+	if (likely(cqe)) {
+		WRITE_ONCE(cqe->user_data, req->user_data);
+		WRITE_ONCE(cqe->res, res);
+		WRITE_ONCE(cqe->flags, cflags);
+		return true;
+	}
+	return io_cqring_event_overflow(req, res, cflags);
+}
+
+/* not as hot to bloat with inlining */
+static noinline bool io_cqring_fill_event(struct io_kiocb *req, long res,
+					  unsigned int cflags)
+{
+	return __io_cqring_fill_event(req, res, cflags);
+}
+
 static void io_req_complete_post(struct io_kiocb *req, long res,
 				 unsigned int cflags)
 {
@@ -1548,7 +1563,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(req, res, cflags);
+	__io_cqring_fill_event(req, res, cflags);
 	/*
 	 * If we're the last reference to this request, add to our locked
 	 * free_list cache.
@@ -2103,7 +2118,7 @@ static void io_submit_flush_completions(struct io_comp_state *cs,
 	spin_lock_irq(&ctx->completion_lock);
 	for (i = 0; i < nr; i++) {
 		req = cs->reqs[i];
-		io_cqring_fill_event(req, req->result, req->compl.cflags);
+		__io_cqring_fill_event(req, req->result, req->compl.cflags);
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
@@ -2243,7 +2258,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		if (req->flags & REQ_F_BUFFER_SELECTED)
 			cflags = io_put_rw_kbuf(req);
 
-		io_cqring_fill_event(req, req->result, cflags);
+		__io_cqring_fill_event(req, req->result, cflags);
 		(*nr_events)++;
 
 		if (req_ref_put_and_test(req))
-- 
2.24.0


* [PATCH 10/16] io_uring: simplify io_rsrc_data refcounting
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

We don't take many references of struct io_rsrc_data, only one per
io_rsrc_node, so using percpu refs is overkill. Use an atomic ref
instead, which is much simpler.
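
The resulting lifecycle of data->refs, condensed from the diff below:

  /*
   * io_rsrc_data_alloc()  -- atomic_set(&data->refs, 1), the initial ref
   * io_rsrc_node_switch() -- atomic_inc(), one ref per rsrc node in flight
   * __io_rsrc_put_work()  -- atomic_dec_and_test() -> complete(&data->done)
   * io_rsrc_ref_quiesce() -- drops the initial ref; if not yet zero, waits
   *                          on data->done, re-taking the ref and flushing
   *                          rsrc_put_work when interrupted by a signal
   */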

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 48 ++++++++++++++++--------------------------------
 1 file changed, 16 insertions(+), 32 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a8d6ea1ecd2d..143afe827cad 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -240,7 +240,7 @@ struct io_rsrc_data {
 	struct io_ring_ctx		*ctx;
 
 	rsrc_put_fn			*do_put;
-	struct percpu_ref		refs;
+	atomic_t			refs;
 	struct completion		done;
 	bool				quiesce;
 };
@@ -7082,13 +7082,6 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 #endif
 }
 
-static void io_rsrc_data_ref_zero(struct percpu_ref *ref)
-{
-	struct io_rsrc_data *data = container_of(ref, struct io_rsrc_data, refs);
-
-	complete(&data->done);
-}
-
 static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
 {
 	spin_lock_bh(&ctx->rsrc_ref_lock);
@@ -7119,7 +7112,7 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
 		list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
 		io_rsrc_ref_unlock(ctx);
 
-		percpu_ref_get(&data_to_kill->refs);
+		atomic_inc(&data_to_kill->refs);
 		percpu_ref_kill(&rsrc_node->refs);
 		ctx->rsrc_node = NULL;
 	}
@@ -7153,14 +7146,17 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
 			break;
 		io_rsrc_node_switch(ctx, data);
 
-		percpu_ref_kill(&data->refs);
+		/* kill initial ref, already quiesced if zero */
+		if (atomic_dec_and_test(&data->refs))
+			break;
 		flush_delayed_work(&ctx->rsrc_put_work);
-
 		ret = wait_for_completion_interruptible(&data->done);
 		if (!ret)
 			break;
 
-		percpu_ref_resurrect(&data->refs);
+		atomic_inc(&data->refs);
+		/* wait for all works potentially completing data->done */
+		flush_delayed_work(&ctx->rsrc_put_work);
 		reinit_completion(&data->done);
 
 		mutex_unlock(&ctx->uring_lock);
@@ -7181,23 +7177,13 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 	if (!data)
 		return NULL;
 
-	if (percpu_ref_init(&data->refs, io_rsrc_data_ref_zero,
-			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		kfree(data);
-		return NULL;
-	}
+	atomic_set(&data->refs, 1);
 	data->ctx = ctx;
 	data->do_put = do_put;
 	init_completion(&data->done);
 	return data;
 }
 
-static void io_rsrc_data_free(struct io_rsrc_data *data)
-{
-	percpu_ref_exit(&data->refs);
-	kfree(data);
-}
-
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct io_rsrc_data *data = ctx->file_data;
@@ -7211,7 +7197,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 
 	__io_sqe_files_unregister(ctx);
 	io_free_file_tables(data, ctx->nr_user_files);
-	io_rsrc_data_free(data);
+	kfree(data);
 	ctx->file_data = NULL;
 	ctx->nr_user_files = 0;
 	return 0;
@@ -7544,7 +7530,8 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 	}
 
 	io_rsrc_node_destroy(ref_node);
-	percpu_ref_put(&rsrc_data->refs);
+	if (atomic_dec_and_test(&rsrc_data->refs))
+		complete(&rsrc_data->done);
 }
 
 static void io_rsrc_put_work(struct work_struct *work)
@@ -7568,10 +7555,8 @@ static void io_rsrc_put_work(struct work_struct *work)
 static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
 {
 	struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
-	struct io_rsrc_data *data = node->rsrc_data;
-	struct io_ring_ctx *ctx = data->ctx;
+	struct io_ring_ctx *ctx = node->rsrc_data->ctx;
 	bool first_add = false;
-	int delay;
 
 	io_rsrc_ref_lock(ctx);
 	node->done = true;
@@ -7587,9 +7572,8 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
 	}
 	io_rsrc_ref_unlock(ctx);
 
-	delay = percpu_ref_is_dying(&data->refs) ? 0 : HZ;
-	if (first_add || !delay)
-		mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
+	if (first_add)
+		mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
 }
 
 static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
@@ -7684,7 +7668,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	io_free_file_tables(file_data, nr_args);
 	ctx->nr_user_files = 0;
 out_free:
-	io_rsrc_data_free(ctx->file_data);
+	kfree(ctx->file_data);
 	ctx->file_data = NULL;
 	return ret;
 }
-- 
2.24.0


* [PATCH 11/16] io_uring: add buffer unmap helper
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: Bijan Mottahedeh

Add a helper for unmapping registered buffers; it reads better than
double indexing and will be reused in the future.

Suggested-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 143afe827cad..157e8b6f1fc4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8098,25 +8098,27 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
 	return off;
 }
 
+static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
+{
+	unsigned int i;
+
+	for (i = 0; i < imu->nr_bvecs; i++)
+		unpin_user_page(imu->bvec[i].bv_page);
+	if (imu->acct_pages)
+		io_unaccount_mem(ctx, imu->acct_pages);
+	kvfree(imu->bvec);
+	imu->nr_bvecs = 0;
+}
+
 static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 {
-	int i, j;
+	unsigned int i;
 
 	if (!ctx->user_bufs)
 		return -ENXIO;
 
-	for (i = 0; i < ctx->nr_user_bufs; i++) {
-		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
-
-		for (j = 0; j < imu->nr_bvecs; j++)
-			unpin_user_page(imu->bvec[j].bv_page);
-
-		if (imu->acct_pages)
-			io_unaccount_mem(ctx, imu->acct_pages);
-		kvfree(imu->bvec);
-		imu->nr_bvecs = 0;
-	}
-
+	for (i = 0; i < ctx->nr_user_bufs; i++)
+		io_buffer_unmap(ctx, &ctx->user_bufs[i]);
 	kfree(ctx->user_bufs);
 	ctx->user_bufs = NULL;
 	ctx->nr_user_bufs = 0;
-- 
2.24.0


* [PATCH 12/16] io_uring: cleanup buffer register
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

In preparation for more changes, do a little cleanup of
io_sqe_buffers_register(): move all argument/invariant checking into it
from io_buffers_map_alloc(), where it was confusing, and tidy up the
loop a bit.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 157e8b6f1fc4..4be1f1efce26 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8311,17 +8311,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 
 static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
 {
-	if (ctx->user_bufs)
-		return -EBUSY;
-	if (!nr_args || nr_args > UIO_MAXIOV)
-		return -EINVAL;
-
-	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
-					GFP_KERNEL);
-	if (!ctx->user_bufs)
-		return -ENOMEM;
-
-	return 0;
+	ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL);
+	return ctx->user_bufs ? 0 : -ENOMEM;
 }
 
 static int io_buffer_validate(struct iovec *iov)
@@ -8353,26 +8344,26 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	struct iovec iov;
 	struct page *last_hpage = NULL;
 
+	if (ctx->user_bufs)
+		return -EBUSY;
+	if (!nr_args || nr_args > UIO_MAXIOV)
+		return -EINVAL;
 	ret = io_buffers_map_alloc(ctx, nr_args);
 	if (ret)
 		return ret;
 
-	for (i = 0; i < nr_args; i++) {
+	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
 		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
 
 		ret = io_copy_iov(ctx, &iov, arg, i);
 		if (ret)
 			break;
-
 		ret = io_buffer_validate(&iov);
 		if (ret)
 			break;
-
 		ret = io_sqe_buffer_register(ctx, &iov, imu, &last_hpage);
 		if (ret)
 			break;
-
-		ctx->nr_user_bufs++;
 	}
 
 	if (ret)
-- 
2.24.0


* [PATCH 13/16] io_uring: split file table from rsrc nodes
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

We don't need to store file tables in rsrc nodes; for now it's easier to
handle the tables non-generically, so move them into the context. A nice
side effect is one less pointer dereference for requests with fixed file
initialisation.
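
The lookup then becomes a plain two-level index (from the diff below):

  static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
  						       unsigned i)
  {
  	struct io_fixed_file *table_l2 = table->files[i >> IORING_FILE_TABLE_SHIFT];

  	return &table_l2[i & IORING_FILE_TABLE_MASK];
  }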

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 53 ++++++++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4be1f1efce26..d14a64cd2741 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -220,8 +220,9 @@ struct io_rsrc_put {
 	};
 };
 
-struct fixed_rsrc_table {
-	struct io_fixed_file *files;
+struct io_file_table {
+	/* two level table */
+	struct io_fixed_file **files;
 };
 
 struct io_rsrc_node {
@@ -236,7 +237,6 @@ struct io_rsrc_node {
 typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
 
 struct io_rsrc_data {
-	struct fixed_rsrc_table		*table;
 	struct io_ring_ctx		*ctx;
 
 	rsrc_put_fn			*do_put;
@@ -400,6 +400,7 @@ struct io_ring_ctx {
 	 * used. Only updated through io_uring_register(2).
 	 */
 	struct io_rsrc_data	*file_data;
+	struct io_file_table	file_table;
 	unsigned		nr_user_files;
 
 	/* if used, fixed mapped user buffers */
@@ -6270,19 +6271,19 @@ static void io_wq_submit_work(struct io_wq_work *work)
 #endif
 #define FFS_MASK		~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
 
-static inline struct io_fixed_file *io_fixed_file_slot(struct io_rsrc_data *file_data,
+static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
 						      unsigned i)
 {
-	struct fixed_rsrc_table *table;
+	struct io_fixed_file *table_l2;
 
-	table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT];
-	return &table->files[i & IORING_FILE_TABLE_MASK];
+	table_l2 = table->files[i >> IORING_FILE_TABLE_SHIFT];
+	return &table_l2[i & IORING_FILE_TABLE_MASK];
 }
 
 static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
 					      int index)
 {
-	struct io_fixed_file *slot = io_fixed_file_slot(ctx->file_data, index);
+	struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index);
 
 	return (struct file *) (slot->file_ptr & FFS_MASK);
 }
@@ -6312,7 +6313,7 @@ static struct file *io_file_get(struct io_submit_state *state,
 		if (unlikely((unsigned int)fd >= ctx->nr_user_files))
 			return NULL;
 		fd = array_index_nospec(fd, ctx->nr_user_files);
-		file_ptr = io_fixed_file_slot(ctx->file_data, fd)->file_ptr;
+		file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
 		file = (struct file *) (file_ptr & FFS_MASK);
 		file_ptr &= ~FFS_MASK;
 		/* mask in overlapping REQ_F and FFS bits */
@@ -7049,14 +7050,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
-static void io_free_file_tables(struct io_rsrc_data *data, unsigned nr_files)
+static void io_free_file_tables(struct io_file_table *table, unsigned nr_files)
 {
 	unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
 
 	for (i = 0; i < nr_tables; i++)
-		kfree(data->table[i].files);
-	kfree(data->table);
-	data->table = NULL;
+		kfree(table->files[i]);
+	kfree(table->files);
+	table->files = NULL;
 }
 
 static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -7196,7 +7197,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 		return ret;
 
 	__io_sqe_files_unregister(ctx);
-	io_free_file_tables(data, ctx->nr_user_files);
+	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
 	kfree(data);
 	ctx->file_data = NULL;
 	ctx->nr_user_files = 0;
@@ -7426,23 +7427,20 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
 }
 #endif
 
-static bool io_alloc_file_tables(struct io_rsrc_data *file_data,
-				 unsigned nr_files)
+static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
 {
 	unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
 
-	file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
-				   GFP_KERNEL);
-	if (!file_data->table)
+	table->files = kcalloc(nr_tables, sizeof(*table->files), GFP_KERNEL);
+	if (!table->files)
 		return false;
 
 	for (i = 0; i < nr_tables; i++) {
-		struct fixed_rsrc_table *table = &file_data->table[i];
 		unsigned int this_files = min(nr_files, IORING_MAX_FILES_TABLE);
 
-		table->files = kcalloc(this_files, sizeof(struct file *),
+		table->files[i] = kcalloc(this_files, sizeof(*table->files[i]),
 					GFP_KERNEL);
-		if (!table->files)
+		if (!table->files[i])
 			break;
 		nr_files -= this_files;
 	}
@@ -7450,7 +7448,7 @@ static bool io_alloc_file_tables(struct io_rsrc_data *file_data,
 	if (i == nr_tables)
 		return true;
 
-	io_free_file_tables(file_data, nr_tables * IORING_MAX_FILES_TABLE);
+	io_free_file_tables(table, nr_tables * IORING_MAX_FILES_TABLE);
 	return false;
 }
 
@@ -7618,9 +7616,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	if (!file_data)
 		return -ENOMEM;
 	ctx->file_data = file_data;
-
 	ret = -ENOMEM;
-	if (!io_alloc_file_tables(file_data, nr_args))
+	if (!io_alloc_file_tables(&ctx->file_table, nr_args))
 		goto out_free;
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
@@ -7648,7 +7645,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			fput(file);
 			goto out_fput;
 		}
-		io_fixed_file_set(io_fixed_file_slot(file_data, i), file);
+		io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
 	}
 
 	ret = io_sqe_files_scm(ctx);
@@ -7665,7 +7662,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		if (file)
 			fput(file);
 	}
-	io_free_file_tables(file_data, nr_args);
+	io_free_file_tables(&ctx->file_table, nr_args);
 	ctx->nr_user_files = 0;
 out_free:
 	kfree(ctx->file_data);
@@ -7761,7 +7758,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 			continue;
 
 		i = array_index_nospec(up->offset + done, ctx->nr_user_files);
-		file_slot = io_fixed_file_slot(ctx->file_data, i);
+		file_slot = io_fixed_file_slot(&ctx->file_table, i);
 
 		if (file_slot->file_ptr) {
 			file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-- 
2.24.0


* [PATCH 14/16] io_uring: improve sqo stop
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Set IO_SQ_THREAD_SHOULD_STOP before taking the sqd lock, so the sqpoll
task sees it earlier. Not a problem otherwise, it would stop eventually
anyway. Also add a check for the invariant that it's stopped only once.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d14a64cd2741..4e6a6f6df6a2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7234,9 +7234,10 @@ static void io_sq_thread_park(struct io_sq_data *sqd)
 static void io_sq_thread_stop(struct io_sq_data *sqd)
 {
 	WARN_ON_ONCE(sqd->thread == current);
+	WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
 
-	mutex_lock(&sqd->lock);
 	set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+	mutex_lock(&sqd->lock);
 	if (sqd->thread)
 		wake_up_process(sqd->thread);
 	mutex_unlock(&sqd->lock);
-- 
2.24.0


* [PATCH 15/16] io_uring: improve hardlink code generation
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

req_set_fail_links() condition checking is bulky. Even though it's
always in a slow path, it's inlined and generates lots of extra code;
simplify it by moving the HARDLINK check into the helpers killing
linked requests.

          text    data     bss     dec     hex filename
before:  79318   12330       8   91656   16608 ./fs/io_uring.o
after:   79126   12330       8   91464   16548 ./fs/io_uring.o

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4e6a6f6df6a2..2a465b6e90a4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1103,7 +1103,7 @@ static bool io_match_task(struct io_kiocb *head,
 
 static inline void req_set_fail_links(struct io_kiocb *req)
 {
-	if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
+	if (req->flags & REQ_F_LINK)
 		req->flags |= REQ_F_FAIL_LINK;
 }
 
@@ -1812,7 +1812,8 @@ static bool io_disarm_next(struct io_kiocb *req)
 
 	if (likely(req->flags & REQ_F_LINK_TIMEOUT))
 		posted = io_kill_linked_timeout(req);
-	if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
+	if (unlikely((req->flags & REQ_F_FAIL_LINK) &&
+		     !(req->flags & REQ_F_HARDLINK))) {
 		posted |= (req->link != NULL);
 		io_fail_links(req);
 	}
-- 
2.24.0


* [PATCH 16/16] io_uring: return back safer resurrect
From: Pavel Begunkov @ 2021-04-11  0:46 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Revert of the revert of "io_uring: wait potential ->release() on
resurrect", which added a helper so that resurrecting doesn't race with
completion reinit; it was removed because of a strange bug with no clear
root cause or link to the patch.

The approach was improved: instead of rcu_synchronize(), just use
wait_for_completion(), because we're at 0 refs and it will happen very
shortly. Specifically, use the non-interruptible version to ignore all
pending signals that may have ended a prior interruptible wait.
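
For reference, the re-added helper (also in the diff below), with the
intended ordering spelled out:

  static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
  {
  	bool got = percpu_ref_tryget(ref);

  	/*
  	 * Tryget failing means refs already hit zero, so ->release() is
  	 * running or about to run; wait for it to signal the completion
  	 * before resurrecting, so a later reinit can't race with it.
  	 */
  	if (!got)
  		wait_for_completion(compl);
  	percpu_ref_resurrect(ref);
  	if (got)
  		percpu_ref_put(ref);
  }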

This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2a465b6e90a4..257eddd4cd82 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1083,6 +1083,18 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req)
 	}
 }
 
+static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
+{
+	bool got = percpu_ref_tryget(ref);
+
+	/* already at zero, wait for ->release() */
+	if (!got)
+		wait_for_completion(compl);
+	percpu_ref_resurrect(ref);
+	if (got)
+		percpu_ref_put(ref);
+}
+
 static bool io_match_task(struct io_kiocb *head,
 			  struct task_struct *task,
 			  struct files_struct *files)
@@ -9798,12 +9810,11 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			if (ret < 0)
 				break;
 		} while (1);
-
 		mutex_lock(&ctx->uring_lock);
 
 		if (ret) {
-			percpu_ref_resurrect(&ctx->refs);
-			goto out_quiesce;
+			io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+			return ret;
 		}
 	}
 
@@ -9896,7 +9907,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	if (io_register_op_must_quiesce(opcode)) {
 		/* bring the ctx back to life */
 		percpu_ref_reinit(&ctx->refs);
-out_quiesce:
 		reinit_completion(&ctx->ref_comp);
 	}
 	return ret;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 5.13 00/16] random 5.13 patches
  2021-04-11  0:46 [PATCH 5.13 00/16] random 5.13 patches Pavel Begunkov
                   ` (15 preceding siblings ...)
  2021-04-11  0:46 ` [PATCH 16/16] io_uring: return back safer resurrect Pavel Begunkov
@ 2021-04-11  2:38 ` Jens Axboe
  16 siblings, 0 replies; 24+ messages in thread
From: Jens Axboe @ 2021-04-11  2:38 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 4/10/21 6:46 PM, Pavel Begunkov wrote:
> 1-3: task vs files cancellation unification, sheds some lines
> 8-9: inlines fill_event(), gives some performance
> 10-13: more of registered buffer / resource preparation patches
> others are just separate not connected cleanups/improvements
> 
> Pavel Begunkov (16):
>   io_uring: unify task and files cancel loops
>   io_uring: track inflight requests through counter
>   io_uring: unify files and task cancel
>   io_uring: refactor io_close
>   io_uring: enable inline completion for more cases
>   io_uring: refactor compat_msghdr import
>   io_uring: optimise non-eventfd post-event
>   io_uring: always pass cflags into fill_event()
>   io_uring: optimise fill_event() by inlining
>   io_uring: simplify io_rsrc_data refcounting
>   io_uring: add buffer unmap helper
>   io_uring: cleanup buffer register
>   io_uring: split file table from rsrc nodes
>   io_uring: improve sqo stop
>   io_uring: improve hardlink code generation
>   io_uring: return back safer resurrect
> 
>  fs/io_uring.c            | 386 ++++++++++++++++-----------------------
>  include/linux/io_uring.h |  12 +-
>  2 files changed, 163 insertions(+), 235 deletions(-)

Looks good to me, and some nice cleanups. Would be nice to get the
mostly two-sided cancellations down to a single thing at some
point.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 16/16] io_uring: return back safer resurrect
  2021-04-11  0:46 ` [PATCH 16/16] io_uring: return back safer resurrect Pavel Begunkov
@ 2021-05-10  2:22   ` yangerkun
  2021-05-10  9:15     ` Pavel Begunkov
  2022-03-16 16:18   ` [PATCH] " Lee Jones
  1 sibling, 1 reply; 24+ messages in thread
From: yangerkun @ 2021-05-10  2:22 UTC (permalink / raw)
  To: Pavel Begunkov, Jens Axboe, io-uring



On 2021/4/11 8:46, Pavel Begunkov wrote:
> Revert of revert of "io_uring: wait potential ->release() on resurrect",
> which adds a helper for resurrect not racing completion reinit, as was
> removed because of a strange bug with no clear root or link to the
> patch.
> 
> Was improved, instead of rcu_synchronize(), just wait_for_completion()
> because we're at 0 refs and it will happen very shortly. Specifically
> use non-interruptible version to ignore all pending signals that may
> have ended prior interruptible wait.
> 
> This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>   fs/io_uring.c | 18 ++++++++++++++----
>   1 file changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 2a465b6e90a4..257eddd4cd82 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1083,6 +1083,18 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req)
>   	}
>   }
>   
> +static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
> +{
> +	bool got = percpu_ref_tryget(ref);
> +
> +	/* already at zero, wait for ->release() */
> +	if (!got)
> +		wait_for_completion(compl);
> +	percpu_ref_resurrect(ref);
> +	if (got)
> +		percpu_ref_put(ref);
> +}
> +
>   static bool io_match_task(struct io_kiocb *head,
>   			  struct task_struct *task,
>   			  struct files_struct *files)
> @@ -9798,12 +9810,11 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
>   			if (ret < 0)
>   				break;
>   		} while (1);
> -
>   		mutex_lock(&ctx->uring_lock);
>   
>   		if (ret) {
> -			percpu_ref_resurrect(&ctx->refs);
> -			goto out_quiesce;
> +			io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
> +			return ret;

Hi,

I have a question. Compared with the logic before this patch, don't we
need to call reinit_completion(&ctx->ref_comp) to make sure ref_comp
can be used effectively again?

Did we forget to do this? Or am I missing something?

Thanks,
Kun.

>   		}
>   	}
>   
> @@ -9896,7 +9907,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
>   	if (io_register_op_must_quiesce(opcode)) {
>   		/* bring the ctx back to life */
>   		percpu_ref_reinit(&ctx->refs);
> -out_quiesce:
>   		reinit_completion(&ctx->ref_comp);
>   	}
>   	return ret;
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 16/16] io_uring: return back safer resurrect
  2021-05-10  2:22   ` yangerkun
@ 2021-05-10  9:15     ` Pavel Begunkov
  2021-05-11  1:11       ` yangerkun
  0 siblings, 1 reply; 24+ messages in thread
From: Pavel Begunkov @ 2021-05-10  9:15 UTC (permalink / raw)
  To: yangerkun, Jens Axboe, io-uring

On 5/10/21 3:22 AM, yangerkun wrote:
> 在 2021/4/11 8:46, Pavel Begunkov 写道:
>> Revert of revert of "io_uring: wait potential ->release() on resurrect",
>> which adds a helper for resurrect not racing completion reinit, as was
>> removed because of a strange bug with no clear root or link to the
>> patch.
>>
>> Was improved, instead of rcu_synchronize(), just wait_for_completion()
>> because we're at 0 refs and it will happen very shortly. Specifically
>> use non-interruptible version to ignore all pending signals that may
>> have ended prior interruptible wait.
>>
>> This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> 
> I have a question. Compare with the logical before this patch. We need call reinit_completion(&ctx->ref_comp) to make sure the effective use of the ref_comp.
> 
> Does we forget to do this? Or I miss something?
If percpu_ref_tryget() there succeeds, complete() should not have been
called by design; otherwise it does complete() once (+1 to the
completion count) followed by a single wait (-1 to the count), so
+1 -1 returns it back to zero. See how struct completion works,
e.g. completion.rst; the number of completions is counted inside.
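
A hedged sketch of the counting on the contended path (illustrative
only; assumes the refs have already hit zero when io_refs_resurrect()
runs):

	/*
	 * register path                        ->release() callback
	 * ------------------------------       ---------------------------
	 * percpu_ref_tryget() -> false
	 *                                       complete(&ctx->ref_comp);
	 *                                       // done count: 0 -> 1
	 * wait_for_completion(&ctx->ref_comp);
	 * // done count: 1 -> 0, returns
	 * percpu_ref_resurrect(&ctx->refs);
	 */

The single complete() is consumed by the single wait, so the counter
ends back at zero and no reinit_completion() is needed on this path.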

btw, you have a strange mail encoding, apparently it's not Unicode

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 16/16] io_uring: return back safer resurrect
  2021-05-10  9:15     ` Pavel Begunkov
@ 2021-05-11  1:11       ` yangerkun
  0 siblings, 0 replies; 24+ messages in thread
From: yangerkun @ 2021-05-11  1:11 UTC (permalink / raw)
  To: Pavel Begunkov, Jens Axboe, io-uring



On 2021/5/10 17:15, Pavel Begunkov wrote:
> On 5/10/21 3:22 AM, yangerkun wrote:
>> 在 2021/4/11 8:46, Pavel Begunkov 写道:
>>> Revert of revert of "io_uring: wait potential ->release() on resurrect",
>>> which adds a helper for resurrect not racing completion reinit, as was
>>> removed because of a strange bug with no clear root or link to the
>>> patch.
>>>
>>> Was improved, instead of rcu_synchronize(), just wait_for_completion()
>>> because we're at 0 refs and it will happen very shortly. Specifically
>>> use non-interruptible version to ignore all pending signals that may
>>> have ended prior interruptible wait.
>>>
>>> This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.
>>>
>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>
>> I have a question. Compare with the logical before this patch. We need call reinit_completion(&ctx->ref_comp) to make sure the effective use of the ref_comp.
>>
>> Does we forget to do this? Or I miss something?
> If percpu_ref_tryget() there succeeds, it should have not called
> complete by design, otherwise it do complete once (+1 completion
> count) following with a single wait(-1 completion count), so +1 -1
> should return it back to zero. See how struct completion works,
> e.g. completion.rst, number of completions is counted inside.

Got it. Thanks for your explanation!

> 
> btw, you have a strange mail encoding, apparently it's not Unicode

Yeah... I have changed to Unicode!

Thanks,
Kun.
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] io_uring: return back safer resurrect
  2021-04-11  0:46 ` [PATCH 16/16] io_uring: return back safer resurrect Pavel Begunkov
  2021-05-10  2:22   ` yangerkun
@ 2022-03-16 16:18   ` Lee Jones
  2022-03-16 16:38     ` Greg KH
  1 sibling, 1 reply; 24+ messages in thread
From: Lee Jones @ 2022-03-16 16:18 UTC (permalink / raw)
  To: Pavel Begunkov, stable; +Cc: Jens Axboe, io-uring

Stable Team,

> Revert of revert of "io_uring: wait potential ->release() on resurrect",
> which adds a helper for resurrect not racing completion reinit, as was
> removed because of a strange bug with no clear root or link to the
> patch.
> 
> Was improved, instead of rcu_synchronize(), just wait_for_completion()
> because we're at 0 refs and it will happen very shortly. Specifically
> use non-interruptible version to ignore all pending signals that may
> have ended prior interruptible wait.
> 
> This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>  fs/io_uring.c | 18 ++++++++++++++----
>  1 file changed, 14 insertions(+), 4 deletions(-)

Please back-port this as far as it will apply.

Definitely through v5.10.y.

It solves a critical bug.

Subject: "io_uring: return back safer resurrect"

Upstream commit: f70865db5ff35f5ed0c7e9ef63e7cca3d4947f04

-- 
Lee Jones [李琼斯]
Principal Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] io_uring: return back safer resurrect
  2022-03-16 16:18   ` [PATCH] " Lee Jones
@ 2022-03-16 16:38     ` Greg KH
  2022-03-16 16:46       ` Lee Jones
  0 siblings, 1 reply; 24+ messages in thread
From: Greg KH @ 2022-03-16 16:38 UTC (permalink / raw)
  To: Lee Jones; +Cc: Pavel Begunkov, stable, Jens Axboe, io-uring

On Wed, Mar 16, 2022 at 04:18:16PM +0000, Lee Jones wrote:
> Stable Team,
> 
> > Revert of revert of "io_uring: wait potential ->release() on resurrect",
> > which adds a helper for resurrect not racing completion reinit, as was
> > removed because of a strange bug with no clear root or link to the
> > patch.
> > 
> > Was improved, instead of rcu_synchronize(), just wait_for_completion()
> > because we're at 0 refs and it will happen very shortly. Specifically
> > use non-interruptible version to ignore all pending signals that may
> > have ended prior interruptible wait.
> > 
> > This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.
> > 
> > Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> > ---
> >  fs/io_uring.c | 18 ++++++++++++++----
> >  1 file changed, 14 insertions(+), 4 deletions(-)
> 
> Please back-port this as far as it will apply.
> 
> Definitely through v5.10.y.
> 
> It solves a critical bug.
> 
> Subject: "io_uring: return back safer resurrect"
> 
> Upstream commit:: f70865db5ff35f5ed0c7e9ef63e7cca3d4947f04

It only applies to 5.10.y.  It showed up in 5.12, so if you want it
further back than 5.10.y, can you provide a working backport?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] io_uring: return back safer resurrect
  2022-03-16 16:38     ` Greg KH
@ 2022-03-16 16:46       ` Lee Jones
  0 siblings, 0 replies; 24+ messages in thread
From: Lee Jones @ 2022-03-16 16:46 UTC (permalink / raw)
  To: Greg KH; +Cc: Pavel Begunkov, stable, Jens Axboe, io-uring

On Wed, 16 Mar 2022, Greg KH wrote:

> On Wed, Mar 16, 2022 at 04:18:16PM +0000, Lee Jones wrote:
> > Stable Team,
> > 
> > > Revert of revert of "io_uring: wait potential ->release() on resurrect",
> > > which adds a helper for resurrect not racing completion reinit, as was
> > > removed because of a strange bug with no clear root or link to the
> > > patch.
> > > 
> > > Was improved, instead of rcu_synchronize(), just wait_for_completion()
> > > because we're at 0 refs and it will happen very shortly. Specifically
> > > use non-interruptible version to ignore all pending signals that may
> > > have ended prior interruptible wait.
> > > 
> > > This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575.
> > > 
> > > Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> > > ---
> > >  fs/io_uring.c | 18 ++++++++++++++----
> > >  1 file changed, 14 insertions(+), 4 deletions(-)
> > 
> > Please back-port this as far as it will apply.
> > 
> > Definitely through v5.10.y.
> > 
> > It solves a critical bug.
> > 
> > Subject: "io_uring: return back safer resurrect"
> > 
> > Upstream commit:: f70865db5ff35f5ed0c7e9ef63e7cca3d4947f04
> 
> It only applies to 5.10.y.  It showed up in 5.12, so if you want it
> further back than 5.10.y, can you provide a working backport?

Works for me, thanks.

-- 
Lee Jones [李琼斯]
Principal Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2022-03-16 16:46 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-11  0:46 [PATCH 5.13 00/16] random 5.13 patches Pavel Begunkov
2021-04-11  0:46 ` [PATCH 01/16] io_uring: unify task and files cancel loops Pavel Begunkov
2021-04-11  0:46 ` [PATCH 02/16] io_uring: track inflight requests through counter Pavel Begunkov
2021-04-11  0:46 ` [PATCH 03/16] io_uring: unify files and task cancel Pavel Begunkov
2021-04-11  0:46 ` [PATCH 04/16] io_uring: refactor io_close Pavel Begunkov
2021-04-11  0:46 ` [PATCH 05/16] io_uring: enable inline completion for more cases Pavel Begunkov
2021-04-11  0:46 ` [PATCH 06/16] io_uring: refactor compat_msghdr import Pavel Begunkov
2021-04-11  0:46 ` [PATCH 07/16] io_uring: optimise non-eventfd post-event Pavel Begunkov
2021-04-11  0:46 ` [PATCH 08/16] io_uring: always pass cflags into fill_event() Pavel Begunkov
2021-04-11  0:46 ` [PATCH 09/16] io_uring: optimise fill_event() by inlining Pavel Begunkov
2021-04-11  0:46 ` [PATCH 10/16] io_uring: simplify io_rsrc_data refcounting Pavel Begunkov
2021-04-11  0:46 ` [PATCH 11/16] io_uring: add buffer unmap helper Pavel Begunkov
2021-04-11  0:46 ` [PATCH 12/16] io_uring: cleanup buffer register Pavel Begunkov
2021-04-11  0:46 ` [PATCH 13/16] io_uring: split file table from rsrc nodes Pavel Begunkov
2021-04-11  0:46 ` [PATCH 14/16] io_uring: improve sqo stop Pavel Begunkov
2021-04-11  0:46 ` [PATCH 15/16] io_uring: improve hardlink code generation Pavel Begunkov
2021-04-11  0:46 ` [PATCH 16/16] io_uring: return back safer resurrect Pavel Begunkov
2021-05-10  2:22   ` yangerkun
2021-05-10  9:15     ` Pavel Begunkov
2021-05-11  1:11       ` yangerkun
2022-03-16 16:18   ` [PATCH] " Lee Jones
2022-03-16 16:38     ` Greg KH
2022-03-16 16:46       ` Lee Jones
2021-04-11  2:38 ` [PATCH 5.13 00/16] random 5.13 patches Jens Axboe
