[PATCH v4 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
From: Xiaoguang Wang @ 2020-05-30 14:39 UTC
  To: io-uring; +Cc: axboe, asml.silence, joseph.qi, Xiaoguang Wang

If requests can be submitted and completed inline, we don't need to
initialize the whole io_wq_work in io_init_req(), which is an expensive
operation. Add a new flag, REQ_F_WORK_INITIALIZED, to track whether
io_wq_work has been initialized, so the full setup is only done for
requests that actually need async handling.
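
The core of the change is a new io_req_init_async() helper (shown in
full in the diff below), which defers the io_wq_work setup until a
request is first prepared for async execution; a commented sketch of
its logic:

  static inline void io_req_init_async(struct io_kiocb *req,
                                       void (*func)(struct io_wq_work **))
  {
          if (req->flags & REQ_F_WORK_INITIALIZED)
                  /* work already set up, only the callback changes */
                  req->work.func = func;
          else {
                  /* first async use: do the full init that used to be
                   * done unconditionally in io_init_req()
                   */
                  req->work = (struct io_wq_work){ .func = func };
                  req->flags |= REQ_F_WORK_INITIALIZED;
          }
  }

Handlers that punt a request (e.g. io_fsync() returning -EAGAIN) now
call this helper instead of assigning req->work.func directly, and
io_req_work_drop_env() skips requests whose work was never initialized.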

I used /dev/nullb0 to evaluate the performance improvement on my
physical machine:
  modprobe null_blk nr_devices=1 completion_nsec=0
  sudo taskset -c 60 fio  -name=fiotest -filename=/dev/nullb0 -iodepth=128
  -thread -rw=read -ioengine=io_uring -direct=1 -bs=4k -size=100G -numjobs=1
  -time_based -runtime=120

Before this patch:
Run status group 0 (all jobs):
   READ: bw=724MiB/s (759MB/s), 724MiB/s-724MiB/s (759MB/s-759MB/s),
   io=84.8GiB (91.1GB), run=120001-120001msec

With this patch:
Run status group 0 (all jobs):
   READ: bw=761MiB/s (798MB/s), 761MiB/s-761MiB/s (798MB/s-798MB/s),
   io=89.2GiB (95.8GB), run=120001-120001msec

About 5% improvement.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>

---
V4:
  add io_req_init_async() helper
---
 fs/io-wq.h    |  5 ----
 fs/io_uring.c | 66 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/fs/io-wq.h b/fs/io-wq.h
index 5ba12de7572f..3d85d365d764 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -94,11 +94,6 @@ struct io_wq_work {
 	pid_t task_pid;
 };
 
-#define INIT_IO_WORK(work, _func)				\
-	do {							\
-		*(work) = (struct io_wq_work){ .func = _func };	\
-	} while (0)						\
-
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 {
 	if (!work->list.next)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 95df63b0b2ce..12296ce3e8b9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -538,6 +538,7 @@ enum {
 	REQ_F_POLLED_BIT,
 	REQ_F_BUFFER_SELECTED_BIT,
 	REQ_F_NO_FILE_TABLE_BIT,
+	REQ_F_WORK_INITIALIZED_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -593,6 +594,8 @@ enum {
 	REQ_F_BUFFER_SELECTED	= BIT(REQ_F_BUFFER_SELECTED_BIT),
 	/* doesn't need file table for this request */
 	REQ_F_NO_FILE_TABLE	= BIT(REQ_F_NO_FILE_TABLE_BIT),
+	/* io_wq_work is initialized */
+	REQ_F_WORK_INITIALIZED	= BIT(REQ_F_WORK_INITIALIZED_BIT),
 };
 
 struct async_poll {
@@ -638,6 +641,7 @@ struct io_kiocb {
 	unsigned int		flags;
 	refcount_t		refs;
 	struct task_struct	*task;
+	const struct cred	*creds;
 	unsigned long		fsize;
 	u64			user_data;
 	u32			result;
@@ -900,6 +904,17 @@ EXPORT_SYMBOL(io_uring_get_socket);
 
 static void io_file_put_work(struct work_struct *work);
 
+static inline void io_req_init_async(struct io_kiocb *req,
+			void (*func)(struct io_wq_work **))
+{
+	if (req->flags & REQ_F_WORK_INITIALIZED)
+		req->work.func = func;
+	else {
+		req->work = (struct io_wq_work){ .func = func };
+		req->flags |= REQ_F_WORK_INITIALIZED;
+	}
+}
+
 static inline bool io_async_submit(struct io_ring_ctx *ctx)
 {
 	return ctx->flags & IORING_SETUP_SQPOLL;
@@ -1025,8 +1040,15 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 		mmgrab(current->mm);
 		req->work.mm = current->mm;
 	}
-	if (!req->work.creds)
-		req->work.creds = get_current_cred();
+
+	if (!req->work.creds) {
+		if (!req->creds)
+			req->work.creds = get_current_cred();
+		else {
+			req->work.creds = req->creds;
+			req->creds = NULL;
+		}
+	}
 	if (!req->work.fs && def->needs_fs) {
 		spin_lock(&current->fs->lock);
 		if (!current->fs->in_exec) {
@@ -1043,6 +1065,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 
 static inline void io_req_work_drop_env(struct io_kiocb *req)
 {
+	if (!(req->flags & REQ_F_WORK_INITIALIZED))
+		return;
+
 	if (req->work.mm) {
 		mmdrop(req->work.mm);
 		req->work.mm = NULL;
@@ -3045,7 +3070,7 @@ static int io_fsync(struct io_kiocb *req, bool force_nonblock)
 {
 	/* fsync always requires a blocking context */
 	if (force_nonblock) {
-		req->work.func = io_fsync_finish;
+		io_req_init_async(req, io_fsync_finish);
 		return -EAGAIN;
 	}
 	__io_fsync(req);
@@ -3093,7 +3118,7 @@ static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
 {
 	/* fallocate always requiring blocking context */
 	if (force_nonblock) {
-		req->work.func = io_fallocate_finish;
+		io_req_init_async(req, io_fallocate_finish);
 		return -EAGAIN;
 	}
 
@@ -3618,7 +3643,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
 	if (req->close.put_file->f_op->flush && force_nonblock) {
 		/* avoid grabbing files - we don't need the files */
 		req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
-		req->work.func = io_close_finish;
+		io_req_init_async(req, io_close_finish);
 		return -EAGAIN;
 	}
 
@@ -3675,7 +3700,7 @@ static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
 {
 	/* sync_file_range always requires a blocking context */
 	if (force_nonblock) {
-		req->work.func = io_sync_file_range_finish;
+		io_req_init_async(req, io_sync_file_range_finish);
 		return -EAGAIN;
 	}
 
@@ -4144,7 +4169,7 @@ static int io_accept(struct io_kiocb *req, bool force_nonblock)
 
 	ret = __io_accept(req, force_nonblock);
 	if (ret == -EAGAIN && force_nonblock) {
-		req->work.func = io_accept_finish;
+		io_req_init_async(req, io_accept_finish);
 		return -EAGAIN;
 	}
 	return 0;
@@ -5144,6 +5169,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
 	if (!sqe)
 		return 0;
 
+	io_req_init_async(req, io_wq_submit_work);
+
 	if (io_op_defs[req->opcode].file_table) {
 		ret = io_grab_files(req);
 		if (unlikely(ret))
@@ -5779,19 +5806,24 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_kiocb *linked_timeout;
 	struct io_kiocb *nxt;
-	const struct cred *old_creds = NULL;
+	const struct cred *creds, *old_creds = NULL;
 	int ret;
 
 again:
 	linked_timeout = io_prep_linked_timeout(req);
 
-	if (req->work.creds && req->work.creds != current_cred()) {
+	if (req->flags & REQ_F_WORK_INITIALIZED)
+		creds = req->work.creds;
+	else
+		creds = req->creds;
+
+	if (creds && creds != current_cred()) {
 		if (old_creds)
 			revert_creds(old_creds);
-		if (old_creds == req->work.creds)
+		if (old_creds == creds)
 			old_creds = NULL; /* restored original creds */
 		else
-			old_creds = override_creds(req->work.creds);
+			old_creds = override_creds(creds);
 	}
 
 	ret = io_issue_sqe(req, sqe, true);
@@ -5808,6 +5840,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 			goto exit;
 		}
 punt:
+		io_req_init_async(req, io_wq_submit_work);
+
 		if (io_op_defs[req->opcode].file_table) {
 			ret = io_grab_files(req);
 			if (ret)
@@ -6060,7 +6094,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	refcount_set(&req->refs, 2);
 	req->task = NULL;
 	req->result = 0;
-	INIT_IO_WORK(&req->work, io_wq_submit_work);
 
 	if (unlikely(req->opcode >= IORING_OP_LAST))
 		return -EINVAL;
@@ -6082,11 +6115,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 	id = READ_ONCE(sqe->personality);
 	if (id) {
-		req->work.creds = idr_find(&ctx->personality_idr, id);
-		if (unlikely(!req->work.creds))
+		req->creds = idr_find(&ctx->personality_idr, id);
+		if (unlikely(!req->creds))
 			return -EINVAL;
-		get_cred(req->work.creds);
-	}
+		get_cred(req->creds);
+	} else
+		req->creds = NULL;
 
 	/* same numerical values with corresponding REQ_F_*, safe to copy */
 	req->flags |= sqe_flags;
-- 
2.17.2


