io-uring.vger.kernel.org archive mirror
* [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
@ 2020-06-09  8:25 Xiaoguang Wang
  2020-06-09  8:25 ` [PATCH v6 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature Xiaoguang Wang
  2020-06-09 16:44 ` [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Pavel Begunkov
  0 siblings, 2 replies; 7+ messages in thread
From: Xiaoguang Wang @ 2020-06-09  8:25 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, joseph.qi, Xiaoguang Wang

If requests can be submitted and completed inline, we don't need to
initialize the whole io_wq_work in io_init_req(), which is an expensive
operation. Add a new flag 'REQ_F_WORK_INITIALIZED' to control whether
io_wq_work has been initialized.
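
The idea is flag-gated lazy initialization: requests that stay on the
inline path never touch ->work, and any path that does need ->work
(punting to io-wq, overriding its flags, etc.) initializes it on demand.
A minimal sketch of the pattern, mirroring the io_req_init_async()
helper and the splice hunk in the patch below:

	static inline void io_req_init_async(struct io_kiocb *req)
	{
		/* only pay for zeroing ->work when it is actually needed */
		if (req->flags & REQ_F_WORK_INITIALIZED)
			return;

		memset(&req->work, 0, sizeof(req->work));
		req->flags |= REQ_F_WORK_INITIALIZED;
	}

	/* callers must initialize ->work before modifying it, e.g.: */
	io_req_init_async(req);
	req->work.flags |= IO_WQ_WORK_UNBOUND;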

I use /dev/nullb0 to evaluate the performance improvement on my
physical machine:
  modprobe null_blk nr_devices=1 completion_nsec=0
  sudo taskset -c 60 fio -name=fiotest -filename=/dev/nullb0 -iodepth=128
  -thread -rw=read -ioengine=io_uring -direct=1 -bs=4k -size=100G -numjobs=1
  -time_based -runtime=120

Before this patch:
Run status group 0 (all jobs):
   READ: bw=724MiB/s (759MB/s), 724MiB/s-724MiB/s (759MB/s-759MB/s),
   io=84.8GiB (91.1GB), run=120001-120001msec

With this patch:
Run status group 0 (all jobs):
   READ: bw=761MiB/s (798MB/s), 761MiB/s-761MiB/s (798MB/s-798MB/s),
   io=89.2GiB (95.8GB), run=120001-120001msec

About 5% improvement.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>

---
V4:
  add io_req_init_async() helper

V5:
  refactor io_req_init_async() into io_init_req_work() and io_init_req_work_func()
  in case we need to change io_wq_work.func separately.

V6:
  Drop the refactor work in V5, and rebase to io_uring-5.8.
---
 fs/io-wq.h    |  5 ----
 fs/io_uring.c | 63 +++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/fs/io-wq.h b/fs/io-wq.h
index 2db24d31fbc5..8e138fa88b9f 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -93,11 +93,6 @@ struct io_wq_work {
 	pid_t task_pid;
 };
 
-#define INIT_IO_WORK(work)					\
-	do {							\
-		*(work) = (struct io_wq_work){};		\
-	} while (0)						\
-
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 {
 	if (!work->list.next)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3ffe03194c1e..bde8b17a7275 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -542,6 +542,7 @@ enum {
 	REQ_F_BUFFER_SELECTED_BIT,
 	REQ_F_NO_FILE_TABLE_BIT,
 	REQ_F_QUEUE_TIMEOUT_BIT,
+	REQ_F_WORK_INITIALIZED_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -599,6 +600,8 @@ enum {
 	REQ_F_NO_FILE_TABLE	= BIT(REQ_F_NO_FILE_TABLE_BIT),
 	/* needs to queue linked timeout */
 	REQ_F_QUEUE_TIMEOUT	= BIT(REQ_F_QUEUE_TIMEOUT_BIT),
+	/* io_wq_work is initialized */
+	REQ_F_WORK_INITIALIZED	= BIT(REQ_F_WORK_INITIALIZED_BIT),
 };
 
 struct async_poll {
@@ -645,6 +648,7 @@ struct io_kiocb {
 	unsigned int		flags;
 	refcount_t		refs;
 	struct task_struct	*task;
+	const struct cred	*creds;
 	unsigned long		fsize;
 	u64			user_data;
 	u32			result;
@@ -911,6 +915,15 @@ EXPORT_SYMBOL(io_uring_get_socket);
 
 static void io_file_put_work(struct work_struct *work);
 
+static inline void io_req_init_async(struct io_kiocb *req)
+{
+	if (req->flags & REQ_F_WORK_INITIALIZED)
+		return;
+
+	memset(&req->work, 0, sizeof(req->work));
+	req->flags |= REQ_F_WORK_INITIALIZED;
+}
+
 static inline bool io_async_submit(struct io_ring_ctx *ctx)
 {
 	return ctx->flags & IORING_SETUP_SQPOLL;
@@ -1019,8 +1032,14 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 		mmgrab(current->mm);
 		req->work.mm = current->mm;
 	}
-	if (!req->work.creds)
-		req->work.creds = get_current_cred();
+	if (!req->work.creds) {
+		if (!req->creds) {
+			req->work.creds = get_current_cred();
+		} else {
+			req->work.creds = req->creds;
+			req->creds = NULL;
+		}
+	}
 	if (!req->work.fs && def->needs_fs) {
 		spin_lock(&current->fs->lock);
 		if (!current->fs->in_exec) {
@@ -1037,6 +1056,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 
 static inline void io_req_work_drop_env(struct io_kiocb *req)
 {
+	if (!(req->flags & REQ_F_WORK_INITIALIZED))
+		return;
+
 	if (req->work.mm) {
 		mmdrop(req->work.mm);
 		req->work.mm = NULL;
@@ -2781,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req,
 		return ret;
 	req->flags |= REQ_F_NEED_CLEANUP;
 
-	if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+	if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
+		/*
+		 * Splice operation will be punted async, and we need to
+		 * modify io_wq_work.flags here, so initialize io_wq_work first.
+		 */
+		io_req_init_async(req);
 		req->work.flags |= IO_WQ_WORK_UNBOUND;
+	}
 
 	return 0;
 }
@@ -3368,8 +3396,10 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	/*
 	 * If we queue this for async, it must not be cancellable. That would
-	 * leave the 'file' in an undeterminate state.
+	 * leave the 'file' in an undeterminate state, and we need to modify
+	 * io_wq_work.flags here, so initialize io_wq_work first.
 	 */
+	io_req_init_async(req);
 	req->work.flags |= IO_WQ_WORK_NO_CANCEL;
 
 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
@@ -4847,6 +4877,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
 	if (!sqe)
 		return 0;
 
+	io_req_init_async(req);
+
 	if (io_op_defs[req->opcode].file_table) {
 		ret = io_grab_files(req);
 		if (unlikely(ret))
@@ -5495,19 +5527,23 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_kiocb *linked_timeout;
 	struct io_kiocb *nxt;
-	const struct cred *old_creds = NULL;
+	const struct cred *creds, *old_creds = NULL;
 	int ret;
 
 again:
 	linked_timeout = io_prep_linked_timeout(req);
 
-	if (req->work.creds && req->work.creds != current_cred()) {
+	if (req->flags & REQ_F_WORK_INITIALIZED)
+		creds = req->work.creds;
+	else
+		creds = req->creds;
+	if (creds && creds != current_cred()) {
 		if (old_creds)
 			revert_creds(old_creds);
-		if (old_creds == req->work.creds)
+		if (old_creds == creds)
 			old_creds = NULL; /* restored original creds */
 		else
-			old_creds = override_creds(req->work.creds);
+			old_creds = override_creds(creds);
 	}
 
 	ret = io_issue_sqe(req, sqe, true);
@@ -5524,6 +5560,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 			goto exit;
 		}
 punt:
+		io_req_init_async(req);
+
 		if (io_op_defs[req->opcode].file_table) {
 			ret = io_grab_files(req);
 			if (ret)
@@ -5776,7 +5814,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	refcount_set(&req->refs, 2);
 	req->task = NULL;
 	req->result = 0;
-	INIT_IO_WORK(&req->work);
 
 	if (unlikely(req->opcode >= IORING_OP_LAST))
 		return -EINVAL;
@@ -5798,10 +5835,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 	id = READ_ONCE(sqe->personality);
 	if (id) {
-		req->work.creds = idr_find(&ctx->personality_idr, id);
-		if (unlikely(!req->work.creds))
+		req->creds = idr_find(&ctx->personality_idr, id);
+		if (unlikely(!req->creds))
 			return -EINVAL;
-		get_cred(req->work.creds);
+		get_cred(req->creds);
+	} else {
+		req->creds = NULL;
 	}
 
 	/* same numerical values with corresponding REQ_F_*, safe to copy */
-- 
2.17.2



* [PATCH v6 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature
  2020-06-09  8:25 [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Xiaoguang Wang
@ 2020-06-09  8:25 ` Xiaoguang Wang
  2020-06-09  8:32   ` Xiaoguang Wang
  2020-06-09 16:44 ` [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Pavel Begunkov
  1 sibling, 1 reply; 7+ messages in thread
From: Xiaoguang Wang @ 2020-06-09  8:25 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, joseph.qi, Xiaoguang Wang

Basically the IORING_OP_POLL_ADD command and async armed poll handlers
for regular commands don't touch io_wq_work, so only when REQ_F_WORK_INITIALIZED
is set do we need to do the io_wq_work copy and restore.
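
In practice each ->work save/restore around the async poll path simply
becomes conditional on that flag; a representative hunk from the diff
below:

	if (req->flags & REQ_F_WORK_INITIALIZED)
		memcpy(&apoll->work, &req->work, sizeof(req->work));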

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>

---
V3:
  drop the REQ_F_WORK_NEED_RESTORE flag introduced in V2 patch, just
  use REQ_F_WORK_INITIALIZED to control whether to do io_wq_work copy
  and restore.

V6:
  rebase to io_uring-5.8.
---
 fs/io_uring.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bde8b17a7275..3bec6057c189 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4261,7 +4261,8 @@ static void io_async_task_func(struct callback_head *cb)
 	spin_unlock_irq(&ctx->completion_lock);
 
 	/* restore ->work in case we need to retry again */
-	memcpy(&req->work, &apoll->work, sizeof(req->work));
+	if (req->flags & REQ_F_WORK_INITIALIZED)
+		memcpy(&req->work, &apoll->work, sizeof(req->work));
 	kfree(apoll);
 
 	if (!canceled) {
@@ -4358,7 +4359,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 		return false;
 
 	req->flags |= REQ_F_POLLED;
-	memcpy(&apoll->work, &req->work, sizeof(req->work));
+	if (req->flags & REQ_F_WORK_INITIALIZED)
+		memcpy(&apoll->work, &req->work, sizeof(req->work));
 	had_io = req->io != NULL;
 
 	get_task_struct(current);
@@ -4383,7 +4385,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 		if (!had_io)
 			io_poll_remove_double(req);
 		spin_unlock_irq(&ctx->completion_lock);
-		memcpy(&req->work, &apoll->work, sizeof(req->work));
+		if (req->flags & REQ_F_WORK_INITIALIZED)
+			memcpy(&req->work, &apoll->work, sizeof(req->work));
 		kfree(apoll);
 		return false;
 	}
@@ -4428,7 +4431,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 			 * io_req_work_drop_env below when dropping the
 			 * final reference.
 			 */
-			memcpy(&req->work, &apoll->work, sizeof(req->work));
+			if (req->flags & REQ_F_WORK_INITIALIZED)
+				memcpy(&req->work, &apoll->work,
+				       sizeof(req->work));
 			kfree(apoll);
 		}
 	}
-- 
2.17.2



* Re: [PATCH v6 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature
  2020-06-09  8:25 ` [PATCH v6 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature Xiaoguang Wang
@ 2020-06-09  8:32   ` Xiaoguang Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Xiaoguang Wang @ 2020-06-09  8:32 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, joseph.qi

hi,

I also used the debug patch below to run the test cases in liburing; it
poisons every io_wq_work field with an obviously invalid value, so any
code path that touches ->work before io_req_init_async() has initialized
it should blow up:
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3bec6057c189..119764d18a61 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5819,6 +5819,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
         refcount_set(&req->refs, 2);
         req->task = NULL;
         req->result = 0;
+       req->work.list.next = 0x1;
+       req->work.files = 0x2;
+       req->work.mm = 0x3;
+       req->work.creds = 0x4;
+       req->work.fs = 0x5;
+       req->work.flags = 0x6;
+       req->work.task_pid = 0x7;

All test cases pass.

Regards,
Xiaoguang Wang

> Basically IORING_OP_POLL_ADD command and async armed poll handlers
> for regular commands don't touch io_wq_work, so only REQ_F_WORK_INITIALIZED
> is set, can we do io_wq_work copy and restore.
> 
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
> 
> ---
> V3:
>    drop the REQ_F_WORK_NEED_RESTORE flag introduced in V2 patch, just
>    use REQ_F_WORK_INITIALIZED to control whether to do io_wq_work copy
>    and restore.
> 
> V6:
>    rebase to io_uring-5.8.
> ---
>   fs/io_uring.c | 13 +++++++++----
>   1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index bde8b17a7275..3bec6057c189 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4261,7 +4261,8 @@ static void io_async_task_func(struct callback_head *cb)
>   	spin_unlock_irq(&ctx->completion_lock);
>   
>   	/* restore ->work in case we need to retry again */
> -	memcpy(&req->work, &apoll->work, sizeof(req->work));
> +	if (req->flags & REQ_F_WORK_INITIALIZED)
> +		memcpy(&req->work, &apoll->work, sizeof(req->work));
>   	kfree(apoll);
>   
>   	if (!canceled) {
> @@ -4358,7 +4359,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
>   		return false;
>   
>   	req->flags |= REQ_F_POLLED;
> -	memcpy(&apoll->work, &req->work, sizeof(req->work));
> +	if (req->flags & REQ_F_WORK_INITIALIZED)
> +		memcpy(&apoll->work, &req->work, sizeof(req->work));
>   	had_io = req->io != NULL;
>   
>   	get_task_struct(current);
> @@ -4383,7 +4385,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
>   		if (!had_io)
>   			io_poll_remove_double(req);
>   		spin_unlock_irq(&ctx->completion_lock);
> -		memcpy(&req->work, &apoll->work, sizeof(req->work));
> +		if (req->flags & REQ_F_WORK_INITIALIZED)
> +			memcpy(&req->work, &apoll->work, sizeof(req->work));
>   		kfree(apoll);
>   		return false;
>   	}
> @@ -4428,7 +4431,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
>   			 * io_req_work_drop_env below when dropping the
>   			 * final reference.
>   			 */
> -			memcpy(&req->work, &apoll->work, sizeof(req->work));
> +			if (req->flags & REQ_F_WORK_INITIALIZED)
> +				memcpy(&req->work, &apoll->work,
> +				       sizeof(req->work));
>   			kfree(apoll);
>   		}
>   	}
> 


* Re: [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
  2020-06-09  8:25 [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Xiaoguang Wang
  2020-06-09  8:25 ` [PATCH v6 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature Xiaoguang Wang
@ 2020-06-09 16:44 ` Pavel Begunkov
  2020-06-10  1:40   ` Jens Axboe
  1 sibling, 1 reply; 7+ messages in thread
From: Pavel Begunkov @ 2020-06-09 16:44 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: axboe, joseph.qi

On 09/06/2020 11:25, Xiaoguang Wang wrote:
> If requests can be submitted and completed inline, we don't need to
> initialize whole io_wq_work in io_init_req(), which is an expensive
> operation, add a new 'REQ_F_WORK_INITIALIZED' to control whether
> io_wq_work is initialized.

Basically it's "call io_req_init_async() before touching ->work" now.
This shouldn't be as easy to screw up as it was with ->func.

The only thing left that I dislike enough to keep complaining about is
the ->creds handling. But this one should be easy, see the incremental
diff below (not tested). If custom creds are provided, it initialises
req->work in io_init_req() and sets work.creds. And then we can remove
req->creds.

What do you think? Custom ->creds (aka personality) is a niche feature,
and the speedup is not great enough to care.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index c03408342320..5df7e02852bb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -648,7 +648,6 @@ struct io_kiocb {
 	unsigned int		flags;
 	refcount_t		refs;
 	struct task_struct	*task;
-	const struct cred	*creds;
 	unsigned long		fsize;
 	u64			user_data;
 	u32			result;
@@ -1031,14 +1030,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 		mmgrab(current->mm);
 		req->work.mm = current->mm;
 	}
-	if (!req->work.creds) {
-		if (!req->creds) {
-			req->work.creds = get_current_cred();
-		} else {
-			req->work.creds = req->creds;
-			req->creds = NULL;
-		}
-	}
+	if (!req->work.creds)
+		req->work.creds = get_current_cred();
+
 	if (!req->work.fs && def->needs_fs) {
 		spin_lock(&current->fs->lock);
 		if (!current->fs->in_exec) {
@@ -5569,23 +5563,20 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_kiocb *linked_timeout;
 	struct io_kiocb *nxt;
-	const struct cred *creds, *old_creds = NULL;
+	const struct cred *old_creds = NULL;
 	int ret;
 
 again:
 	linked_timeout = io_prep_linked_timeout(req);
 
-	if (req->flags & REQ_F_WORK_INITIALIZED)
-		creds = req->work.creds;
-	else
-		creds = req->creds;
-	if (creds && creds != current_cred()) {
+	if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
+	     req->work.creds != current_cred()) {
 		if (old_creds)
 			revert_creds(old_creds);
-		if (old_creds == creds)
+		if (old_creds == req->work.creds)
 			old_creds = NULL; /* restored original creds */
 		else
-			old_creds = override_creds(creds);
+			old_creds = override_creds(req->work.creds);
 	}
 
 	ret = io_issue_sqe(req, sqe, true);
@@ -5939,12 +5930,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 	id = READ_ONCE(sqe->personality);
 	if (id) {
-		req->creds = idr_find(&ctx->personality_idr, id);
-		if (unlikely(!req->creds))
+		io_req_init_async(req);
+		req->work.creds = idr_find(&ctx->personality_idr, id);
+		if (unlikely(!req->work.creds))
 			return -EINVAL;
-		get_cred(req->creds);
-	} else {
-		req->creds = NULL;
+		get_cred(req->work.creds);
 	}
 
 	/* same numerical values with corresponding REQ_F_*, safe to copy */


-- 
Pavel Begunkov


* Re: [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
  2020-06-09 16:44 ` [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Pavel Begunkov
@ 2020-06-10  1:40   ` Jens Axboe
  2020-06-10 11:39     ` Xiaoguang Wang
  0 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2020-06-10  1:40 UTC (permalink / raw)
  To: Pavel Begunkov, Xiaoguang Wang, io-uring; +Cc: joseph.qi

On 6/9/20 10:44 AM, Pavel Begunkov wrote:
> On 09/06/2020 11:25, Xiaoguang Wang wrote:
>> If requests can be submitted and completed inline, we don't need to
>> initialize whole io_wq_work in io_init_req(), which is an expensive
>> operation, add a new 'REQ_F_WORK_INITIALIZED' to control whether
>> io_wq_work is initialized.
> 
> Basically it's "call io_req_init_async() before touching ->work" now.
> This shouldn't be as easy to screw as was with ->func.
> 
> The only thing left that I don't like _too_ much to stop complaining
> is ->creds handling. But this one should be easy, see incremental diff
> below (not tested). If custom creds are provided, it initialises
> req->work in req_init() and sets work.creds. And then we can remove
> req->creds.
> 
> What do you think? Custom ->creds (aka personality) is a niche feature,
> and the speedup is not so great to care.

Thanks for reviewing, I agree. Xiaoguang, care to fold in that change
and then I think we're good to shove this in.

-- 
Jens Axboe



* Re: [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
  2020-06-10  1:40   ` Jens Axboe
@ 2020-06-10 11:39     ` Xiaoguang Wang
  2020-06-11 13:45       ` Pavel Begunkov
  0 siblings, 1 reply; 7+ messages in thread
From: Xiaoguang Wang @ 2020-06-10 11:39 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, io-uring; +Cc: joseph.qi

hi,

> On 6/9/20 10:44 AM, Pavel Begunkov wrote:
>> On 09/06/2020 11:25, Xiaoguang Wang wrote:
>>> If requests can be submitted and completed inline, we don't need to
>>> initialize whole io_wq_work in io_init_req(), which is an expensive
>>> operation, add a new 'REQ_F_WORK_INITIALIZED' to control whether
>>> io_wq_work is initialized.
>>
>> Basically it's "call io_req_init_async() before touching ->work" now.
>> This shouldn't be as easy to screw as was with ->func.
>>
>> The only thing left that I don't like _too_ much to stop complaining
>> is ->creds handling. But this one should be easy, see incremental diff
>> below (not tested). If custom creds are provided, it initialises
>> req->work in req_init() and sets work.creds. And then we can remove
>> req->creds.
>>
>> What do you think? Custom ->creds (aka personality) is a niche feature,
>> and the speedup is not so great to care.
> 
> Thanks for reviewing, I agree. Xiaoguang, care to fold in that change
> and then I think we're good to shove this in.
Yeah, I'll send a new version soon.
Pavel, thanks for your great work, and I really appreciate both your and Jens' patience.

Regards,
Xiaoguang Wang
> 


* Re: [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
  2020-06-10 11:39     ` Xiaoguang Wang
@ 2020-06-11 13:45       ` Pavel Begunkov
  0 siblings, 0 replies; 7+ messages in thread
From: Pavel Begunkov @ 2020-06-11 13:45 UTC (permalink / raw)
  To: Xiaoguang Wang, Jens Axboe, io-uring; +Cc: joseph.qi

On 10/06/2020 14:39, Xiaoguang Wang wrote:
>> On 6/9/20 10:44 AM, Pavel Begunkov wrote:
>>> On 09/06/2020 11:25, Xiaoguang Wang wrote:
>>>> If requests can be submitted and completed inline, we don't need to
>>>> initialize whole io_wq_work in io_init_req(), which is an expensive
>>>> operation, add a new 'REQ_F_WORK_INITIALIZED' to control whether
>>>> io_wq_work is initialized.
>>>
>>> Basically it's "call io_req_init_async() before touching ->work" now.
>>> This shouldn't be as easy to screw as was with ->func.
>>>
>>> The only thing left that I don't like _too_ much to stop complaining
>>> is ->creds handling. But this one should be easy, see incremental diff
>>> below (not tested). If custom creds are provided, it initialises
>>> req->work in req_init() and sets work.creds. And then we can remove
>>> req->creds.
>>>
>>> What do you think? Custom ->creds (aka personality) is a niche feature,
>>> and the speedup is not so great to care.
>>
>> Thanks for reviewing, I agree. Xiaoguang, care to fold in that change
>> and then I think we're good to shove this in.
> Yeah, I'll send new version soon.
> Pavel, thanks for your great work, and really appreciate both of you and jens' patience.

You're welcome. It led us to rethinking and cleaning up some parts, which is
great! And there are more such places, it's definitely worth looking into.

-- 
Pavel Begunkov


end of thread, other threads:[~2020-06-11 13:46 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-09  8:25 [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Xiaoguang Wang
2020-06-09  8:25 ` [PATCH v6 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature Xiaoguang Wang
2020-06-09  8:32   ` Xiaoguang Wang
2020-06-09 16:44 ` [PATCH v6 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline Pavel Begunkov
2020-06-10  1:40   ` Jens Axboe
2020-06-10 11:39     ` Xiaoguang Wang
2020-06-11 13:45       ` Pavel Begunkov
