All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Pavel Begunkov <asml.silence@gmail.com>,
	Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>,
	io-uring <io-uring@vger.kernel.org>
Cc: joseph qi <joseph.qi@linux.alibaba.com>,
	Jiufei Xue <jiufei.xue@linux.alibaba.com>
Subject: Re: [PATCH RFC} io_uring: io_kiocb alloc cache
Date: Thu, 14 May 2020 09:15:28 -0600	[thread overview]
Message-ID: <a2b5e500-316d-dc06-1a25-72aaf67ac227@kernel.dk> (raw)
In-Reply-To: <0ec1b33d-893f-1b10-128e-f8a8950b0384@gmail.com>

On 5/14/20 8:53 AM, Pavel Begunkov wrote:
> On 14/05/2020 17:33, Jens Axboe wrote:
>> On 5/14/20 8:22 AM, Jens Axboe wrote:
>>>> I still use my previous io_uring_nop_stress tool to evaluate the improvement
>>>> in a physical machine. Memory 250GB and cpu is "Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz".
>>>> Before this patch:
>>>> $sudo taskset -c 60 ./io_uring_nop_stress -r 300
>>>> total ios: 1608773840
>>>> IOPS:      5362579
>>>>
>>>> With this patch:
>>>> sudo taskset -c 60 ./io_uring_nop_stress -r 300
>>>> total ios: 1676910736
>>>> IOPS:      5589702
>>>> About 4.2% improvement.
>>>
>>> That's not bad. Can you try the patch from Pekka as well, just to see if
>>> that helps for you?
>>>
>>> I also had another idea... We basically have two types of request life
>>> times:
>>>
>>> 1) io_kiocb can get queued up internally
>>> 2) io_kiocb completes inline
>>>
>>> For the latter, it's totally feasible to just have the io_kiocb on
>>> stack. The downside is if we need to go the slower path, then we need to
>>> alloc an io_kiocb then and copy it. But maybe that's OK... I'll play
>>> with it.
> 
> Does it differ from having one pre-allocated req? Like fallback_req,
> but without atomics and returned only under uring_mutex (i.e. in
> __io_queue_sqe()). Putting aside its usefulness, at least it will have
> a chance to work with reads/writes.

But then you need atomics. I actually think the bigger win here is not
having to use atomic refcounts for this particular part, since we know
the request can't get shared.

Modified below with a poor man's dec-and-test; pretty sure that part is a
bigger win than avoiding the allocator. So maybe that's a better
direction to take, for things we can't get irq completions on.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index d2e37215d05a..27c1c8f4d2f4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -525,6 +525,7 @@ enum {
 	REQ_F_POLLED_BIT,
 	REQ_F_BUFFER_SELECTED_BIT,
 	REQ_F_NO_FILE_TABLE_BIT,
+	REQ_F_STACK_REQ_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -580,6 +581,8 @@ enum {
 	REQ_F_BUFFER_SELECTED	= BIT(REQ_F_BUFFER_SELECTED_BIT),
 	/* doesn't need file table for this request */
 	REQ_F_NO_FILE_TABLE	= BIT(REQ_F_NO_FILE_TABLE_BIT),
+	/* on-stack req */
+	REQ_F_STACK_REQ		= BIT(REQ_F_STACK_REQ_BIT),
 };
 
 struct async_poll {
@@ -695,10 +698,14 @@ struct io_op_def {
 	unsigned		pollout : 1;
 	/* op supports buffer selection */
 	unsigned		buffer_select : 1;
+	/* op can use stack req */
+	unsigned		stack_req : 1;
 };
 
 static const struct io_op_def io_op_defs[] = {
-	[IORING_OP_NOP] = {},
+	[IORING_OP_NOP] = {
+		.stack_req		= 1,
+	},
 	[IORING_OP_READV] = {
 		.async_ctx		= 1,
 		.needs_mm		= 1,
@@ -1345,7 +1352,8 @@ static void __io_req_aux_free(struct io_kiocb *req)
 	if (req->flags & REQ_F_NEED_CLEANUP)
 		io_cleanup_req(req);
 
-	kfree(req->io);
+	if (req->io)
+		kfree(req->io);
 	if (req->file)
 		io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
 	if (req->task)
@@ -1370,6 +1378,8 @@ static void __io_free_req(struct io_kiocb *req)
 	}
 
 	percpu_ref_put(&req->ctx->refs);
+	if (req->flags & REQ_F_STACK_REQ)
+		return;
 	if (likely(!io_is_fallback_req(req)))
 		kmem_cache_free(req_cachep, req);
 	else
@@ -1585,7 +1595,14 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
 __attribute__((nonnull))
 static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 {
-	if (refcount_dec_and_test(&req->refs)) {
+	bool free_it;
+
+	if (req->flags & REQ_F_STACK_REQ)
+		free_it = __refcount_dec_and_test(&req->refs);
+	else
+		free_it = refcount_dec_and_test(&req->refs);
+
+	if (free_it) {
 		io_req_find_next(req, nxtptr);
 		__io_free_req(req);
 	}
@@ -1593,7 +1610,13 @@ static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 
 static void io_put_req(struct io_kiocb *req)
 {
-	if (refcount_dec_and_test(&req->refs))
+	bool free_it;
+
+	if (req->flags & REQ_F_STACK_REQ)
+		free_it = __refcount_dec_and_test(&req->refs);
+	else
+		free_it = refcount_dec_and_test(&req->refs);
+	if (free_it)
 		io_free_req(req);
 }
 
@@ -5784,12 +5807,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	 * link list.
 	 */
 	req->sequence = ctx->cached_sq_head - ctx->cached_sq_dropped;
-	req->opcode = READ_ONCE(sqe->opcode);
 	req->user_data = READ_ONCE(sqe->user_data);
 	req->io = NULL;
 	req->file = NULL;
 	req->ctx = ctx;
-	req->flags = 0;
 	/* one is dropped after submission, the other at completion */
 	refcount_set(&req->refs, 2);
 	req->task = NULL;
@@ -5839,6 +5860,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 {
 	struct io_submit_state state, *statep = NULL;
 	struct io_kiocb *link = NULL;
+	struct io_kiocb stack_req;
 	int i, submitted = 0;
 
 	/* if we have a backlog and couldn't flush it all, return BUSY */
@@ -5865,20 +5887,31 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	for (i = 0; i < nr; i++) {
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
-		int err;
+		int err, op;
 
 		sqe = io_get_sqe(ctx);
 		if (unlikely(!sqe)) {
 			io_consume_sqe(ctx);
 			break;
 		}
-		req = io_alloc_req(ctx, statep);
-		if (unlikely(!req)) {
-			if (!submitted)
-				submitted = -EAGAIN;
-			break;
+
+		op = READ_ONCE(sqe->opcode);
+
+		if (io_op_defs[op].stack_req) {
+			req = &stack_req;
+			req->flags = REQ_F_STACK_REQ;
+		} else {
+			req = io_alloc_req(ctx, statep);
+			if (unlikely(!req)) {
+				if (!submitted)
+					submitted = -EAGAIN;
+				break;
+			}
+			req->flags = 0;
 		}
 
+		req->opcode = op;
+
 		err = io_init_req(ctx, req, sqe, statep, async);
 		io_consume_sqe(ctx);
 		/* will complete beyond this point, count as submitted */
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 0e3ee25eb156..1afaf73c0984 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -294,6 +294,13 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r)
 	return refcount_sub_and_test(1, r);
 }
 
+static inline __must_check bool __refcount_dec_and_test(refcount_t *r)
+{
+	int old = --r->refs.counter;
+
+	return old == 0;
+}
+
 /**
  * refcount_dec - decrement a refcount
  * @r: the refcount

-- 
Jens Axboe


  reply	other threads:[~2020-05-14 15:15 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-13 16:30 [PATCH RFC} io_uring: io_kiocb alloc cache Jens Axboe
2020-05-13 17:42 ` Jann Horn
2020-05-13 17:42   ` Jann Horn
2020-05-13 18:34   ` Jens Axboe
2020-05-13 19:20   ` Pekka Enberg
2020-05-13 20:09     ` Jens Axboe
2020-05-13 20:31       ` Pekka Enberg
2020-05-13 20:44         ` Jens Axboe
2020-05-14  8:25 ` Xiaoguang Wang
2020-05-14 14:22   ` Jens Axboe
2020-05-14 14:33     ` Jens Axboe
2020-05-14 14:53       ` Pavel Begunkov
2020-05-14 15:15         ` Jens Axboe [this message]
2020-05-14 15:37           ` Pavel Begunkov
2020-05-14 15:53             ` Jens Axboe
2020-05-14 16:18               ` Pavel Begunkov
2020-05-14 16:21                 ` Jens Axboe
2020-05-14 16:25                 ` Pavel Begunkov
2020-05-14 17:01                   ` Jens Axboe
2020-05-14 17:41                     ` Jens Axboe
2020-05-16  9:20       ` Xiaoguang Wang
2020-05-16 16:15     ` Xiaoguang Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a2b5e500-316d-dc06-1a25-72aaf67ac227@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=asml.silence@gmail.com \
    --cc=io-uring@vger.kernel.org \
    --cc=jiufei.xue@linux.alibaba.com \
    --cc=joseph.qi@linux.alibaba.com \
    --cc=xiaoguang.wang@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.