IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] io_uring: reduce/pack size of io_ring_ctx
@ 2019-11-07 20:23 Jens Axboe
  2019-11-08  0:00 ` Jackie Liu
  0 siblings, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2019-11-07 20:23 UTC (permalink / raw)
  To: io-uring

With the recent flurry of additions and changes to io_uring, the
layout of io_ring_ctx has become a bit stale. We're right now at
704 bytes in size on my x86-64 build, or 11 cachelines. This
patch does two things:

- We have two completion structs embedded, that we only use for
  quiesce of the ctx (or shutdown) and for sqthread init cases.
  That's 2x32 bytes right there, let's dynamically allocate them.

- Reorder the struct a bit with an eye on cachelines, use cases,
  and holes.

With this patch, we're down to 512 bytes, or 8 cachelines.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

--

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f8344f95817e..2dbc108fa27b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -212,25 +212,14 @@ struct io_ring_ctx {
 		wait_queue_head_t	inflight_wait;
 	} ____cacheline_aligned_in_smp;
 
+	struct io_rings	*rings;
+
 	/* IO offload */
 	struct io_wq		*io_wq;
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	struct completion	sqo_thread_started;
-
-	struct {
-		unsigned		cached_cq_tail;
-		atomic_t		cached_cq_overflow;
-		unsigned		cq_entries;
-		unsigned		cq_mask;
-		struct wait_queue_head	cq_wait;
-		struct fasync_struct	*cq_fasync;
-		struct eventfd_ctx	*cq_ev_fd;
-		atomic_t		cq_timeouts;
-	} ____cacheline_aligned_in_smp;
-
-	struct io_rings	*rings;
+	struct completion	*sqo_done;
 
 	/*
 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
@@ -246,7 +235,22 @@ struct io_ring_ctx {
 
 	struct user_struct	*user;
 
-	struct completion	ctx_done;
+	struct completion	*ctx_done;
+
+#if defined(CONFIG_UNIX)
+	struct socket		*ring_sock;
+#endif
+
+	struct {
+		unsigned		cached_cq_tail;
+		atomic_t		cached_cq_overflow;
+		unsigned		cq_entries;
+		unsigned		cq_mask;
+		struct wait_queue_head	cq_wait;
+		struct fasync_struct	*cq_fasync;
+		struct eventfd_ctx	*cq_ev_fd;
+		atomic_t		cq_timeouts;
+	} ____cacheline_aligned_in_smp;
 
 	struct {
 		struct mutex		uring_lock;
@@ -268,10 +272,6 @@ struct io_ring_ctx {
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
 	} ____cacheline_aligned_in_smp;
-
-#if defined(CONFIG_UNIX)
-	struct socket		*ring_sock;
-#endif
 };
 
 struct sqe_submit {
@@ -396,7 +396,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
 {
 	struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
 
-	complete(&ctx->ctx_done);
+	complete(ctx->ctx_done);
 }
 
 static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
@@ -407,17 +407,20 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (!ctx)
 		return NULL;
 
+	ctx->ctx_done = kmalloc(sizeof(struct completion), GFP_KERNEL);
+	ctx->sqo_done = kmalloc(sizeof(struct completion), GFP_KERNEL);
+	if (!ctx->ctx_done || !ctx->sqo_done)
+		goto err;
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
-			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		kfree(ctx);
-		return NULL;
-	}
+			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+		goto err;
 
 	ctx->flags = p->flags;
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
-	init_completion(&ctx->ctx_done);
-	init_completion(&ctx->sqo_thread_started);
+	init_completion(ctx->ctx_done);
+	init_completion(ctx->sqo_done);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
@@ -429,6 +432,11 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	spin_lock_init(&ctx->inflight_lock);
 	INIT_LIST_HEAD(&ctx->inflight_list);
 	return ctx;
+err:
+	kfree(ctx->ctx_done);
+	kfree(ctx->sqo_done);
+	kfree(ctx);
+	return NULL;
 }
 
 static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
@@ -3037,7 +3045,7 @@ static int io_sq_thread(void *data)
 	unsigned inflight;
 	unsigned long timeout;
 
-	complete(&ctx->sqo_thread_started);
+	complete(ctx->sqo_done);
 
 	old_fs = get_fs();
 	set_fs(USER_DS);
@@ -3276,7 +3284,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		wait_for_completion(&ctx->sqo_thread_started);
+		wait_for_completion(ctx->sqo_done);
 		/*
 		 * The park is a bit of a work-around, without it we get
 		 * warning spews on shutdown with SQPOLL set and affinity
@@ -4098,6 +4106,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		io_unaccount_mem(ctx->user,
 				ring_pages(ctx->sq_entries, ctx->cq_entries));
 	free_uid(ctx->user);
+	kfree(ctx->ctx_done);
+	kfree(ctx->sqo_done);
 	kfree(ctx);
 }
 
@@ -4141,7 +4151,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 		io_wq_cancel_all(ctx->io_wq);
 
 	io_iopoll_reap_events(ctx);
-	wait_for_completion(&ctx->ctx_done);
+	wait_for_completion(ctx->ctx_done);
 	io_ring_ctx_free(ctx);
 }
 
@@ -4545,7 +4555,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	 * no new references will come in after we've killed the percpu ref.
 	 */
 	mutex_unlock(&ctx->uring_lock);
-	wait_for_completion(&ctx->ctx_done);
+	wait_for_completion(ctx->ctx_done);
 	mutex_lock(&ctx->uring_lock);
 
 	switch (opcode) {
@@ -4588,7 +4598,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	}
 
 	/* bring the ctx back to life */
-	reinit_completion(&ctx->ctx_done);
+	reinit_completion(ctx->ctx_done);
 	percpu_ref_reinit(&ctx->refs);
 	return ret;
 }

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: reduce/pack size of io_ring_ctx
  2019-11-07 20:23 [PATCH] io_uring: reduce/pack size of io_ring_ctx Jens Axboe
@ 2019-11-08  0:00 ` Jackie Liu
  2019-11-08  0:06   ` Jens Axboe
  0 siblings, 1 reply; 5+ messages in thread
From: Jackie Liu @ 2019-11-08  0:00 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring



> 2019年11月8日 04:23,Jens Axboe <axboe@kernel.dk> 写道:
> 
> With the recent flurry of additions and changes to io_uring, the
> layout of io_ring_ctx has become a bit stale. We're right now at
> 704 bytes in size on my x86-64 build, or 11 cachelines. This
> patch does two things:
> 
> - We have two completion structs embedded, that we only use for
>  quiesce of the ctx (or shutdown) and for sqthread init cases.
>  That's 2x32 bytes right there, let's dynamically allocate them.
> 
> - Reorder the struct a bit with an eye on cachelines, use cases,
>  and holes.
> 
> With this patch, we're down to 512 bytes, or 8 cachelines.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> --
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index f8344f95817e..2dbc108fa27b 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -212,25 +212,14 @@ struct io_ring_ctx {
> 		wait_queue_head_t	inflight_wait;
> 	} ____cacheline_aligned_in_smp;
> 
> +	struct io_rings	*rings;
> +
> 	/* IO offload */
> 	struct io_wq		*io_wq;
> 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
> 	struct mm_struct	*sqo_mm;
> 	wait_queue_head_t	sqo_wait;
> -	struct completion	sqo_thread_started;
> -
> -	struct {
> -		unsigned		cached_cq_tail;
> -		atomic_t		cached_cq_overflow;
> -		unsigned		cq_entries;
> -		unsigned		cq_mask;
> -		struct wait_queue_head	cq_wait;
> -		struct fasync_struct	*cq_fasync;
> -		struct eventfd_ctx	*cq_ev_fd;
> -		atomic_t		cq_timeouts;
> -	} ____cacheline_aligned_in_smp;
> -
> -	struct io_rings	*rings;
> +	struct completion	*sqo_done;
> 
> 	/*
> 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
> @@ -246,7 +235,22 @@ struct io_ring_ctx {
> 
> 	struct user_struct	*user;
> 
> -	struct completion	ctx_done;
> +	struct completion	*ctx_done;
> +
> +#if defined(CONFIG_UNIX)
> +	struct socket		*ring_sock;
> +#endif
> +
> +	struct {
> +		unsigned		cached_cq_tail;
> +		atomic_t		cached_cq_overflow;
> +		unsigned		cq_entries;
> +		unsigned		cq_mask;
> +		struct wait_queue_head	cq_wait;
> +		struct fasync_struct	*cq_fasync;
> +		struct eventfd_ctx	*cq_ev_fd;
> +		atomic_t		cq_timeouts;
> +	} ____cacheline_aligned_in_smp;
> 
> 	struct {
> 		struct mutex		uring_lock;
> @@ -268,10 +272,6 @@ struct io_ring_ctx {
> 		spinlock_t		inflight_lock;
> 		struct list_head	inflight_list;
> 	} ____cacheline_aligned_in_smp;
> -
> -#if defined(CONFIG_UNIX)
> -	struct socket		*ring_sock;
> -#endif
> };
> 
> struct sqe_submit {
> @@ -396,7 +396,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
> {
> 	struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
> 
> -	complete(&ctx->ctx_done);
> +	complete(ctx->ctx_done);
> }
> 
> static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> @@ -407,17 +407,20 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> 	if (!ctx)
> 		return NULL;
> 
> +	ctx->ctx_done = kmalloc(sizeof(struct completion), GFP_KERNEL);
> +	ctx->sqo_done = kmalloc(sizeof(struct completion), GFP_KERNEL);
> +	if (!ctx->ctx_done || !ctx->sqo_done)
> +		goto err;
> +
> 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
> -			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> -		kfree(ctx);
> -		return NULL;
> -	}
> +			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
> +		goto err;
> 
> 	ctx->flags = p->flags;
> 	init_waitqueue_head(&ctx->cq_wait);
> 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
> -	init_completion(&ctx->ctx_done);
> -	init_completion(&ctx->sqo_thread_started);
> +	init_completion(ctx->ctx_done);
> +	init_completion(ctx->sqo_done);
> 	mutex_init(&ctx->uring_lock);
> 	init_waitqueue_head(&ctx->wait);
> 	spin_lock_init(&ctx->completion_lock);
> @@ -429,6 +432,11 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> 	spin_lock_init(&ctx->inflight_lock);
> 	INIT_LIST_HEAD(&ctx->inflight_list);
> 	return ctx;
> +err:
> +	kfree(ctx->ctx_done);
> +	kfree(ctx->sqo_done);
> +	kfree(ctx);
> +	return NULL;
> }
> 
> static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
> @@ -3037,7 +3045,7 @@ static int io_sq_thread(void *data)
> 	unsigned inflight;
> 	unsigned long timeout;
> 
> -	complete(&ctx->sqo_thread_started);
> +	complete(ctx->sqo_done);
> 
> 	old_fs = get_fs();
> 	set_fs(USER_DS);
> @@ -3276,7 +3284,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
> static void io_sq_thread_stop(struct io_ring_ctx *ctx)
> {
> 	if (ctx->sqo_thread) {
> -		wait_for_completion(&ctx->sqo_thread_started);
> +		wait_for_completion(ctx->sqo_done);
> 		/*
> 		 * The park is a bit of a work-around, without it we get
> 		 * warning spews on shutdown with SQPOLL set and affinity
> @@ -4098,6 +4106,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
> 		io_unaccount_mem(ctx->user,
> 				ring_pages(ctx->sq_entries, ctx->cq_entries));
> 	free_uid(ctx->user);
> +	kfree(ctx->ctx_done);
> +	kfree(ctx->sqo_done);
> 	kfree(ctx);
> }
> 
> @@ -4141,7 +4151,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
> 		io_wq_cancel_all(ctx->io_wq);
> 
> 	io_iopoll_reap_events(ctx);
> -	wait_for_completion(&ctx->ctx_done);
> +	wait_for_completion(ctx->ctx_done);
> 	io_ring_ctx_free(ctx);
> }
> 
> @@ -4545,7 +4555,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
> 	 * no new references will come in after we've killed the percpu ref.
> 	 */
> 	mutex_unlock(&ctx->uring_lock);
> -	wait_for_completion(&ctx->ctx_done);
> +	wait_for_completion(ctx->ctx_done);
> 	mutex_lock(&ctx->uring_lock);
> 
> 	switch (opcode) {
> @@ -4588,7 +4598,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
> 	}
> 
> 	/* bring the ctx back to life */
> -	reinit_completion(&ctx->ctx_done);
> +	reinit_completion(ctx->ctx_done);
> 	percpu_ref_reinit(&ctx->refs);
> 	return ret;
> }

This patch looks good, but I prefer sqo_thread_started instead of sqo_done,
because we are marking the thread started, not the end of the thread.

Anyway, Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>

--
BR, Jackie Liu




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: reduce/pack size of io_ring_ctx
  2019-11-08  0:00 ` Jackie Liu
@ 2019-11-08  0:06   ` Jens Axboe
  2019-11-08  0:35     ` Jens Axboe
  0 siblings, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2019-11-08  0:06 UTC (permalink / raw)
  To: Jackie Liu; +Cc: io-uring

On 11/7/19 5:00 PM, Jackie Liu wrote:
> This patch looks good, but I prefer sqo_thread_started instead of sqo_done,
> because we are marking the thread started, not the end of the thread.
> 
> Anyway, Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>

Yeah, let's retain the old name. I'll make that change and add your
reviewed-by, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: reduce/pack size of io_ring_ctx
  2019-11-08  0:06   ` Jens Axboe
@ 2019-11-08  0:35     ` Jens Axboe
  2019-11-08  0:43       ` Jackie Liu
  0 siblings, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2019-11-08  0:35 UTC (permalink / raw)
  To: Jackie Liu; +Cc: io-uring

On 11/7/19 5:06 PM, Jens Axboe wrote:
> On 11/7/19 5:00 PM, Jackie Liu wrote:
>> This patch looks good, but I prefer sqo_thread_started instead of sqo_done,
>> because we are marking the thread started, not the end of the thread.
>>
>> Anyway, Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
> 
> Yeah, let's retain the old name. I'll make that change and add your
> reviewed-by, thanks.

Actually, would you mind if we just make it ->completions[2] instead?
That saves a kmalloc per ctx setup, I think that's worthwhile enough
to bundle them together:


commit 3b830211e99976650d5da0613dfca105c5007f8b
Author: Jens Axboe <axboe@kernel.dk>
Date:   Thu Nov 7 17:27:39 2019 -0700

    io_uring: reduce/pack size of io_ring_ctx
    
    With the recent flurry of additions and changes to io_uring, the
    layout of io_ring_ctx has become a bit stale. We're right now at
    704 bytes in size on my x86-64 build, or 11 cachelines. This
    patch does two things:
    
    - We have two completion structs embedded, that we only use for
      quiesce of the ctx (or shutdown) and for sqthread init cases.
      That's 2x32 bytes right there, let's dynamically allocate them.
    
    - Reorder the struct a bit with an eye on cachelines, use cases,
      and holes.
    
    With this patch, we're down to 512 bytes, or 8 cachelines.
    
    Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c488bf6e889..2b784262eaff 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -213,24 +213,13 @@ struct io_ring_ctx {
 		wait_queue_head_t	inflight_wait;
 	} ____cacheline_aligned_in_smp;
 
+	struct io_rings	*rings;
+
 	/* IO offload */
 	struct io_wq		*io_wq;
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	struct completion	sqo_thread_started;
-
-	struct {
-		unsigned		cached_cq_tail;
-		unsigned		cq_entries;
-		unsigned		cq_mask;
-		atomic_t		cq_timeouts;
-		struct wait_queue_head	cq_wait;
-		struct fasync_struct	*cq_fasync;
-		struct eventfd_ctx	*cq_ev_fd;
-	} ____cacheline_aligned_in_smp;
-
-	struct io_rings	*rings;
 
 	/*
 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
@@ -246,7 +235,22 @@ struct io_ring_ctx {
 
 	struct user_struct	*user;
 
-	struct completion	ctx_done;
+	/* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
+	struct completion	*completions;
+
+#if defined(CONFIG_UNIX)
+	struct socket		*ring_sock;
+#endif
+
+	struct {
+		unsigned		cached_cq_tail;
+		unsigned		cq_entries;
+		unsigned		cq_mask;
+		atomic_t		cq_timeouts;
+		struct wait_queue_head	cq_wait;
+		struct fasync_struct	*cq_fasync;
+		struct eventfd_ctx	*cq_ev_fd;
+	} ____cacheline_aligned_in_smp;
 
 	struct {
 		struct mutex		uring_lock;
@@ -268,10 +272,6 @@ struct io_ring_ctx {
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
 	} ____cacheline_aligned_in_smp;
-
-#if defined(CONFIG_UNIX)
-	struct socket		*ring_sock;
-#endif
 };
 
 struct sqe_submit {
@@ -396,7 +396,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
 {
 	struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
 
-	complete(&ctx->ctx_done);
+	complete(&ctx->completions[0]);
 }
 
 static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
@@ -407,17 +407,19 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (!ctx)
 		return NULL;
 
+	ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL);
+	if (!ctx->completions)
+		goto err;
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
-			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		kfree(ctx);
-		return NULL;
-	}
+			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+		goto err;
 
 	ctx->flags = p->flags;
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
-	init_completion(&ctx->ctx_done);
-	init_completion(&ctx->sqo_thread_started);
+	init_completion(&ctx->completions[0]);
+	init_completion(&ctx->completions[1]);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
@@ -429,6 +431,10 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	spin_lock_init(&ctx->inflight_lock);
 	INIT_LIST_HEAD(&ctx->inflight_list);
 	return ctx;
+err:
+	kfree(ctx->completions);
+	kfree(ctx);
+	return NULL;
 }
 
 static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
@@ -3065,7 +3071,7 @@ static int io_sq_thread(void *data)
 	unsigned inflight;
 	unsigned long timeout;
 
-	complete(&ctx->sqo_thread_started);
+	complete(&ctx->completions[1]);
 
 	old_fs = get_fs();
 	set_fs(USER_DS);
@@ -3304,7 +3310,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		wait_for_completion(&ctx->sqo_thread_started);
+		wait_for_completion(&ctx->completions[1]);
 		/*
 		 * The park is a bit of a work-around, without it we get
 		 * warning spews on shutdown with SQPOLL set and affinity
@@ -4126,6 +4132,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		io_unaccount_mem(ctx->user,
 				ring_pages(ctx->sq_entries, ctx->cq_entries));
 	free_uid(ctx->user);
+	kfree(ctx->completions);
 	kfree(ctx);
 }
 
@@ -4169,7 +4176,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 		io_wq_cancel_all(ctx->io_wq);
 
 	io_iopoll_reap_events(ctx);
-	wait_for_completion(&ctx->ctx_done);
+	wait_for_completion(&ctx->completions[0]);
 	io_ring_ctx_free(ctx);
 }
 
@@ -4573,7 +4580,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	 * no new references will come in after we've killed the percpu ref.
 	 */
 	mutex_unlock(&ctx->uring_lock);
-	wait_for_completion(&ctx->ctx_done);
+	wait_for_completion(&ctx->completions[0]);
 	mutex_lock(&ctx->uring_lock);
 
 	switch (opcode) {
@@ -4616,7 +4623,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	}
 
 	/* bring the ctx back to life */
-	reinit_completion(&ctx->ctx_done);
+	reinit_completion(&ctx->completions[0]);
 	percpu_ref_reinit(&ctx->refs);
 	return ret;
 }

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: reduce/pack size of io_ring_ctx
  2019-11-08  0:35     ` Jens Axboe
@ 2019-11-08  0:43       ` Jackie Liu
  0 siblings, 0 replies; 5+ messages in thread
From: Jackie Liu @ 2019-11-08  0:43 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring



> 2019年11月8日 08:35,Jens Axboe <axboe@kernel.dk> 写道:
> 
> On 11/7/19 5:06 PM, Jens Axboe wrote:
>> On 11/7/19 5:00 PM, Jackie Liu wrote:
>>> This patch looks good, but I prefer sqo_thread_started instead of sqo_done,
>>> because we are marking the thread started, not the end of the thread.
>>> 
>>> Anyway, Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
>> 
>> Yeah, let's retain the old name. I'll make that change and add your
>> reviewed-by, thanks.
> 
> Actually, would you mind if we just make it ->completions[2] instead?
> That saves a kmalloc per ctx setup, I think that's worthwhile enough
> to bundle them together:
> 
> 
> commit 3b830211e99976650d5da0613dfca105c5007f8b
> Author: Jens Axboe <axboe@kernel.dk>
> Date:   Thu Nov 7 17:27:39 2019 -0700
> 
>    io_uring: reduce/pack size of io_ring_ctx
> 
>    With the recent flurry of additions and changes to io_uring, the
>    layout of io_ring_ctx has become a bit stale. We're right now at
>    704 bytes in size on my x86-64 build, or 11 cachelines. This
>    patch does two things:
> 
>    - We have two completion structs embedded, that we only use for
>      quiesce of the ctx (or shutdown) and for sqthread init cases.
>      That's 2x32 bytes right there, let's dynamically allocate them.
> 
>    - Reorder the struct a bit with an eye on cachelines, use cases,
>      and holes.
> 
>    With this patch, we're down to 512 bytes, or 8 cachelines.
> 
>    Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
>    Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 4c488bf6e889..2b784262eaff 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -213,24 +213,13 @@ struct io_ring_ctx {
> 		wait_queue_head_t	inflight_wait;
> 	} ____cacheline_aligned_in_smp;
> 
> +	struct io_rings	*rings;
> +
> 	/* IO offload */
> 	struct io_wq		*io_wq;
> 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
> 	struct mm_struct	*sqo_mm;
> 	wait_queue_head_t	sqo_wait;
> -	struct completion	sqo_thread_started;
> -
> -	struct {
> -		unsigned		cached_cq_tail;
> -		unsigned		cq_entries;
> -		unsigned		cq_mask;
> -		atomic_t		cq_timeouts;
> -		struct wait_queue_head	cq_wait;
> -		struct fasync_struct	*cq_fasync;
> -		struct eventfd_ctx	*cq_ev_fd;
> -	} ____cacheline_aligned_in_smp;
> -
> -	struct io_rings	*rings;
> 
> 	/*
> 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
> @@ -246,7 +235,22 @@ struct io_ring_ctx {
> 
> 	struct user_struct	*user;
> 
> -	struct completion	ctx_done;
> +	/* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
> +	struct completion	*completions;
> +

I think it's okay, it's clear through comments here.

--
BR, Jackie Liu




^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-07 20:23 [PATCH] io_uring: reduce/pack size of io_ring_ctx Jens Axboe
2019-11-08  0:00 ` Jackie Liu
2019-11-08  0:06   ` Jens Axboe
2019-11-08  0:35     ` Jens Axboe
2019-11-08  0:43       ` Jackie Liu

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git