[v2,2/2] io_uring: add io-wq workqueue sharing
diff mbox series

Message ID c40338a9989a45ec38f36e5937365eca6a089795.1580170474.git.asml.silence@gmail.com
State New
Headers show
Series
  • io-wq sharing
Related show

Commit Message

Pavel Begunkov Jan. 28, 2020, 12:15 a.m. UTC
If IORING_SETUP_ATTACH_WQ is set, it expects wq_fd in io_uring_params to
be a valid io_uring fd io-wq of which will be shared with the newly
created io_uring instance. If the flag is set but it can't share io-wq,
it fails.

This allows creation of "sibling" io_urings, where we prefer to keep the
SQ/CQ private, but want to share the async backend to minimize the amount
of overhead associated with having multiple rings that belong to the same
backend.

Reported-by: Jens Axboe <axboe@kernel.dk>
Reported-by: Daurnimator <quae@daurnimator.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 68 +++++++++++++++++++++++++++--------
 include/uapi/linux/io_uring.h |  4 ++-
 2 files changed, 57 insertions(+), 15 deletions(-)

Comments

Jens Axboe Jan. 28, 2020, 12:22 a.m. UTC | #1
On 1/27/20 5:15 PM, Pavel Begunkov wrote:
> @@ -6577,7 +6613,11 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
>  
>  	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
>  			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
> -			IORING_SETUP_CLAMP))
> +			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
> +		return -EINVAL;
> +
> +	/* wq_fd isn't valid without ATTACH_WQ being set */
> +	if (!(p.flags & IORING_SETUP_ATTACH_WQ) && p.wq_fd)
>  		return -EINVAL;

Since we're now using file descriptors, this no longer works. Any values
(outside of -1) is fair game.

Patch
diff mbox series

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ec6428933c3..de1cb3135721 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5692,11 +5692,56 @@  static void io_get_work(struct io_wq_work *work)
 	refcount_inc(&req->refs);
 }
 
+static int io_init_wq_offload(struct io_ring_ctx *ctx,
+			      struct io_uring_params *p)
+{
+	struct io_wq_data data;
+	struct fd f;
+	struct io_ring_ctx *ctx_attach;
+	unsigned int concurrency;
+	int ret = 0;
+
+	data.user = ctx->user;
+	data.get_work = io_get_work;
+	data.put_work = io_put_work;
+
+	if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
+		/* Do QD, or 4 * CPUS, whatever is smallest */
+		concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
+
+		ctx->io_wq = io_wq_create(concurrency, &data);
+		if (IS_ERR(ctx->io_wq)) {
+			ret = PTR_ERR(ctx->io_wq);
+			ctx->io_wq = NULL;
+		}
+		return ret;
+	}
+
+	f = fdget(p->wq_fd);
+	if (!f.file)
+		return -EBADF;
+
+	if (f.file->f_op != &io_uring_fops) {
+		ret = -EOPNOTSUPP;
+		goto out_fput;
+	}
+
+	ctx_attach = f.file->private_data;
+	/* @io_wq is protected by holding the fd */
+	if (!io_wq_get(ctx_attach->io_wq, &data)) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	ctx->io_wq = ctx_attach->io_wq;
+out_fput:
+	fdput(f);
+	return ret;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx,
 			       struct io_uring_params *p)
 {
-	struct io_wq_data data;
-	unsigned concurrency;
 	int ret;
 
 	init_waitqueue_head(&ctx->sqo_wait);
@@ -5740,18 +5785,9 @@  static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		goto err;
 	}
 
-	data.user = ctx->user;
-	data.get_work = io_get_work;
-	data.put_work = io_put_work;
-
-	/* Do QD, or 4 * CPUS, whatever is smallest */
-	concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
-	ctx->io_wq = io_wq_create(concurrency, &data);
-	if (IS_ERR(ctx->io_wq)) {
-		ret = PTR_ERR(ctx->io_wq);
-		ctx->io_wq = NULL;
+	ret = io_init_wq_offload(ctx, p);
+	if (ret)
 		goto err;
-	}
 
 	return 0;
 err:
@@ -6577,7 +6613,11 @@  static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
-			IORING_SETUP_CLAMP))
+			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
+		return -EINVAL;
+
+	/* wq_fd isn't valid without ATTACH_WQ being set */
+	if (!(p.flags & IORING_SETUP_ATTACH_WQ) && p.wq_fd)
 		return -EINVAL;
 
 	ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 9988e82f858b..e067b92af5ad 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -75,6 +75,7 @@  enum {
 #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 
 enum {
 	IORING_OP_NOP,
@@ -183,7 +184,8 @@  struct io_uring_params {
 	__u32 sq_thread_cpu;
 	__u32 sq_thread_idle;
 	__u32 features;
-	__u32 resv[4];
+	__u32 wq_fd;
+	__u32 resv[3];
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };