io-uring.vger.kernel.org archive mirror
* [PATCH for-next 0/2] io_uring SQPOLL related
From: Jens Axboe @ 2020-09-04  0:02 UTC
  To: io-uring

Patch #1 makes it easier to support foolproof assignment of the shared
SQPOLL data structure, instead of doing it after the fact. It also
enables some cleanups around the fd install.

Patch #2 enables an application to wait on SQ ring consumption when
using SQPOLL, instead of having to busy poll for it. This can help
provide the necessary backpressure when using SQPOLL.
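
As a rough illustration of that backpressure loop from userspace, here
is a minimal sketch using the raw syscall and the IORING_ENTER_SQ_WAIT
flag added in patch #2. This is a sketch only: have_more_work(),
queue_one_sqe(), and sq_ring_full() are hypothetical helpers over the
mmap'ed SQ ring, and the ring is assumed to be set up with
IORING_SETUP_SQPOLL.

#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

static int submit_with_backpressure(int ring_fd)
{
	while (have_more_work()) {
		if (sq_ring_full()) {
			/* Sleep until the SQPOLL thread frees SQ entries */
			if (syscall(__NR_io_uring_enter, ring_fd, 0, 0,
				    IORING_ENTER_SQ_WAIT, NULL, 0) < 0)
				return -errno;
		}
		queue_one_sqe();
	}
	return 0;
}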

-- 
Jens Axboe




* [PATCH 1/2] io_uring: separate ring/file creation from descriptor install
From: Jens Axboe @ 2020-09-04  0:02 UTC
  To: io-uring; +Cc: Jens Axboe

As soon as the descriptor is installed, someone could potentially close
it. Separate getting the anon file and fd from installing the
descriptor, so we can use the fd before we finally install it at the
end of setup.
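
For reference, the reserve-then-install pattern being moved to looks
roughly like the sketch below (generic names, error handling trimmed;
the actual io_uring conversion is in the diff that follows):

	/* Reserve an fd; it is not visible to userspace yet */
	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("[example]", &example_fops, ctx, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);	/* fd never became visible */
		return PTR_ERR(file);
	}

	/*
	 * ... setup that may still fail; on failure, undo with
	 * fput(file) + put_unused_fd(fd), since close() can't race
	 * with us before fd_install() ...
	 */

	fd_install(fd, file);		/* final step: fd becomes live */
	return fd;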

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fc824b94c7ca..79bc148c0f51 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8616,7 +8616,7 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
  * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
  * we have to tie this fd to a socket for file garbage collection purposes.
  */
-static int io_uring_get_fd(struct io_ring_ctx *ctx)
+static int io_uring_get_fd(struct io_ring_ctx *ctx, struct file **fptr)
 {
 	struct file *file;
 	int ret;
@@ -8643,7 +8643,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
 #if defined(CONFIG_UNIX)
 	ctx->ring_sock->file = file;
 #endif
-	fd_install(ret, file);
+	*fptr = file;
 	return ret;
 err:
 #if defined(CONFIG_UNIX)
@@ -8658,8 +8658,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 {
 	struct user_struct *user = NULL;
 	struct io_ring_ctx *ctx;
+	struct file *file;
 	bool limit_mem;
-	int ret;
+	int ret, fd = -1;
 
 	if (!entries)
 		return -EINVAL;
@@ -8737,6 +8738,13 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	if (ret)
 		goto err;
 
+	/* Only gets the ring fd, doesn't install it in the file table */
+	fd = io_uring_get_fd(ctx, &file);
+	if (fd < 0) {
+		ret = fd;
+		goto err;
+	}
+
 	ret = io_sq_offload_create(ctx, p);
 	if (ret)
 		goto err;
@@ -8772,16 +8780,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 		goto err;
 	}
 
-	/*
-	 * Install ring fd as the very last thing, so we don't risk someone
-	 * having closed it before we finish setup
-	 */
-	ret = io_uring_get_fd(ctx);
-	if (ret < 0)
-		goto err;
-
 	trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
-	return ret;
+	fd_install(fd, file);
+	return fd;
 err:
 	/*
 	 * Our wait-and-kill does do this, but we need it done before we
@@ -8789,7 +8790,18 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	 * files could be done as soon as we exit here.
 	 */
 	io_finish_async(ctx);
-	io_ring_ctx_wait_and_kill(ctx);
+
+	/*
+	 * Final fput() will call release and free everything, so if we're
+	 * failing beyond having gotten a file and fd, just let normal
+	 * release off fput() free things.
+	 */
+	if (fd >= 0) {
+		fput(file);
+		put_unused_fd(fd);
+	} else {
+		io_ring_ctx_wait_and_kill(ctx);
+	}
 	return ret;
 }
 
-- 
2.28.0



* [PATCH 2/2] io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits
From: Jens Axboe @ 2020-09-04  0:02 UTC
  To: io-uring; +Cc: Jens Axboe

When using SQPOLL, applications can run out of SQ ring entries because
the SQPOLL thread hasn't consumed them yet. The only options for
dealing with that are retrying later, or busy checking for the
condition.

Provide IORING_ENTER_SQ_WAIT if applications want to wait on this
condition.
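
To illustrate the difference, a hedged userspace sketch, where
sq_ring_full() is a hypothetical helper comparing the mmap'ed SQ ring
head and tail:

	/* Before: busy check, burning CPU until the thread catches up */
	while (sq_ring_full())
		;

	/* After: sleep in the kernel until there is SQ ring space */
	syscall(__NR_io_uring_enter, ring_fd, 0, 0,
		IORING_ENTER_SQ_WAIT, NULL, 0);

Note the kernel-side wait is interruptible, so the enter call can
return early if a signal is pending.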

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 40 ++++++++++++++++++++++++++++++++---
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7a3f10a9329a..44c11bdc0dc7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -304,6 +304,7 @@ struct io_ring_ctx {
 
 	struct wait_queue_entry	sqo_wait_entry;
 	struct list_head	sqd_list;
+	struct wait_queue_head	sqo_sq_wait;
 
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
@@ -1094,6 +1095,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
+	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
@@ -1324,6 +1326,13 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 		__io_queue_deferred(ctx);
 }
 
+static inline bool io_sqring_full(struct io_ring_ctx *ctx)
+{
+	struct io_rings *r = ctx->rings;
+
+	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
+}
+
 static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
@@ -6673,6 +6682,10 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 	if (likely(!percpu_ref_is_dying(&ctx->refs)))
 		ret = io_submit_sqes(ctx, to_submit, NULL, -1);
 	mutex_unlock(&ctx->uring_lock);
+
+	if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait))
+		wake_up(&ctx->sqo_sq_wait);
+
 	return SQT_DID_WORK;
 }
 
@@ -8124,8 +8137,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	 * io_commit_cqring
 	 */
 	smp_rmb();
-	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
-	    ctx->rings->sq_ring_entries)
+	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	if (io_cqring_events(ctx, false))
 		mask |= EPOLLIN | EPOLLRDNORM;
@@ -8448,6 +8460,25 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 
 #endif /* !CONFIG_MMU */
 
+static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+{
+	DEFINE_WAIT(wait);
+
+	do {
+		if (!io_sqring_full(ctx))
+			break;
+
+		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
+
+		if (!io_sqring_full(ctx))
+			break;
+
+		schedule();
+	} while (!signal_pending(current));
+
+	finish_wait(&ctx->sqo_sq_wait, &wait);
+}
+
 SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		u32, min_complete, u32, flags, const sigset_t __user *, sig,
 		size_t, sigsz)
@@ -8459,7 +8490,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 
 	io_run_task_work();
 
-	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
+	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
+			IORING_ENTER_SQ_WAIT))
 		return -EINVAL;
 
 	f = fdget(fd);
@@ -8489,6 +8521,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			io_cqring_overflow_flush(ctx, false);
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
+		if (flags & IORING_ENTER_SQ_WAIT)
+			io_sqpoll_wait_sq(ctx);
 		submitted = to_submit;
 	} else if (to_submit) {
 		mutex_lock(&ctx->uring_lock);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1112c0f05641..7539d912690b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -225,6 +225,7 @@ struct io_cqring_offsets {
  */
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
+#define IORING_ENTER_SQ_WAIT	(1U << 2)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
-- 
2.28.0


