* [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
From: Jens Axboe @ 2022-04-27  1:54 UTC
  To: io-uring

Hi,

I had a re-think on the flags2 addition [1] that was posted earlier
today, and I don't really like the fact that flags2 then can't work
with ioprio for read/write etc. We might also want to extend the
ioprio field for other types of IO in the future.

So rather than do that, take a simpler approach and just add an io_uring
specific flag set for send/recv and friends. This then allows setting
IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
will arm poll first rather than attempt a send/recv operation.

[1] https://lore.kernel.org/io-uring/20220426183343.150273-1-axboe@kernel.dk/
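
As a rough sketch of the intended use from userspace (the liburing calls
below are for illustration only; the only interface this series defines
is the flag in sqe->addr2, and 'sockfd' is assumed to be a connected
socket):

        struct io_uring ring;
        struct io_uring_sqe *sqe;
        char buf[4096];

        io_uring_queue_init(8, &ring, 0);
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_recv(sqe, sockfd, buf, sizeof(buf), 0);
        /* skip the initial recv attempt and arm poll directly */
        sqe->addr2 = IORING_RECVSEND_POLL_FIRST;
        io_uring_submit(&ring);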

-- 
Jens Axboe


* [PATCH 1/2] io_uring: check IOPOLL/ioprio support upfront
From: Jens Axboe @ 2022-04-27  1:54 UTC
  To: io-uring; +Cc: Jens Axboe

Don't punt this check to the op prep handlers; add the support flags to
io_op_defs so we can check them while setting up the request.

This reduces the text size by 500 bytes on aarch64, and makes this less
fragile by having the check in one spot and requiring opcodes to opt in
to IOPOLL or ioprio support.
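
As a sketch of the userspace-visible effect (liburing calls for
illustration only, assuming an initialized ring; IORING_OP_NOP is just
an example of an opcode that doesn't opt in to ioprio):

        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_nop(sqe);
        /* NOP doesn't mark ioprio support in io_op_defs */
        sqe->ioprio = 1;
        io_uring_submit(&ring);
        io_uring_wait_cqe(&ring, &cqe);
        /* cqe->res is -EINVAL, rejected upfront in io_init_req() */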

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 148 ++++++++++++++++++--------------------------------
 1 file changed, 52 insertions(+), 96 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f1a9595ba4c2..39325e469738 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1028,6 +1028,10 @@ struct io_op_def {
 	unsigned		not_supported : 1;
 	/* skip auditing */
 	unsigned		audit_skip : 1;
+	/* supports ioprio */
+	unsigned		ioprio : 1;
+	/* supports iopoll */
+	unsigned		iopoll : 1;
 	/* size of async data needed, if any */
 	unsigned short		async_size;
 };
@@ -1042,6 +1046,8 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_async_setup	= 1,
 		.plug			= 1,
 		.audit_skip		= 1,
+		.ioprio			= 1,
+		.iopoll			= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_WRITEV] = {
@@ -1052,6 +1058,8 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_async_setup	= 1,
 		.plug			= 1,
 		.audit_skip		= 1,
+		.ioprio			= 1,
+		.iopoll			= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_FSYNC] = {
@@ -1064,6 +1072,8 @@ static const struct io_op_def io_op_defs[] = {
 		.pollin			= 1,
 		.plug			= 1,
 		.audit_skip		= 1,
+		.ioprio			= 1,
+		.iopoll			= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_WRITE_FIXED] = {
@@ -1073,6 +1083,8 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.plug			= 1,
 		.audit_skip		= 1,
+		.ioprio			= 1,
+		.iopoll			= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_POLL_ADD] = {
@@ -1137,6 +1149,7 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_CLOSE] = {},
 	[IORING_OP_FILES_UPDATE] = {
 		.audit_skip		= 1,
+		.iopoll			= 1,
 	},
 	[IORING_OP_STATX] = {
 		.audit_skip		= 1,
@@ -1148,6 +1161,8 @@ static const struct io_op_def io_op_defs[] = {
 		.buffer_select		= 1,
 		.plug			= 1,
 		.audit_skip		= 1,
+		.ioprio			= 1,
+		.iopoll			= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_WRITE] = {
@@ -1157,6 +1172,8 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.plug			= 1,
 		.audit_skip		= 1,
+		.ioprio			= 1,
+		.iopoll			= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_FADVISE] = {
@@ -1191,9 +1208,11 @@ static const struct io_op_def io_op_defs[] = {
 	},
 	[IORING_OP_PROVIDE_BUFFERS] = {
 		.audit_skip		= 1,
+		.iopoll			= 1,
 	},
 	[IORING_OP_REMOVE_BUFFERS] = {
 		.audit_skip		= 1,
+		.iopoll			= 1,
 	},
 	[IORING_OP_TEE] = {
 		.needs_file		= 1,
@@ -1211,6 +1230,7 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_LINKAT] = {},
 	[IORING_OP_MSG_RING] = {
 		.needs_file		= 1,
+		.iopoll			= 1,
 	},
 };
 
@@ -4139,9 +4159,7 @@ static int io_renameat_prep(struct io_kiocb *req,
 	struct io_rename *ren = &req->rename;
 	const char __user *oldf, *newf;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+	if (sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
@@ -4190,10 +4208,7 @@ static int io_unlinkat_prep(struct io_kiocb *req,
 	struct io_unlink *un = &req->unlink;
 	const char __user *fname;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
-	    sqe->splice_fd_in)
+	if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
@@ -4239,10 +4254,7 @@ static int io_mkdirat_prep(struct io_kiocb *req,
 	struct io_mkdir *mkd = &req->mkdir;
 	const char __user *fname;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
-	    sqe->splice_fd_in)
+	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
@@ -4282,10 +4294,7 @@ static int io_symlinkat_prep(struct io_kiocb *req,
 	struct io_symlink *sl = &req->symlink;
 	const char __user *oldpath, *newpath;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
-	    sqe->splice_fd_in)
+	if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
@@ -4331,9 +4340,7 @@ static int io_linkat_prep(struct io_kiocb *req,
 	struct io_hardlink *lnk = &req->hardlink;
 	const char __user *oldf, *newf;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+	if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
@@ -4380,9 +4387,7 @@ static int io_shutdown_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_NET)
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
 		     sqe->buf_index || sqe->splice_fd_in))
 		return -EINVAL;
 
@@ -4422,9 +4427,6 @@ static int __io_splice_prep(struct io_kiocb *req,
 	struct io_splice *sp = &req->splice;
 	unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-
 	sp->len = READ_ONCE(sqe->len);
 	sp->flags = READ_ONCE(sqe->splice_flags);
 	if (unlikely(sp->flags & ~valid_flags))
@@ -4523,11 +4525,6 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
  */
 static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
-	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-
 	__io_req_complete(req, issue_flags, 0, 0);
 	return 0;
 }
@@ -4535,8 +4532,8 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 static int io_msg_ring_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
-	if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
-		     sqe->splice_fd_in || sqe->buf_index || sqe->personality))
+	if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
+		     sqe->buf_index || sqe->personality))
 		return -EINVAL;
 
 	req->msg.user_data = READ_ONCE(sqe->off);
@@ -4577,12 +4574,7 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
-	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-		     sqe->splice_fd_in))
+	if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
 		return -EINVAL;
 
 	req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -4615,10 +4607,7 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
 static int io_fallocate_prep(struct io_kiocb *req,
 			     const struct io_uring_sqe *sqe)
 {
-	if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
-	    sqe->splice_fd_in)
-		return -EINVAL;
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+	if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
 		return -EINVAL;
 
 	req->sync.off = READ_ONCE(sqe->off);
@@ -4649,9 +4638,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	const char __user *fname;
 	int ret;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (unlikely(sqe->ioprio || sqe->buf_index))
+	if (unlikely(sqe->buf_index))
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
@@ -4783,7 +4770,7 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
 	struct io_provide_buf *p = &req->pbuf;
 	u64 tmp;
 
-	if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+	if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
 	    sqe->splice_fd_in)
 		return -EINVAL;
 
@@ -4850,7 +4837,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
 	struct io_provide_buf *p = &req->pbuf;
 	u64 tmp;
 
-	if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+	if (sqe->rw_flags || sqe->splice_fd_in)
 		return -EINVAL;
 
 	tmp = READ_ONCE(sqe->fd);
@@ -4980,9 +4967,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 			     const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_EPOLL)
-	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
-		return -EINVAL;
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+	if (sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 
 	req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -5026,9 +5011,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
 static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
-	if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
-		return -EINVAL;
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+	if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
 		return -EINVAL;
 
 	req->madvise.addr = READ_ONCE(sqe->addr);
@@ -5061,9 +5044,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
-		return -EINVAL;
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+	if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
 		return -EINVAL;
 
 	req->fadvise.offset = READ_ONCE(sqe->off);
@@ -5099,9 +5080,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	const char __user *path;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+	if (sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	if (req->flags & REQ_F_FIXED_FILE)
 		return -EBADF;
@@ -5146,10 +5125,7 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
-	    sqe->rw_flags || sqe->buf_index)
+	if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
 		return -EINVAL;
 	if (req->flags & REQ_F_FIXED_FILE)
 		return -EBADF;
@@ -5215,12 +5191,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
-	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-		     sqe->splice_fd_in))
+	if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
 		return -EINVAL;
 
 	req->sync.off = READ_ONCE(sqe->off);
@@ -5298,8 +5269,6 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = &req->sr_msg;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
 	if (unlikely(sqe->addr2 || sqe->file_index))
 		return -EINVAL;
 
@@ -5533,8 +5502,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = &req->sr_msg;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
 	if (unlikely(sqe->addr2 || sqe->file_index))
 		return -EINVAL;
 
@@ -5692,9 +5659,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_accept *accept = &req->accept;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->len || sqe->buf_index)
+	if (sqe->len || sqe->buf_index)
 		return -EINVAL;
 
 	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -5760,10 +5725,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_connect *conn = &req->connect;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
-	    sqe->splice_fd_in)
+	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
 		return -EINVAL;
 
 	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -6446,9 +6408,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
 	struct io_poll_update *upd = &req->poll_update;
 	u32 flags;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+	if (sqe->buf_index || sqe->splice_fd_in)
 		return -EINVAL;
 	flags = READ_ONCE(sqe->len);
 	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -6478,9 +6438,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	struct io_poll_iocb *poll = &req->poll;
 	u32 flags;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
+	if (sqe->buf_index || sqe->off || sqe->addr)
 		return -EINVAL;
 	flags = READ_ONCE(sqe->len);
 	if (flags & ~IORING_POLL_ADD_MULTI)
@@ -6687,11 +6645,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
 {
 	struct io_timeout_rem *tr = &req->timeout_rem;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
 		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
 		return -EINVAL;
 
 	tr->ltimeout = false;
@@ -6761,10 +6717,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	unsigned flags;
 	u32 off = READ_ONCE(sqe->off);
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
-	    sqe->splice_fd_in)
+	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
 		return -EINVAL;
 	if (off && is_timeout_link)
 		return -EINVAL;
@@ -6946,11 +6899,9 @@ static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
 static int io_async_cancel_prep(struct io_kiocb *req,
 				const struct io_uring_sqe *sqe)
 {
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
 		return -EINVAL;
-	if (sqe->ioprio || sqe->off || sqe->len || sqe->splice_fd_in)
+	if (sqe->off || sqe->len || sqe->splice_fd_in)
 		return -EINVAL;
 
 	req->cancel.addr = READ_ONCE(sqe->addr);
@@ -7036,7 +6987,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
 {
 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
 		return -EINVAL;
-	if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+	if (sqe->rw_flags || sqe->splice_fd_in)
 		return -EINVAL;
 
 	req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -7849,6 +7800,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		}
 	}
 
+	if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+		return -EINVAL;
+	if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+
 	if (io_op_defs[opcode].needs_file) {
 		struct io_submit_state *state = &ctx->submit_state;
 
-- 
2.35.1


* [PATCH 2/2] io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg
From: Jens Axboe @ 2022-04-27  1:54 UTC
  To: io-uring; +Cc: Jens Axboe

If IORING_RECVSEND_POLL_FIRST is set for recv/recvmsg or send/sendmsg,
then we arm poll first rather than attempt a receive or send upfront.
This can be useful if we expect there to be no data (or space) available
for the request, as we can then avoid wasting time on the initial
issue attempt.
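
As a usage sketch (liburing calls for illustration only; 'ring',
'sockfd', 'buf', and 'buflen' are assumed to exist):

        struct iovec iov = { .iov_base = buf, .iov_len = buflen };
        struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
        struct io_uring_sqe *sqe;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_sendmsg(sqe, sockfd, &msg, 0);
        /* arm poll for POLLOUT first instead of trying the send now */
        sqe->addr2 = IORING_RECVSEND_POLL_FIRST;
        io_uring_submit(&ring);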

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 27 +++++++++++++++++++++++++--
 include/uapi/linux/io_uring.h | 10 ++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 39325e469738..a14bd5f55028 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -637,6 +637,7 @@ struct io_sr_msg {
 	int				bgid;
 	size_t				len;
 	size_t				done_io;
+	unsigned int			flags;
 };
 
 struct io_open {
@@ -5269,11 +5270,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = &req->sr_msg;
 
-	if (unlikely(sqe->addr2 || sqe->file_index))
+	if (unlikely(sqe->file_index))
 		return -EINVAL;
 
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
+	sr->flags = READ_ONCE(sqe->addr2);
+	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+		return -EINVAL;
 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 	if (sr->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
@@ -5308,6 +5312,10 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 		kmsg = &iomsg;
 	}
 
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
+		return io_setup_async_msg(req, kmsg);
+
 	flags = req->sr_msg.msg_flags;
 	if (issue_flags & IO_URING_F_NONBLOCK)
 		flags |= MSG_DONTWAIT;
@@ -5350,6 +5358,10 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	int min_ret = 0;
 	int ret;
 
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
+		return -EAGAIN;
+
 	sock = sock_from_file(req->file);
 	if (unlikely(!sock))
 		return -ENOTSOCK;
@@ -5502,11 +5514,14 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = &req->sr_msg;
 
-	if (unlikely(sqe->addr2 || sqe->file_index))
+	if (unlikely(sqe->file_index))
 		return -EINVAL;
 
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
+	sr->flags = READ_ONCE(sqe->addr2);
+	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+		return -EINVAL;
 	sr->bgid = READ_ONCE(sqe->buf_group);
 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 	if (sr->msg_flags & MSG_DONTWAIT)
@@ -5543,6 +5558,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 		kmsg = &iomsg;
 	}
 
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
+		return io_setup_async_msg(req, kmsg);
+
 	if (req->flags & REQ_F_BUFFER_SELECT) {
 		kbuf = io_recv_buffer_select(req, issue_flags);
 		if (IS_ERR(kbuf))
@@ -5600,6 +5619,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 	int ret, min_ret = 0;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
+		return -EAGAIN;
+
 	sock = sock_from_file(req->file);
 	if (unlikely(!sock))
 		return -ENOTSOCK;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fad63564678a..51f972ecaba0 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -213,6 +213,16 @@ enum {
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ *
+ * IORING_RECVSEND_POLL_FIRST	If set, instead of first attempting to send
+ *				or receive and arm poll if that yields an
+ *				-EAGAIN result, arm poll upfront and skip
+ *				the initial transfer attempt.
+ */
+#define IORING_RECVSEND_POLL_FIRST	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
2.35.1


* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
From: Hao Xu @ 2022-04-27  3:57 UTC
  To: Jens Axboe, io-uring

On 4/27/22 9:54 AM, Jens Axboe wrote:
> Hi,
> 
> I had a re-think on the flags2 addition [1] that was posted earlier
> today, and I don't really like the fact that flags2 then can't work
> with ioprio for read/write etc. We might also want to extend the
> ioprio field for other types of IO in the future.
> 
> So rather than do that, take a simpler approach and just add an io_uring
> specific flag set for send/recv and friends. This then allows setting
> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
> will arm poll first rather than attempt a send/recv operation.
> 
> [1] https://lore.kernel.org/io-uring/20220426183343.150273-1-axboe@kernel.dk/
> 
Looks good to me,

Reviewed-by: Hao Xu <howeyxu@tencent.com>


* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
From: Hao Xu @ 2022-04-29 18:31 UTC
  To: Jens Axboe, io-uring

On 4/27/22 09:54, Jens Axboe wrote:
> Hi,
>
> I had a re-think on the flags2 addition [1] that was posted earlier
> today, and I don't really like the fact that flags2 then can't work
> with ioprio for read/write etc. We might also want to extend the
> ioprio field for other types of IO in the future.
>
> So rather than do that, take a simpler approach and just add an io_uring
> specific flag set for send/recv and friends. This then allows setting
> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
> will arm poll first rather than attempt a send/recv operation.
>
> [1] 
> https://lore.kernel.org/io-uring/20220426183343.150273-1-axboe@kernel.dk/
>

Hi Jens,
Could we use something like the high bits of sqe->fd to store a general
flags2, since I saw the number of open FDs can be at most about (1<<20)?
Though I'm not sure we can assume that limit on fd values won't change
in the future..

Regards,
Hao


* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
From: Hao Xu @ 2022-04-29 18:40 UTC
  To: Jens Axboe, io-uring



On 4/30/22 02:31, Hao Xu wrote:
> On 4/27/22 09:54, Jens Axboe wrote:
>> Hi,
>>
>> I had a re-think on the flags2 addition [1] that was posted earlier
>> today, and I don't really like the fact that flags2 then can't work
>> with ioprio for read/write etc. We might also want to extend the
>> ioprio field for other types of IO in the future.
>>
>> So rather than do that, take a simpler approach and just add an io_uring
>> specific flag set for send/recv and friends. This then allows setting
>> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
>> will arm poll first rather than attempt a send/recv operation.
>>
>> [1] 
>> https://lore.kernel.org/io-uring/20220426183343.150273-1-axboe@kernel.dk/
>>
> 
> Hi Jens,
> Could we use something like the high bits of sqe->fd to store a general
> flags2, since I saw the number of open FDs can be at most about (1<<20)?

oops, sorry my bad, (1<<20) is just a default value..

> Though I'm not sure we can assume that limit on fd values won't change
> in the future..
> 
> Regards,
> Hao
> 
> 


* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
From: Jens Axboe @ 2022-04-29 18:44 UTC
  To: Hao Xu, io-uring

On 4/29/22 12:31 PM, Hao Xu wrote:
> On 4/27/22 09:54, Jens Axboe wrote:
>> Hi,
>>
>> I had a re-think on the flags2 addition [1] that was posted earlier
>> today, and I don't really like the fact that flags2 then can't work
>> with ioprio for read/write etc. We might also want to extend the
>> ioprio field for other types of IO in the future.
>>
>> So rather than do that, take a simpler approach and just add an io_uring
>> specific flag set for send/recv and friends. This then allows setting
>> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
>> will arm poll first rather than attempt a send/recv operation.
>>
>> [1] https://lore.kernel.org/io-uring/20220426183343.150273-1-axboe@kernel.dk/
>>
> 
> Hi Jens,
> Could we use something like the high bits of sqe->fd to store a general
> flags2, since I saw the number of open FDs can be at most about (1<<20)?
> Though I'm not sure we can assume that limit on fd values won't change
> in the future..

I think that's a bit iffy; it's pretty universally true (at least on
*NIX) that an fd is a signed int. So I'd be hesitant to do that. I
don't mind the flags here; generally we have potentially 3 per request:

- SQE specific ones, these are the IOSQE_* flags and are meant to be
  generally applicable to all/most commands. io_uring internal, have no
  meaning outside of io_uring.

- Some requests are layered on top of existing functionality, and
  recv/recvmsg is a good example. They have their own sets of flags. We
  generally use the sqe->rw_flags space for those.

- Lastly, io_uring modifiers to specific requests. That's what this
  patch adds. They don't make sense to the lower layers, but they are
  specific to this request type for io_uring.

The 3rd type is put in sqe->rw_flags for io_uring specific opcodes, but
for commands that already have flags in the 2nd category, we have to put
them somewhere else. Not a big deal imho, at least as long as the
request type has space in the sqe for it. They generally do, and they
did in this case.
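
To make the three concrete for a recv request (the assignments below
are purely illustrative, not something this series adds):

        sqe->flags     = IOSQE_ASYNC;                /* 1: generic IOSQE_* flag */
        sqe->msg_flags = MSG_WAITALL;                /* 2: flag of the underlying
                                                      *    recv, in rw_flags space */
        sqe->addr2     = IORING_RECVSEND_POLL_FIRST; /* 3: io_uring modifier for
                                                      *    this request type */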

-- 
Jens Axboe


* Re: [PATCHSET 0/2] Add support for IORING_RECVSEND_POLL_FIRST
From: Jens Axboe @ 2022-04-29 18:44 UTC
  To: Hao Xu, io-uring

On 4/29/22 12:40 PM, Hao Xu wrote:
> 
> 
> On 4/30/22 02:31, Hao Xu wrote:
>> On 4/27/22 09:54, Jens Axboe wrote:
>>> Hi,
>>>
>>> I had a re-think on the flags2 addition [1] that was posted earlier
>>> today, and I don't really like the fact that flags2 then can't work
>>> with ioprio for read/write etc. We might also want to extend the
>>> ioprio field for other types of IO in the future.
>>>
>>> So rather than do that, take a simpler approach and just add an io_uring
>>> specific flag set for send/recv and friends. This then allows setting
>>> IORING_RECVSEND_POLL_FIRST in sqe->addr2 for those, and if set, io_uring
>>> will arm poll first rather than attempt a send/recv operation.
>>>
>>> [1] https://lore.kernel.org/io-uring/20220426183343.150273-1-axboe@kernel.dk/
>>>
>>
>> Hi Jens,
>> Could we use something like the high bits of sqe->fd to store a general
>> flags2, since I saw the number of open FDs can be at most about (1<<20)?
> 
> oops, sorry my bad, (1<<20) is just a default value..

Indeed, you can certainly go higher and people do.

-- 
Jens Axboe

