All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	linux-block@vger.kernel.org
Cc: hch@lst.de, jmoyer@redhat.com, avi@scylladb.com,
	Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 13/18] io_uring: add file set registration
Date: Wed, 23 Jan 2019 08:35:30 -0700	[thread overview]
Message-ID: <20190123153536.7081-20-axboe@kernel.dk> (raw)
In-Reply-To: <20190123153536.7081-1-axboe@kernel.dk>

We normally have to fget/fput for each IO we do on a file. Even with
the batching we do, the cost of the atomic inc/dec of the file usage
count adds up.

This adds the IORING_REGISTER_FILES and IORING_UNREGISTER_FILES opcodes
for the io_uring_register(2) system call. The arguments passed in must
be an array of __s32 holding file descriptors, and nr_args should hold
the number of file descriptors the application wishes to pin for the
duration of the io_uring context (or until IORING_UNREGISTER_FILES is
called).

When used, the application must set IOSQE_FIXED_FILE in the sqe->flags
member. Then, instead of setting sqe->fd to the real fd, it sets sqe->fd
to the index in the array passed in to IORING_REGISTER_FILES.

Files are automatically unregistered when the io_uring context is
torn down. An application need only unregister if it wishes to
register a new set of fds.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 125 +++++++++++++++++++++++++++++-----
 include/uapi/linux/io_uring.h |   9 ++-
 2 files changed, 116 insertions(+), 18 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 63ad09e7cdc7..86add82e1008 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -100,6 +100,10 @@ struct io_ring_ctx {
 		struct fasync_struct	*cq_fasync;
 	} ____cacheline_aligned_in_smp;
 
+	/* if used, fixed file set */
+	struct file		**user_files;
+	unsigned		nr_user_files;
+
 	/* if used, fixed mapped user buffers */
 	unsigned		nr_user_bufs;
 	struct io_mapped_ubuf	*user_bufs;
@@ -137,6 +141,7 @@ struct io_kiocb {
 #define REQ_F_FORCE_NONBLOCK	1	/* inline submission attempt */
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_IOPOLL_EAGAIN	4	/* submission got EAGAIN */
+#define REQ_F_FIXED_FILE	8	/* ctx owns file */
 	u64			user_data;
 	u64			res;
 
@@ -359,15 +364,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		 * Batched puts of the same file, to avoid dirtying the
 		 * file usage count multiple times, if avoidable.
 		 */
-		if (!file) {
-			file = req->rw.ki_filp;
-			file_count = 1;
-		} else if (file == req->rw.ki_filp) {
-			file_count++;
-		} else {
-			fput_many(file, file_count);
-			file = req->rw.ki_filp;
-			file_count = 1;
+		if (!(req->flags & REQ_F_FIXED_FILE)) {
+			if (!file) {
+				file = req->rw.ki_filp;
+				file_count = 1;
+			} else if (file == req->rw.ki_filp) {
+				file_count++;
+			} else {
+				fput_many(file, file_count);
+				file = req->rw.ki_filp;
+				file_count = 1;
+			}
 		}
 
 		if (to_free == ARRAY_SIZE(reqs))
@@ -504,13 +511,19 @@ static void kiocb_end_write(struct kiocb *kiocb)
 	}
 }
 
+static void io_fput(struct io_kiocb *req)
+{
+	if (!(req->flags & REQ_F_FIXED_FILE))
+		fput(req->rw.ki_filp);
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
 	kiocb_end_write(kiocb);
 
-	fput(kiocb->ki_filp);
+	io_fput(req);
 	io_cqring_add_event(req->ctx, req->user_data, res, 0);
 	io_free_req(req);
 }
@@ -614,7 +627,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	struct kiocb *kiocb = &req->rw;
 	int ret;
 
-	kiocb->ki_filp = io_file_get(state, sqe->fd);
+	if (sqe->flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files || sqe->fd >= ctx->nr_user_files))
+			return -EBADF;
+		kiocb->ki_filp = ctx->user_files[sqe->fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		kiocb->ki_filp = io_file_get(state, sqe->fd);
+	}
 	if (unlikely(!kiocb->ki_filp))
 		return -EBADF;
 	kiocb->ki_pos = sqe->off;
@@ -653,7 +673,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	}
 	return 0;
 out_fput:
-	io_file_put(state, kiocb->ki_filp);
+	if (!(sqe->flags & IOSQE_FIXED_FILE))
+		io_file_put(state, kiocb->ki_filp);
 	return ret;
 }
 
@@ -770,7 +791,7 @@ static ssize_t io_read(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	kfree(iovec);
 out_fput:
 	if (unlikely(ret))
-		fput(file);
+		io_fput(req);
 	return ret;
 }
 
@@ -825,7 +846,7 @@ static ssize_t io_write(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	kfree(iovec);
 out_fput:
 	if (unlikely(ret))
-		fput(file);
+		io_fput(req);
 	return ret;
 }
 
@@ -863,14 +884,23 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (unlikely(sqe->fsync_flags & ~IORING_FSYNC_DATASYNC))
 		return -EINVAL;
 
-	file = fget(sqe->fd);
+	if (sqe->flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files || sqe->fd >= ctx->nr_user_files))
+			return -EBADF;
+		file = ctx->user_files[sqe->fd];
+	} else {
+		file = fget(sqe->fd);
+	}
+
 	if (unlikely(!file))
 		return -EBADF;
 
 	ret = vfs_fsync_range(file, sqe->off, end > 0 ? end : LLONG_MAX,
 			sqe->fsync_flags & IORING_FSYNC_DATASYNC);
 
-	fput(file);
+	if (!(sqe->flags & IOSQE_FIXED_FILE))
+		fput(file);
+
 	io_cqring_add_event(ctx, sqe->user_data, ret, 0);
 	io_free_req(req);
 	return 0;
@@ -988,7 +1018,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(s->sqe->flags))
+	if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
 		return -EINVAL;
 
 	req = io_get_req(ctx, state);
@@ -1173,6 +1203,57 @@ static int __io_uring_enter(struct io_ring_ctx *ctx, unsigned to_submit,
 	return ret;
 }
 
+static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+{
+	int i;
+
+	if (!ctx->user_files)
+		return -ENXIO;
+
+	for (i = 0; i < ctx->nr_user_files; i++)
+		fput(ctx->user_files[i]);
+
+	kfree(ctx->user_files);
+	ctx->user_files = NULL;
+	ctx->nr_user_files = 0;
+	return 0;
+}
+
+static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+				 unsigned nr_args)
+{
+	__s32 __user *fds = (__s32 __user *) arg;
+	int fd, i, ret = 0;
+
+	if (ctx->user_files)
+		return -EBUSY;
+	if (!nr_args)
+		return -EINVAL;
+
+	ctx->user_files = kcalloc(nr_args, sizeof(struct file *), GFP_KERNEL);
+	if (!ctx->user_files)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; i++) {
+		ret = -EFAULT;
+		if (copy_from_user(&fd, &fds[i], sizeof(fd)))
+			break;
+
+		ctx->user_files[i] = fget(fd);
+
+		ret = -EBADF;
+		if (!ctx->user_files[i])
+			break;
+		ctx->nr_user_files++;
+		ret = 0;
+	}
+
+	if (ret)
+		io_sqe_files_unregister(ctx);
+
+	return ret;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx)
 {
 	int ret;
@@ -1468,6 +1549,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_sq_offload_stop(ctx);
 	io_iopoll_reap_events(ctx);
 	io_free_scq_urings(ctx);
+	io_sqe_files_unregister(ctx);
 	io_sqe_buffer_unregister(ctx);
 	percpu_ref_exit(&ctx->refs);
 	io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries));
@@ -1780,6 +1862,15 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_sqe_buffer_unregister(ctx);
 		break;
+	case IORING_REGISTER_FILES:
+		ret = io_sqe_files_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_FILES:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_sqe_files_unregister(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 03ce7133c3b2..8323320077ec 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -18,7 +18,7 @@
  */
 struct io_uring_sqe {
 	__u8	opcode;		/* type of operation for this sqe */
-	__u8	flags;		/* as of now unused */
+	__u8	flags;		/* IOSQE_ flags */
 	__u16	ioprio;		/* ioprio for the request */
 	__s32	fd;		/* file descriptor to do IO on */
 	__u64	off;		/* offset into file */
@@ -35,6 +35,11 @@ struct io_uring_sqe {
 	};
 };
 
+/*
+ * sqe->flags
+ */
+#define IOSQE_FIXED_FILE	(1 << 0)	/* use fixed fileset */
+
 /*
  * io_uring_setup() flags
  */
@@ -114,5 +119,7 @@ struct io_uring_params {
  */
 #define IORING_REGISTER_BUFFERS		0
 #define IORING_UNREGISTER_BUFFERS	1
+#define IORING_REGISTER_FILES		2
+#define IORING_UNREGISTER_FILES		3
 
 #endif
-- 
2.17.1


  parent reply	other threads:[~2019-01-23 15:36 UTC|newest]

Thread overview: 172+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-23 15:35 [PATCHSET v7] io_uring IO interface Jens Axboe
2019-01-23 15:35 ` [PATCH 01/18] fs: add an iopoll method to struct file_operations Jens Axboe
2019-01-28 14:25   ` Christoph Hellwig
2019-01-28 16:13     ` Jens Axboe
2019-01-23 15:35 ` [PATCH 02/18] block: wire up block device iopoll method Jens Axboe
2019-01-23 15:35 ` [PATCH 03/18] block: add bio_set_polled() helper Jens Axboe
2019-01-28 14:26   ` Christoph Hellwig
2019-01-23 15:35 ` [PATCH 04/18] iomap: wire up the iopoll method Jens Axboe
2019-01-23 15:35 ` [PATCH 05/18] Add io_uring IO interface Jens Axboe
2019-01-28 14:57   ` Christoph Hellwig
2019-01-28 14:57     ` Christoph Hellwig
2019-01-28 16:26     ` Jens Axboe
2019-01-28 16:26       ` Jens Axboe
2019-01-28 16:34       ` Christoph Hellwig
2019-01-28 16:34         ` Christoph Hellwig
2019-01-28 19:32         ` Jens Axboe
2019-01-28 19:32           ` Jens Axboe
2019-01-28 18:25     ` Jens Axboe
2019-01-28 18:25       ` Jens Axboe
2019-01-29  6:30       ` Christoph Hellwig
2019-01-29  6:30         ` Christoph Hellwig
2019-01-29 11:58         ` Arnd Bergmann
2019-01-29 11:58           ` Arnd Bergmann
2019-01-29 15:20           ` Jens Axboe
2019-01-29 15:20             ` Jens Axboe
2019-01-29 16:18             ` Arnd Bergmann
2019-01-29 16:18               ` Arnd Bergmann
2019-01-29 16:19               ` Jens Axboe
2019-01-29 16:19                 ` Jens Axboe
2019-01-29 16:26                 ` Arnd Bergmann
2019-01-29 16:26                   ` Arnd Bergmann
2019-01-29 16:28                   ` Jens Axboe
2019-01-29 16:28                     ` Jens Axboe
2019-01-29 16:46                     ` Arnd Bergmann
2019-01-29 16:46                       ` Arnd Bergmann
2019-01-29  0:47     ` Andy Lutomirski
2019-01-29  0:47       ` Andy Lutomirski
2019-01-29  1:20       ` Jens Axboe
2019-01-29  1:20         ` Jens Axboe
2019-01-29  6:45         ` Christoph Hellwig
2019-01-29  6:45           ` Christoph Hellwig
2019-01-29 12:05           ` Arnd Bergmann
2019-01-29 12:05             ` Arnd Bergmann
2019-01-31  5:11         ` Andy Lutomirski
2019-01-31  5:11           ` Andy Lutomirski
2019-01-31 16:37           ` Jens Axboe
2019-01-31 16:37             ` Jens Axboe
2019-01-23 15:35 ` [PATCH 06/18] io_uring: add fsync support Jens Axboe
2019-01-23 15:35 ` [PATCH 07/13] io_uring: add support for pre-mapped user IO buffers Jens Axboe
2019-01-23 15:35 ` [PATCH 07/18] io_uring: support for IO polling Jens Axboe
2019-01-28 15:02   ` Christoph Hellwig
2019-01-28 16:46     ` Jens Axboe
2019-01-29  6:27       ` Christoph Hellwig
2019-01-29 13:20         ` Jens Axboe
2019-01-23 15:35 ` [PATCH 08/18] fs: add fget_many() and fput_many() Jens Axboe
2019-01-28 14:29   ` Christoph Hellwig
2019-01-28 16:48     ` Jens Axboe
2019-01-23 15:35 ` [PATCH 08/13] io_uring: add file set registration Jens Axboe
2019-01-23 15:35 ` [PATCH 09/13] io_uring: add submission polling Jens Axboe
2019-01-28 15:09   ` Christoph Hellwig
2019-01-28 17:05     ` Jens Axboe
2019-01-29  6:29       ` Christoph Hellwig
2019-01-29 13:21         ` Jens Axboe
2019-01-28 21:13   ` Jeff Moyer
2019-01-28 21:28     ` Jens Axboe
2019-01-23 15:35 ` [PATCH 09/18] io_uring: use fget/fput_many() for file references Jens Axboe
2019-01-23 15:35 ` [PATCH 10/13] io_uring: add io_kiocb ref count Jens Axboe
2019-01-23 15:35 ` [PATCH 10/18] io_uring: batch io_kiocb allocation Jens Axboe
2019-01-23 15:35 ` [PATCH 11/18] block: implement bio helper to add iter bvec pages to bio Jens Axboe
2019-01-28 14:31   ` Christoph Hellwig
2019-01-28 16:54     ` Jens Axboe
2019-01-23 15:35 ` [PATCH 11/13] io_uring: add support for IORING_OP_POLL Jens Axboe
2019-01-23 15:35 ` [PATCH 12/18] io_uring: add support for pre-mapped user IO buffers Jens Axboe
2019-01-23 15:35 ` [PATCH 12/13] io_uring: allow workqueue item to handle multiple buffered requests Jens Axboe
2019-01-23 15:35 ` Jens Axboe [this message]
2019-01-23 15:35 ` [PATCH 13/13] io_uring: add io_uring_event cache hit information Jens Axboe
2019-01-23 15:35 ` [PATCH 14/18] io_uring: add submission polling Jens Axboe
2019-01-23 15:35 ` [PATCH 15/18] io_uring: add io_kiocb ref count Jens Axboe
2019-01-23 15:35 ` [PATCH 16/18] io_uring: add support for IORING_OP_POLL Jens Axboe
2019-01-23 15:35 ` [PATCH 17/18] io_uring: allow workqueue item to handle multiple buffered requests Jens Axboe
2019-01-23 15:35 ` [PATCH 18/18] io_uring: add io_uring_event cache hit information Jens Axboe
2019-01-28 21:35 [PATCHSET v8] io_uring IO interface Jens Axboe
2019-01-28 21:35 ` [PATCH 13/18] io_uring: add file set registration Jens Axboe
2019-01-28 21:35   ` Jens Axboe
2019-01-29 16:36   ` Jann Horn
2019-01-29 16:36     ` Jann Horn
2019-01-29 18:13     ` Jens Axboe
2019-01-29 18:13       ` Jens Axboe
2019-01-29 19:26 [PATCHSET v9] io_uring IO interface Jens Axboe
2019-01-29 19:26 ` [PATCH 13/18] io_uring: add file set registration Jens Axboe
2019-01-29 19:26   ` Jens Axboe
2019-01-30  1:29   ` Jann Horn
2019-01-30  1:29     ` Jann Horn
2019-01-30 15:35     ` Jens Axboe
2019-01-30 15:35       ` Jens Axboe
2019-02-04  2:56     ` Al Viro
2019-02-04  2:56       ` Al Viro
2019-02-05  2:19       ` Jens Axboe
2019-02-05  2:19         ` Jens Axboe
2019-02-05 17:57         ` Jens Axboe
2019-02-05 17:57           ` Jens Axboe
2019-02-05 19:08           ` Jens Axboe
2019-02-05 19:08             ` Jens Axboe
2019-02-06  0:27             ` Jens Axboe
2019-02-06  0:27               ` Jens Axboe
2019-02-06  1:01               ` Al Viro
2019-02-06  1:01                 ` Al Viro
2019-02-06 17:56                 ` Jens Axboe
2019-02-06 17:56                   ` Jens Axboe
2019-02-07  4:05                   ` Al Viro
2019-02-07  4:05                     ` Al Viro
2019-02-07 16:14                     ` Jens Axboe
2019-02-07 16:30                       ` Al Viro
2019-02-07 16:30                         ` Al Viro
2019-02-07 16:35                         ` Jens Axboe
2019-02-07 16:35                           ` Jens Axboe
2019-02-07 16:51                         ` Al Viro
2019-02-07 16:51                           ` Al Viro
2019-02-06  0:56             ` Al Viro
2019-02-06  0:56               ` Al Viro
2019-02-06 13:41               ` Jens Axboe
2019-02-06 13:41                 ` Jens Axboe
2019-02-07  4:00                 ` Al Viro
2019-02-07  4:00                   ` Al Viro
2019-02-07  9:22                   ` Miklos Szeredi
2019-02-07  9:22                     ` Miklos Szeredi
2019-02-07 13:31                     ` Al Viro
2019-02-07 13:31                       ` Al Viro
2019-02-07 14:20                       ` Miklos Szeredi
2019-02-07 14:20                         ` Miklos Szeredi
2019-02-07 15:20                         ` Al Viro
2019-02-07 15:20                           ` Al Viro
2019-02-07 15:27                           ` Miklos Szeredi
2019-02-07 15:27                             ` Miklos Szeredi
2019-02-07 16:26                             ` Al Viro
2019-02-07 16:26                               ` Al Viro
2019-02-07 19:08                               ` Miklos Szeredi
2019-02-07 19:08                                 ` Miklos Szeredi
2019-02-07 18:45                   ` Jens Axboe
2019-02-07 18:45                     ` Jens Axboe
2019-02-07 18:58                     ` Jens Axboe
2019-02-07 18:58                       ` Jens Axboe
2019-02-11 15:55                     ` Jonathan Corbet
2019-02-11 15:55                       ` Jonathan Corbet
2019-02-11 17:35                       ` Al Viro
2019-02-11 17:35                         ` Al Viro
2019-02-11 20:33                         ` Jonathan Corbet
2019-02-11 20:33                           ` Jonathan Corbet
2019-01-30 21:55 [PATCHSET v10] io_uring IO interface Jens Axboe
2019-01-30 21:55 ` [PATCH 13/18] io_uring: add file set registration Jens Axboe
2019-01-30 21:55   ` Jens Axboe
2019-02-01 15:23 [PATCHSET v11] io_uring IO interface Jens Axboe
2019-02-01 15:24 ` [PATCH 13/18] io_uring: add file set registration Jens Axboe
2019-02-01 15:24   ` Jens Axboe
2019-02-07 19:55 [PATCHSET v12] io_uring IO interface Jens Axboe
2019-02-07 19:55 ` [PATCH 13/18] io_uring: add file set registration Jens Axboe
2019-02-07 19:55   ` Jens Axboe
2019-02-08 12:17   ` Alan Jenkins
2019-02-08 12:17     ` Alan Jenkins
2019-02-08 12:57     ` Jens Axboe
2019-02-08 12:57       ` Jens Axboe
2019-02-08 14:02       ` Alan Jenkins
2019-02-08 14:02         ` Alan Jenkins
2019-02-08 15:13         ` Jens Axboe
2019-02-08 15:13           ` Jens Axboe
2019-02-12 12:29           ` Alan Jenkins
2019-02-12 12:29             ` Alan Jenkins
2019-02-12 15:17             ` Jens Axboe
2019-02-12 15:17               ` Jens Axboe
2019-02-12 17:21               ` Alan Jenkins
2019-02-12 17:21                 ` Alan Jenkins
2019-02-12 17:33                 ` Jens Axboe
2019-02-12 17:33                   ` Jens Axboe
2019-02-12 20:23                   ` Alan Jenkins
2019-02-12 20:23                     ` Alan Jenkins
2019-02-12 21:10                     ` Jens Axboe
2019-02-12 21:10                       ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190123153536.7081-20-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=avi@scylladb.com \
    --cc=hch@lst.de \
    --cc=jmoyer@redhat.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.