linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] io_uring: support for larger fixed file sets
@ 2019-10-25 22:22 Jens Axboe
  2019-10-25 23:54 ` Jann Horn
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2019-10-25 22:22 UTC (permalink / raw)
  To: linux-block

There's been a few requests for supporting more fixed files than 1024.
This isn't really tricky to do, we just need to split up the file table
into multiple tables and index appropriately.

This patch adds support for up to 64K files, which should be enough for
everyone.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Complete with updated test case in liburing, test/file-register now has
a test case that registers 8K files (sparse), then does a file set
update of a random file late in the set and performs some IO to it to
ensure it works.

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4402485f0879..2df92134e939 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -80,7 +80,11 @@
 
 #define IORING_MAX_ENTRIES	32768
 #define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
-#define IORING_MAX_FIXED_FILES	1024
+
+#define IORING_FILE_TABLE_SHIFT	10
+#define IORING_MAX_FILES_TABLE	(1U << IORING_FILE_TABLE_SHIFT)
+#define IORING_FILE_TABLE_MASK	(IORING_MAX_FILES_TABLE - 1)
+#define IORING_MAX_FIXED_FILES	(64 * IORING_MAX_FILES_TABLE)
 
 struct io_uring {
 	u32 head ____cacheline_aligned_in_smp;
@@ -165,6 +169,10 @@ struct io_mapped_ubuf {
 	unsigned int	nr_bvecs;
 };
 
+struct fixed_file_table {
+	struct file		**files;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -225,7 +233,7 @@ struct io_ring_ctx {
 	 * readers must ensure that ->refs is alive as long as the file* is
 	 * used. Only updated through io_uring_register(2).
 	 */
-	struct file		**user_files;
+	struct fixed_file_table	*file_table;
 	unsigned		nr_user_files;
 
 	/* if used, fixed mapped user buffers */
@@ -2295,6 +2303,15 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe)
 	}
 }
 
+static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
+					      int index)
+{
+	struct fixed_file_table *table;
+
+	table = &ctx->file_table[index >> IORING_FILE_TABLE_SHIFT];
+	return table->files[index & IORING_FILE_TABLE_MASK];
+}
+
 static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 			   struct io_submit_state *state, struct io_kiocb *req)
 {
@@ -2317,12 +2334,15 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 		return 0;
 
 	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files ||
+		struct file *file;
+
+		if (unlikely(!ctx->file_table ||
 		    (unsigned) fd >= ctx->nr_user_files))
 			return -EBADF;
-		if (!ctx->user_files[fd])
+		file = io_file_from_index(ctx, fd);
+		if (!file)
 			return -EBADF;
-		req->file = ctx->user_files[fd];
+		req->file = file;
 		req->flags |= REQ_F_FIXED_FILE;
 	} else {
 		if (s->needs_fixed_file)
@@ -2974,20 +2994,29 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 #else
 	int i;
 
-	for (i = 0; i < ctx->nr_user_files; i++)
-		if (ctx->user_files[i])
-			fput(ctx->user_files[i]);
+	for (i = 0; i < ctx->nr_user_files; i++) {
+		struct file *file;
+
+		file = io_file_from_index(ctx, i);
+		if (file)
+			fput(file);
+	}
 #endif
 }
 
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
-	if (!ctx->user_files)
+	unsigned nr_tables, i;
+
+	if (!ctx->file_table)
 		return -ENXIO;
 
 	__io_sqe_files_unregister(ctx);
-	kfree(ctx->user_files);
-	ctx->user_files = NULL;
+	nr_tables = (ctx->nr_user_files + IORING_MAX_FILES_TABLE - 1) /
+			IORING_MAX_FILES_TABLE;
+	for (i = 0; i < nr_tables; i++)
+		kfree(ctx->file_table[i].files);
+	kfree(ctx->file_table);
+	ctx->file_table = NULL;
 	ctx->nr_user_files = 0;
 	return 0;
 }
@@ -3062,9 +3091,11 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
 	nr_files = 0;
 	fpl->user = get_uid(ctx->user);
 	for (i = 0; i < nr; i++) {
-		if (!ctx->user_files[i + offset])
+		struct file *file = io_file_from_index(ctx, i + offset);
+
+		if (!file)
 			continue;
-		fpl->fp[nr_files] = get_file(ctx->user_files[i + offset]);
+		fpl->fp[nr_files] = get_file(file);
 		unix_inflight(fpl->user, fpl->fp[nr_files]);
 		nr_files++;
 	}
@@ -3113,8 +3144,10 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
 		return 0;
 
 	while (total < ctx->nr_user_files) {
-		if (ctx->user_files[total])
-			fput(ctx->user_files[total]);
+		struct file *file = io_file_from_index(ctx, total);
+
+		if (file)
+			fput(file);
 		total++;
 	}
 
@@ -3127,25 +3160,66 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
 }
 #endif
 
+static int io_sqe_alloc_file_tables(struct io_ring_ctx *ctx, unsigned nr_tables,
+				    unsigned nr_files)
+{
+	int i;
+
+	for (i = 0; i < nr_tables; i++) {
+		struct fixed_file_table *table = &ctx->file_table[i];
+		unsigned this_files;
+
+		this_files = nr_files;
+		if (this_files > IORING_MAX_FILES_TABLE)
+			this_files = IORING_MAX_FILES_TABLE;
+
+		table->files = kcalloc(this_files, sizeof(struct file *),
+					GFP_KERNEL);
+		if (!table->files)
+			break;
+	}
+
+	if (i == nr_tables)
+		return 0;
+
+	for (i = 0; i < nr_tables; i++) {
+		struct fixed_file_table *table = &ctx->file_table[i];
+		kfree(table->files);
+	}
+	return 1;
+}
+
 static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 				 unsigned nr_args)
 {
 	__s32 __user *fds = (__s32 __user *) arg;
+	unsigned nr_tables;
 	int fd, ret = 0;
 	unsigned i;
 
-	if (ctx->user_files)
+	if (ctx->file_table)
 		return -EBUSY;
 	if (!nr_args)
 		return -EINVAL;
 	if (nr_args > IORING_MAX_FIXED_FILES)
 		return -EMFILE;
 
-	ctx->user_files = kcalloc(nr_args, sizeof(struct file *), GFP_KERNEL);
-	if (!ctx->user_files)
+	nr_tables = (nr_args + IORING_MAX_FILES_TABLE - 1) /
+			IORING_MAX_FILES_TABLE;
+	ctx->file_table = kcalloc(nr_tables, sizeof(struct fixed_file_table),
+					GFP_KERNEL);
+	if (!ctx->file_table)
 		return -ENOMEM;
 
+	if (io_sqe_alloc_file_tables(ctx, nr_tables, nr_args)) {
+		kfree(ctx->file_table);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
+		struct fixed_file_table *table;
+		unsigned table_index;
+
 		ret = -EFAULT;
 		if (copy_from_user(&fd, &fds[i], sizeof(fd)))
 			break;
@@ -3155,10 +3229,12 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			continue;
 		}
 
-		ctx->user_files[i] = fget(fd);
+		table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT];
+		table_index = i & IORING_FILE_TABLE_MASK;
+		table->files[table_index] = fget(fd);
 
 		ret = -EBADF;
-		if (!ctx->user_files[i])
+		if (!table->files[table_index])
 			break;
 		/*
 		 * Don't allow io_uring instances to be registered. If UNIX
@@ -3167,20 +3243,28 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		 * handle it just fine, but there's still no point in allowing
 		 * a ring fd as it doesn't support regular read/write anyway.
 		 */
-		if (ctx->user_files[i]->f_op == &io_uring_fops) {
-			fput(ctx->user_files[i]);
+		if (table->files[table_index]->f_op == &io_uring_fops) {
+			fput(table->files[table_index]);
 			break;
 		}
 		ret = 0;
 	}
 
 	if (ret) {
-		for (i = 0; i < ctx->nr_user_files; i++)
-			if (ctx->user_files[i])
-				fput(ctx->user_files[i]);
+		for (i = 0; i < ctx->nr_user_files; i++) {
+			struct fixed_file_table *table;
+			unsigned index;
+
+			table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT];
+			index = i & IORING_FILE_TABLE_MASK;
+			if (table->files[index])
+				fput(table->files[index]);
+		}
+		for (i = 0; i < nr_tables; i++)
+			kfree(ctx->file_table[i].files);
 
-		kfree(ctx->user_files);
-		ctx->user_files = NULL;
+		kfree(ctx->file_table);
+		ctx->file_table = NULL;
 		ctx->nr_user_files = 0;
 		return ret;
 	}
@@ -3195,7 +3279,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
 {
 #if defined(CONFIG_UNIX)
-	struct file *file = ctx->user_files[index];
+	struct file *file = io_file_from_index(ctx, index);
 	struct sock *sock = ctx->ring_sock->sk;
 	struct sk_buff_head list, *head = &sock->sk_receive_queue;
 	struct sk_buff *skb;
@@ -3251,7 +3335,7 @@ static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
 		spin_unlock_irq(&head->lock);
 	}
 #else
-	fput(ctx->user_files[index]);
+	fput(io_file_from_index(ctx, index));
 #endif
 }
 
@@ -3306,7 +3390,7 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
 	int fd, i, err;
 	__u32 done;
 
-	if (!ctx->user_files)
+	if (!ctx->file_table)
 		return -ENXIO;
 	if (!nr_args)
 		return -EINVAL;
@@ -3320,15 +3404,20 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
 	done = 0;
 	fds = (__s32 __user *) up.fds;
 	while (nr_args) {
+		struct fixed_file_table *table;
+		unsigned index;
+
 		err = 0;
 		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
 			err = -EFAULT;
 			break;
 		}
 		i = array_index_nospec(up.offset, ctx->nr_user_files);
-		if (ctx->user_files[i]) {
+		table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT];
+		index = i & IORING_FILE_TABLE_MASK;
+		if (table->files[index]) {
 			io_sqe_file_unregister(ctx, i);
-			ctx->user_files[i] = NULL;
+			table->files[index] = NULL;
 		}
 		if (fd != -1) {
 			struct file *file;
@@ -3351,7 +3440,7 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
 				err = -EBADF;
 				break;
 			}
-			ctx->user_files[i] = file;
+			table->files[index] = file;
 			err = io_sqe_file_register(ctx, file, i);
 			if (err)
 				break;

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] io_uring: support for larger fixed file sets
  2019-10-25 22:22 [PATCH] io_uring: support for larger fixed file sets Jens Axboe
@ 2019-10-25 23:54 ` Jann Horn
  2019-10-26 13:14   ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Jann Horn @ 2019-10-25 23:54 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block

On Sat, Oct 26, 2019 at 12:22 AM Jens Axboe <axboe@kernel.dk> wrote:
> There's been a few requests for supporting more fixed files than 1024.
> This isn't really tricky to do, we just need to split up the file table
> into multiple tables and index appropriately.
>
> This patch adds support for up to 64K files, which should be enough for
> everyone.

What's the reason against vmalloc()? Performance of table reallocation?

> +static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
> +                                             int index)
> +{
> +       struct fixed_file_table *table;
> +
> +       table = &ctx->file_table[index >> IORING_FILE_TABLE_SHIFT];
> +       return table->files[index & IORING_FILE_TABLE_MASK];
> +}
[...]
> @@ -2317,12 +2334,15 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
>                 return 0;
>
>         if (flags & IOSQE_FIXED_FILE) {
> -               if (unlikely(!ctx->user_files ||
> +               struct file *file;
> +
> +               if (unlikely(!ctx->file_table ||
>                     (unsigned) fd >= ctx->nr_user_files))
>                         return -EBADF;

Not a problem with this patch, but: By the way, the array lookup after
this is a "array index using bounds-checked index" pattern, so
array_index_nospec() might be appropriate here. See __fcheck_files()
for comparison.

> -               if (!ctx->user_files[fd])
> +               file = io_file_from_index(ctx, fd);
> +               if (!file)
>                         return -EBADF;
> -               req->file = ctx->user_files[fd];
> +               req->file = file;
>                 req->flags |= REQ_F_FIXED_FILE;
>         } else {
>                 if (s->needs_fixed_file)
[...]
> +static int io_sqe_alloc_file_tables(struct io_ring_ctx *ctx, unsigned nr_tables,
> +                                   unsigned nr_files)
> +{
> +       int i;
> +
> +       for (i = 0; i < nr_tables; i++) {
> +               struct fixed_file_table *table = &ctx->file_table[i];
> +               unsigned this_files;
> +
> +               this_files = nr_files;
> +               if (this_files > IORING_MAX_FILES_TABLE)
> +                       this_files = IORING_MAX_FILES_TABLE;

nr_files and this_files stay the same throughout the loop. I suspect
that the intent here was something like:

this_files = min_t(unsigned, nr_files, IORING_MAX_FILES_TABLE);
nr_files -= this_files;

> +
> +               table->files = kcalloc(this_files, sizeof(struct file *),
> +                                       GFP_KERNEL);
> +               if (!table->files)
> +                       break;
> +       }
> +
> +       if (i == nr_tables)
> +               return 0;
> +
> +       for (i = 0; i < nr_tables; i++) {
> +               struct fixed_file_table *table = &ctx->file_table[i];
> +               kfree(table->files);
> +       }
> +       return 1;
> +}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] io_uring: support for larger fixed file sets
  2019-10-25 23:54 ` Jann Horn
@ 2019-10-26 13:14   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2019-10-26 13:14 UTC (permalink / raw)
  To: Jann Horn; +Cc: linux-block

On 10/25/19 5:54 PM, Jann Horn wrote:
> On Sat, Oct 26, 2019 at 12:22 AM Jens Axboe <axboe@kernel.dk> wrote:
>> There's been a few requests for supporting more fixed files than 1024.
>> This isn't really tricky to do, we just need to split up the file table
>> into multiple tables and index appropriately.
>>
>> This patch adds support for up to 64K files, which should be enough for
>> everyone.
> 
> What's the reason against vmalloc()? Performance of table reallocation?

Yeah, not that it's a super hot path, but single page allocs are always
going to be faster. And it's not a big deal to manage an index of tables.
I've changed the table shift to 9, which will actually improve the
reliability of the 1024 file case as well now, as every table will just
be a 4096b alloc.

>> +static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
>> +                                             int index)
>> +{
>> +       struct fixed_file_table *table;
>> +
>> +       table = &ctx->file_table[index >> IORING_FILE_TABLE_SHIFT];
>> +       return table->files[index & IORING_FILE_TABLE_MASK];
>> +}
> [...]
>> @@ -2317,12 +2334,15 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
>>                  return 0;
>>
>>          if (flags & IOSQE_FIXED_FILE) {
>> -               if (unlikely(!ctx->user_files ||
>> +               struct file *file;
>> +
>> +               if (unlikely(!ctx->file_table ||
>>                      (unsigned) fd >= ctx->nr_user_files))
>>                          return -EBADF;
> 
> Not a problem with this patch, but: By the way, the array lookup after
> this is a "array index using bounds-checked index" pattern, so
> array_index_nospec() might be appropriate here. See __fcheck_files()
> for comparison.

Yes, I think we should update that separately. I agree, should be
using the nospec indexer.

>> +static int io_sqe_alloc_file_tables(struct io_ring_ctx *ctx, unsigned nr_tables,
>> +                                   unsigned nr_files)
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i < nr_tables; i++) {
>> +               struct fixed_file_table *table = &ctx->file_table[i];
>> +               unsigned this_files;
>> +
>> +               this_files = nr_files;
>> +               if (this_files > IORING_MAX_FILES_TABLE)
>> +                       this_files = IORING_MAX_FILES_TABLE;
> 
> nr_files and this_files stay the same throughout the loop. I suspect
> that the intent here was something like:
> 
> this_files = min_t(unsigned, nr_files, IORING_MAX_FILES_TABLE);
> nr_files -= this_files;

Yes, I messed up the decrement, always make a mental note to do so,
often forget. I've fixed this up, thanks.

Made a few other cleanups, will send a v2.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-10-26 13:14 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-25 22:22 [PATCH] io_uring: support for larger fixed file sets Jens Axboe
2019-10-25 23:54 ` Jann Horn
2019-10-26 13:14   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).