Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
@ 2019-10-04 14:52 Jens Axboe
  2019-10-04 15:34 ` Jeff Moyer
  0 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2019-10-04 14:52 UTC (permalink / raw)
  To: linux-block

Allows the application to remove/replace/add files to/from a file set.
Passes in a struct:

struct io_uring_files_update {
        __u32 offset;
        __s32 *fds;
};

that holds an array of fds, size of array passed in through the usual
nr_args part of the io_uring_register() system call. The logic is as
follows:

1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
   the set.
2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
   replaced with ->fds[i].

For case #2, is the existing file is currently empty (fd == -1), the
new fd is simply added to the array.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

liburing has a test case for this, test/file-register.c

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6d4f1394cfca..8afb0b689523 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3155,6 +3155,171 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }
 
+static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
+{
+#if defined(CONFIG_UNIX)
+	struct file *file = ctx->user_files[index];
+	struct sock *sock = ctx->ring_sock->sk;
+	struct sk_buff_head list, *head = &sock->sk_receive_queue;
+	struct sk_buff *skb;
+	int i;
+
+	__skb_queue_head_init(&list);
+
+	/*
+	 * Find the skb that holds this file in its SCM_RIGHTS. When found,
+	 * remove this entry and rearrange the file array.
+	 */
+	skb = skb_dequeue(head);
+	while (skb) {
+		struct scm_fp_list *fp;
+
+		fp = UNIXCB(skb).fp;
+		for (i = 0; i < fp->count; i++) {
+			int left;
+
+			if (fp->fp[i] != file)
+				continue;
+
+			unix_notinflight(fp->user, fp->fp[i]);
+			left = fp->count - 1 - i;
+			if (left) {
+				memmove(&fp->fp[i], &fp->fp[i + 1],
+						left * sizeof(struct file *));
+			}
+			fp->count--;
+			if (!fp->count) {
+				kfree_skb(skb);
+				skb = NULL;
+			} else if (skb_peek(&list)) {
+				spin_lock_irq(&head->lock);
+				__skb_queue_tail(&list, skb);
+				while ((skb = __skb_dequeue(&list)) != NULL)
+					__skb_queue_tail(head, skb);
+				spin_unlock_irq(&head->lock);
+			}
+			file = NULL;
+			break;
+		}
+
+		if (!file)
+			break;
+
+		__skb_queue_tail(&list, skb);
+
+		skb = skb_dequeue(head);
+	}
+#else
+	fput(ctx->user_files[index]);
+#endif
+}
+
+static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+				int index)
+{
+#if defined(CONFIG_UNIX)
+	struct sock *sock = ctx->ring_sock->sk;
+	struct sk_buff_head *head = &sock->sk_receive_queue;
+	struct sk_buff *skb;
+
+	/*
+	 * See if we can merge this file into an existing skb SCM_RIGHTS
+	 * file set. If there's no room, fall back to allocating a new skb
+	 * and filling it in.
+	 */
+	spin_lock_irq(&head->lock);
+	skb = skb_peek(head);
+	if (skb) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+		if (fpl->count < SCM_MAX_FD) {
+			__skb_unlink(skb, head);
+			spin_unlock_irq(&head->lock);
+			fpl->fp[fpl->count] = get_file(file);
+			unix_inflight(fpl->user, fpl->fp[fpl->count]);
+			fpl->count++;
+			spin_lock_irq(&head->lock);
+			__skb_queue_head(head, skb);
+		} else {
+			skb = NULL;
+		}
+	}
+	spin_unlock_irq(&head->lock);
+
+	if (skb) {
+		fput(file);
+		return 0;
+	}
+
+	return __io_sqe_files_scm(ctx, 1, index);
+#else
+	return 0;
+#endif
+}
+
+static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
+			       unsigned nr_args)
+{
+	struct io_uring_files_update up;
+	int fd, i, err, done;
+	__s32 __user *fds;
+
+	if (!ctx->user_files)
+		return -ENXIO;
+	if (!nr_args)
+		return -EINVAL;
+	if (copy_from_user(&up, arg, sizeof(up)))
+		return -EFAULT;
+	if (up.offset + nr_args > ctx->nr_user_files)
+		return -EINVAL;
+
+	done = 0;
+	i = up.offset;
+	fds = (__s32 __user *) up.fds;
+	while (nr_args) {
+		err = 0;
+		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+			err = -EFAULT;
+			break;
+		}
+		if (ctx->user_files[i]) {
+			io_sqe_file_unregister(ctx, i);
+			ctx->user_files[i] = NULL;
+		}
+		if (fd != -1) {
+			struct file *file;
+
+			file = fget(fd);
+			if (!file) {
+				err = -EBADF;
+				break;
+			}
+			/*
+			 * Don't allow io_uring instances to be registered. If
+			 * UNIX isn't enabled, then this causes a reference
+			 * cycle and this instance can never get freed. If UNIX
+			 * is enabled we'll handle it just fine, but there's
+			 * still no point in allowing a ring fd as it doesn't
+			 * support regular read/write anyway.
+			 */
+			if (file->f_op == &io_uring_fops) {
+				fput(file);
+				err = -EBADF;
+				break;
+			}
+			ctx->user_files[i] = file;
+			err = io_sqe_file_register(ctx, file, i);
+			if (err)
+				break;
+		}
+		nr_args--;
+		done++;
+		i++;
+	}
+
+	return done ? done : err;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx,
 			       struct io_uring_params *p)
 {
@@ -3969,6 +4134,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_sqe_files_unregister(ctx);
 		break;
+	case IORING_REGISTER_FILES_UPDATE:
+		ret = io_sqe_files_update(ctx, arg, nr_args);
+		break;
 	case IORING_REGISTER_EVENTFD:
 		ret = -EINVAL;
 		if (nr_args != 1)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ea57526a5b89..4f532d9c0554 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -150,5 +150,11 @@ struct io_uring_params {
 #define IORING_UNREGISTER_FILES		3
 #define IORING_REGISTER_EVENTFD		4
 #define IORING_UNREGISTER_EVENTFD	5
+#define IORING_REGISTER_FILES_UPDATE	6
+
+struct io_uring_files_update {
+	__u32 offset;
+	__s32 *fds;
+};
 
 #endif
-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
  2019-10-04 14:52 [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE Jens Axboe
@ 2019-10-04 15:34 ` Jeff Moyer
  2019-10-04 15:45   ` Jens Axboe
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Moyer @ 2019-10-04 15:34 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block\

Jens Axboe <axboe@kernel.dk> writes:

> Allows the application to remove/replace/add files to/from a file set.
> Passes in a struct:
>
> struct io_uring_files_update {
>         __u32 offset;
>         __s32 *fds;
> };
>
> that holds an array of fds, size of array passed in through the usual
> nr_args part of the io_uring_register() system call. The logic is as
> follows:
>
> 1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
>    the set.
> 2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
>    replaced with ->fds[i].
>
> For case #2, is the existing file is currently empty (fd == -1), the
> new fd is simply added to the array.

If I'm reading this (and the code) right, that means you can't add files
to a set.  Wouldn't that be a useful thing to do, instead of just
replacing existing ones?

Can you post the man page update along with this?

Thanks,
Jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
  2019-10-04 15:34 ` Jeff Moyer
@ 2019-10-04 15:45   ` Jens Axboe
  2019-10-04 16:03     ` Jeff Moyer
  0 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2019-10-04 15:45 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: linux-block

On 10/4/19 9:34 AM, Jeff Moyer wrote:
> Jens Axboe <axboe@kernel.dk> writes:
> 
>> Allows the application to remove/replace/add files to/from a file set.
>> Passes in a struct:
>>
>> struct io_uring_files_update {
>>          __u32 offset;
>>          __s32 *fds;
>> };
>>
>> that holds an array of fds, size of array passed in through the usual
>> nr_args part of the io_uring_register() system call. The logic is as
>> follows:
>>
>> 1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
>>     the set.
>> 2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
>>     replaced with ->fds[i].
>>
>> For case #2, is the existing file is currently empty (fd == -1), the
>> new fd is simply added to the array.
> 
> If I'm reading this (and the code) right, that means you can't add files
> to a set.  Wouldn't that be a useful thing to do, instead of just
> replacing existing ones?

You can add files to a set, you just can't grow a set beyond the size
you originally registered. I actually forgot to post the pre-patch for
this, which is:

http://git.kernel.dk/cgit/linux-block/commit/?h=for-5.5/io_uring&id=fb3e60f87aa43f4f047f01243d6be54dadd9d67a

This allows registering -1 as the fd, so you could register 10 files,
but an array of size 500 (for example), and the last 490 fds are just
-1. Then you can use the IORING_REGISTER_FILES_UPDATE to replace those
empty fds with real files later on.

> Can you post the man page update along with this?

Yes, I'll write the documentation too, just wanted consensus on the
approach before I wrote up documentation.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
  2019-10-04 15:45   ` Jens Axboe
@ 2019-10-04 16:03     ` Jeff Moyer
  2019-10-04 16:11       ` Jens Axboe
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Moyer @ 2019-10-04 16:03 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block\

Jens Axboe <axboe@kernel.dk> writes:

> On 10/4/19 9:34 AM, Jeff Moyer wrote:
>> If I'm reading this (and the code) right, that means you can't add files
>> to a set.  Wouldn't that be a useful thing to do, instead of just
>> replacing existing ones?
>
> You can add files to a set, you just can't grow a set beyond the size
> you originally registered. I actually forgot to post the pre-patch for
> this, which is:
>
> http://git.kernel.dk/cgit/linux-block/commit/?h=for-5.5/io_uring&id=fb3e60f87aa43f4f047f01243d6be54dadd9d67a
>
> This allows registering -1 as the fd, so you could register 10 files,
> but an array of size 500 (for example), and the last 490 fds are just
> -1. Then you can use the IORING_REGISTER_FILES_UPDATE to replace those
> empty fds with real files later on.

That makes more sense, but still requires a priori knowledge of how many
files you'll need to work with, otherwise you're back to
unregister/register dance.  Is it really that hard to grow on demand?

>> Can you post the man page update along with this?
>
> Yes, I'll write the documentation too, just wanted consensus on the
> approach before I wrote up documentation.

Gotcha.

-Jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
  2019-10-04 16:03     ` Jeff Moyer
@ 2019-10-04 16:11       ` Jens Axboe
  2019-10-04 16:17         ` Jeff Moyer
  0 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2019-10-04 16:11 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: linux-block

On 10/4/19 10:03 AM, Jeff Moyer wrote:
> Jens Axboe <axboe@kernel.dk> writes:
> 
>> On 10/4/19 9:34 AM, Jeff Moyer wrote:
>>> If I'm reading this (and the code) right, that means you can't add files
>>> to a set.  Wouldn't that be a useful thing to do, instead of just
>>> replacing existing ones?
>>
>> You can add files to a set, you just can't grow a set beyond the size
>> you originally registered. I actually forgot to post the pre-patch for
>> this, which is:
>>
>> http://git.kernel.dk/cgit/linux-block/commit/?h=for-5.5/io_uring&id=fb3e60f87aa43f4f047f01243d6be54dadd9d67a
>>
>> This allows registering -1 as the fd, so you could register 10 files,
>> but an array of size 500 (for example), and the last 490 fds are just
>> -1. Then you can use the IORING_REGISTER_FILES_UPDATE to replace those
>> empty fds with real files later on.
> 
> That makes more sense, but still requires a priori knowledge of how many
> files you'll need to work with, otherwise you're back to
> unregister/register dance.  Is it really that hard to grow on demand?

It's not, it's just more efficient to add a file through replace, than it
is to alloc a new array, copy things over, free it. That also impacts the
application side of things, since that has to maintain an array of
descriptors so that it knows what fd maps to what index.

If you expose growing, you also have some kind of obligation to make it
efficient, and I just don't think that's possible. But there's nothing
preventing this API from supporting it, you'd just do an update with
offset == current_array_size, and then nr_args as what to grow with.
Currently that would -EINVAL, but it could be added as a feature.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
  2019-10-04 16:11       ` Jens Axboe
@ 2019-10-04 16:17         ` Jeff Moyer
  2019-10-04 16:19           ` Jens Axboe
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Moyer @ 2019-10-04 16:17 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block\

Jens Axboe <axboe@kernel.dk> writes:

> On 10/4/19 10:03 AM, Jeff Moyer wrote:
>> Jens Axboe <axboe@kernel.dk> writes:
>> 
>>> On 10/4/19 9:34 AM, Jeff Moyer wrote:
>>>> If I'm reading this (and the code) right, that means you can't add files
>>>> to a set.  Wouldn't that be a useful thing to do, instead of just
>>>> replacing existing ones?
>>>
>>> You can add files to a set, you just can't grow a set beyond the size
>>> you originally registered. I actually forgot to post the pre-patch for
>>> this, which is:
>>>
>>> http://git.kernel.dk/cgit/linux-block/commit/?h=for-5.5/io_uring&id=fb3e60f87aa43f4f047f01243d6be54dadd9d67a
>>>
>>> This allows registering -1 as the fd, so you could register 10 files,
>>> but an array of size 500 (for example), and the last 490 fds are just
>>> -1. Then you can use the IORING_REGISTER_FILES_UPDATE to replace those
>>> empty fds with real files later on.
>> 
>> That makes more sense, but still requires a priori knowledge of how many
>> files you'll need to work with, otherwise you're back to
>> unregister/register dance.  Is it really that hard to grow on demand?
>
> It's not, it's just more efficient to add a file through replace, than it
> is to alloc a new array, copy things over, free it. That also impacts the
> application side of things, since that has to maintain an array of
> descriptors so that it knows what fd maps to what index.

Yeah, that's a good point about the application side.

> If you expose growing, you also have some kind of obligation to make it
> efficient, and I just don't think that's possible. But there's nothing
> preventing this API from supporting it, you'd just do an update with
> offset == current_array_size, and then nr_args as what to grow with.
> Currently that would -EINVAL, but it could be added as a feature.

OK, I'm fine with the API as-is.  If someone asks, growing can be added
later.

Cheers,
Jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE
  2019-10-04 16:17         ` Jeff Moyer
@ 2019-10-04 16:19           ` Jens Axboe
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2019-10-04 16:19 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: linux-block

On 10/4/19 10:17 AM, Jeff Moyer wrote:
> Jens Axboe <axboe@kernel.dk> writes:
> 
>> On 10/4/19 10:03 AM, Jeff Moyer wrote:
>>> Jens Axboe <axboe@kernel.dk> writes:
>>>
>>>> On 10/4/19 9:34 AM, Jeff Moyer wrote:
>>>>> If I'm reading this (and the code) right, that means you can't add files
>>>>> to a set.  Wouldn't that be a useful thing to do, instead of just
>>>>> replacing existing ones?
>>>>
>>>> You can add files to a set, you just can't grow a set beyond the size
>>>> you originally registered. I actually forgot to post the pre-patch for
>>>> this, which is:
>>>>
>>>> http://git.kernel.dk/cgit/linux-block/commit/?h=for-5.5/io_uring&id=fb3e60f87aa43f4f047f01243d6be54dadd9d67a
>>>>
>>>> This allows registering -1 as the fd, so you could register 10 files,
>>>> but an array of size 500 (for example), and the last 490 fds are just
>>>> -1. Then you can use the IORING_REGISTER_FILES_UPDATE to replace those
>>>> empty fds with real files later on.
>>>
>>> That makes more sense, but still requires a priori knowledge of how many
>>> files you'll need to work with, otherwise you're back to
>>> unregister/register dance.  Is it really that hard to grow on demand?
>>
>> It's not, it's just more efficient to add a file through replace, than it
>> is to alloc a new array, copy things over, free it. That also impacts the
>> application side of things, since that has to maintain an array of
>> descriptors so that it knows what fd maps to what index.
> 
> Yeah, that's a good point about the application side.
> 
>> If you expose growing, you also have some kind of obligation to make it
>> efficient, and I just don't think that's possible. But there's nothing
>> preventing this API from supporting it, you'd just do an update with
>> offset == current_array_size, and then nr_args as what to grow with.
>> Currently that would -EINVAL, but it could be added as a feature.
> 
> OK, I'm fine with the API as-is.  If someone asks, growing can be added
> later.

Sounds good, thanks. I'll send out a v2 with the prep patch included.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-04 14:52 [PATCH] io_uring: add support for IORING_REGISTER_FILES_UPDATE Jens Axboe
2019-10-04 15:34 ` Jeff Moyer
2019-10-04 15:45   ` Jens Axboe
2019-10-04 16:03     ` Jeff Moyer
2019-10-04 16:11       ` Jens Axboe
2019-10-04 16:17         ` Jeff Moyer
2019-10-04 16:19           ` Jens Axboe

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git