All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org
Cc: jmoyer@redhat.com, Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 2/2] io_uring: add support for IORING_REGISTER_FILES_UPDATE
Date: Fri,  4 Oct 2019 10:22:22 -0600	[thread overview]
Message-ID: <20191004162222.10390-3-axboe@kernel.dk> (raw)
In-Reply-To: <20191004162222.10390-1-axboe@kernel.dk>

Allows the application to remove/replace/add files to/from a file set.
Passes in a struct:

struct io_uring_files_update {
	__u32 offset;
	__s32 *fds;
};

that holds an array of fds, size of array passed in through the usual
nr_args part of the io_uring_register() system call. The logic is as
follows:

1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
   the set.
2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
   replaced with ->fds[i].

For case #2, is the existing file is currently empty (fd == -1), the
new fd is simply added to the array.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 168 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |   6 ++
 2 files changed, 174 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6d4f1394cfca..8afb0b689523 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3155,6 +3155,171 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }
 
+static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
+{
+#if defined(CONFIG_UNIX)
+	struct file *file = ctx->user_files[index];
+	struct sock *sock = ctx->ring_sock->sk;
+	struct sk_buff_head list, *head = &sock->sk_receive_queue;
+	struct sk_buff *skb;
+	int i;
+
+	__skb_queue_head_init(&list);
+
+	/*
+	 * Find the skb that holds this file in its SCM_RIGHTS. When found,
+	 * remove this entry and rearrange the file array.
+	 */
+	skb = skb_dequeue(head);
+	while (skb) {
+		struct scm_fp_list *fp;
+
+		fp = UNIXCB(skb).fp;
+		for (i = 0; i < fp->count; i++) {
+			int left;
+
+			if (fp->fp[i] != file)
+				continue;
+
+			unix_notinflight(fp->user, fp->fp[i]);
+			left = fp->count - 1 - i;
+			if (left) {
+				memmove(&fp->fp[i], &fp->fp[i + 1],
+						left * sizeof(struct file *));
+			}
+			fp->count--;
+			if (!fp->count) {
+				kfree_skb(skb);
+				skb = NULL;
+			} else if (skb_peek(&list)) {
+				spin_lock_irq(&head->lock);
+				__skb_queue_tail(&list, skb);
+				while ((skb = __skb_dequeue(&list)) != NULL)
+					__skb_queue_tail(head, skb);
+				spin_unlock_irq(&head->lock);
+			}
+			file = NULL;
+			break;
+		}
+
+		if (!file)
+			break;
+
+		__skb_queue_tail(&list, skb);
+
+		skb = skb_dequeue(head);
+	}
+#else
+	fput(ctx->user_files[index]);
+#endif
+}
+
+static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+				int index)
+{
+#if defined(CONFIG_UNIX)
+	struct sock *sock = ctx->ring_sock->sk;
+	struct sk_buff_head *head = &sock->sk_receive_queue;
+	struct sk_buff *skb;
+
+	/*
+	 * See if we can merge this file into an existing skb SCM_RIGHTS
+	 * file set. If there's no room, fall back to allocating a new skb
+	 * and filling it in.
+	 */
+	spin_lock_irq(&head->lock);
+	skb = skb_peek(head);
+	if (skb) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+		if (fpl->count < SCM_MAX_FD) {
+			__skb_unlink(skb, head);
+			spin_unlock_irq(&head->lock);
+			fpl->fp[fpl->count] = get_file(file);
+			unix_inflight(fpl->user, fpl->fp[fpl->count]);
+			fpl->count++;
+			spin_lock_irq(&head->lock);
+			__skb_queue_head(head, skb);
+		} else {
+			skb = NULL;
+		}
+	}
+	spin_unlock_irq(&head->lock);
+
+	if (skb) {
+		fput(file);
+		return 0;
+	}
+
+	return __io_sqe_files_scm(ctx, 1, index);
+#else
+	return 0;
+#endif
+}
+
+static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
+			       unsigned nr_args)
+{
+	struct io_uring_files_update up;
+	int fd, i, err, done;
+	__s32 __user *fds;
+
+	if (!ctx->user_files)
+		return -ENXIO;
+	if (!nr_args)
+		return -EINVAL;
+	if (copy_from_user(&up, arg, sizeof(up)))
+		return -EFAULT;
+	if (up.offset + nr_args > ctx->nr_user_files)
+		return -EINVAL;
+
+	done = 0;
+	i = up.offset;
+	fds = (__s32 __user *) up.fds;
+	while (nr_args) {
+		err = 0;
+		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+			err = -EFAULT;
+			break;
+		}
+		if (ctx->user_files[i]) {
+			io_sqe_file_unregister(ctx, i);
+			ctx->user_files[i] = NULL;
+		}
+		if (fd != -1) {
+			struct file *file;
+
+			file = fget(fd);
+			if (!file) {
+				err = -EBADF;
+				break;
+			}
+			/*
+			 * Don't allow io_uring instances to be registered. If
+			 * UNIX isn't enabled, then this causes a reference
+			 * cycle and this instance can never get freed. If UNIX
+			 * is enabled we'll handle it just fine, but there's
+			 * still no point in allowing a ring fd as it doesn't
+			 * support regular read/write anyway.
+			 */
+			if (file->f_op == &io_uring_fops) {
+				fput(file);
+				err = -EBADF;
+				break;
+			}
+			ctx->user_files[i] = file;
+			err = io_sqe_file_register(ctx, file, i);
+			if (err)
+				break;
+		}
+		nr_args--;
+		done++;
+		i++;
+	}
+
+	return done ? done : err;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx,
 			       struct io_uring_params *p)
 {
@@ -3969,6 +4134,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_sqe_files_unregister(ctx);
 		break;
+	case IORING_REGISTER_FILES_UPDATE:
+		ret = io_sqe_files_update(ctx, arg, nr_args);
+		break;
 	case IORING_REGISTER_EVENTFD:
 		ret = -EINVAL;
 		if (nr_args != 1)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ea57526a5b89..4f532d9c0554 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -150,5 +150,11 @@ struct io_uring_params {
 #define IORING_UNREGISTER_FILES		3
 #define IORING_REGISTER_EVENTFD		4
 #define IORING_UNREGISTER_EVENTFD	5
+#define IORING_REGISTER_FILES_UPDATE	6
+
+struct io_uring_files_update {
+	__u32 offset;
+	__s32 *fds;
+};
 
 #endif
-- 
2.17.1


  parent reply	other threads:[~2019-10-04 16:22 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-04 16:22 [PATCHSET v2] io_uring: support fileset add/remove/modify Jens Axboe
2019-10-04 16:22 ` [PATCH 1/2] io_uring: allow sparse fixed file sets Jens Axboe
2019-10-04 16:22 ` Jens Axboe [this message]
2019-10-04 18:17 ` [PATCHSET v2] io_uring: support fileset add/remove/modify Jeff Moyer
2019-10-04 18:52   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191004162222.10390-3-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=jmoyer@redhat.com \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.