* [RFC v2 00/12] dynamic buffers + rsrc tagging
@ 2021-04-25 13:32 Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 01/12] io_uring: move __io_sqe_files_unregister Pavel Begunkov
                   ` (12 more replies)
  0 siblings, 13 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

1) support dynamic management for registered buffers, including
updates.

2) add new IORING_REGISTER* opcodes for rsrc register and rsrc update,
which just dispatch files/buffers to the right callbacks. Needed
because the old opcodes are not nicely extendible. The downside is
that restrictions can't be applied to them with fine granularity.

3) add rsrc tagging, with tag=0 skipping CQE posting. Unregister
doesn't post CQEs, but that can easily be changed. (See the sketch
below for how userspace is expected to use the new interface.)
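
To make the intended userspace flow concrete, here is a minimal sketch
of registering tagged buffers through the new interface, using the
structs and opcodes as they end up defined by patches 7, 8 and 12 of
this series (i.e. it assumes the patched uapi header). Raw syscall(2)
is used since liburing support is not part of the series; the helper
name is made up for illustration.

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>

/*
 * Register nr tagged buffers. A buffer registered with a non-zero tag
 * posts a CQE with cqe->user_data == tag once the buffer is finally
 * put after being removed/replaced by a later update; tag 0 keeps the
 * old silent behaviour.
 */
static int register_tagged_buffers(int ring_fd, struct iovec *iovs,
				   uint64_t *tags, unsigned int nr)
{
	struct io_uring_rsrc_register rr;

	memset(&rr, 0, sizeof(rr));
	rr.type = IORING_RSRC_BUFFER;	/* or IORING_RSRC_FILE for fd tables */
	rr.nr = nr;
	rr.data = (uintptr_t)iovs;	/* same iovec array as REGISTER_BUFFERS */
	rr.tags = (uintptr_t)tags;	/* one u64 tag per buffer */

	/* nr_args carries sizeof(rr), which keeps the struct extendible */
	return syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_RSRC,
		       &rr, sizeof(rr));
}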

v2: instead of importing via async_data for fixed rw, save the
    used io_mapped_ubuf and reuse it on re-import.
    Add patch 9/12 as a preparation for that.

    Fix prep rw getting a rsrc node ref for fixed files without
    having a rsrc node.

Bijan Mottahedeh (1):
  io_uring: implement fixed buffers registration similar to fixed files

Pavel Begunkov (11):
  io_uring: move __io_sqe_files_unregister
  io_uring: return back rsrc data free helper
  io_uring: decouple CQE filling from requests
  io_uring: preparation for rsrc tagging
  io_uring: add generic path for rsrc update
  io_uring: enumerate dynamic resources
  io_uring: add IORING_REGISTER_RSRC
  io_uring: add generic rsrc update with tags
  io_uring: keep table of pointers to ubufs
  io_uring: prepare fixed rw for dynamic buffers
  io_uring: add full-fledged dynamic buffers support

 fs/io_uring.c                 | 523 +++++++++++++++++++++++++---------
 include/uapi/linux/io_uring.h |  23 ++
 2 files changed, 405 insertions(+), 141 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 01/12] io_uring: move __io_sqe_files_unregister
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 02/12] io_uring: return back rsrc data free helper Pavel Begunkov
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A preparation patch moving __io_sqe_files_unregister() definition closer
to other "files" functions without any modification.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 54 +++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b2aa9b99b820..70e331349213 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7023,33 +7023,6 @@ static void io_free_file_tables(struct io_file_table *table, unsigned nr_files)
 	table->files = NULL;
 }
 
-static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
-{
-#if defined(CONFIG_UNIX)
-	if (ctx->ring_sock) {
-		struct sock *sock = ctx->ring_sock->sk;
-		struct sk_buff *skb;
-
-		while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
-			kfree_skb(skb);
-	}
-#else
-	int i;
-
-	for (i = 0; i < ctx->nr_user_files; i++) {
-		struct file *file;
-
-		file = io_file_from_index(ctx, i);
-		if (file)
-			fput(file);
-	}
-#endif
-	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
-	kfree(ctx->file_data);
-	ctx->file_data = NULL;
-	ctx->nr_user_files = 0;
-}
-
 static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
 {
 	spin_lock_bh(&ctx->rsrc_ref_lock);
@@ -7152,6 +7125,33 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 	return data;
 }
 
+static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
+{
+#if defined(CONFIG_UNIX)
+	if (ctx->ring_sock) {
+		struct sock *sock = ctx->ring_sock->sk;
+		struct sk_buff *skb;
+
+		while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
+			kfree_skb(skb);
+	}
+#else
+	int i;
+
+	for (i = 0; i < ctx->nr_user_files; i++) {
+		struct file *file;
+
+		file = io_file_from_index(ctx, i);
+		if (file)
+			fput(file);
+	}
+#endif
+	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+	kfree(ctx->file_data);
+	ctx->file_data = NULL;
+	ctx->nr_user_files = 0;
+}
+
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	int ret;
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 02/12] io_uring: return back rsrc data free helper
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 01/12] io_uring: move __io_sqe_files_unregister Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 03/12] io_uring: decouple CQE filling from requests Pavel Begunkov
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add an io_rsrc_data_free() helper for destroying rsrc_data; it's easier
to search for, and the function will get more stuff to destroy shortly.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 70e331349213..a1f89340e844 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7109,6 +7109,11 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
 	return ret;
 }
 
+static void io_rsrc_data_free(struct io_rsrc_data *data)
+{
+	kfree(data);
+}
+
 static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 					       rsrc_put_fn *do_put)
 {
@@ -7147,7 +7152,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	}
 #endif
 	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
-	kfree(ctx->file_data);
+	io_rsrc_data_free(ctx->file_data);
 	ctx->file_data = NULL;
 	ctx->nr_user_files = 0;
 }
@@ -7624,7 +7629,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	io_free_file_tables(&ctx->file_table, nr_args);
 	ctx->nr_user_files = 0;
 out_free:
-	kfree(ctx->file_data);
+	io_rsrc_data_free(ctx->file_data);
 	ctx->file_data = NULL;
 	return ret;
 }
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 03/12] io_uring: decouple CQE filling from requests
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 01/12] io_uring: move __io_sqe_files_unregister Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 02/12] io_uring: return back rsrc data free helper Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 04/12] io_uring: preparation for rsrc tagging Pavel Begunkov
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Make __io_cqring_fill_event() agnostic of struct io_kiocb, pass all the
data needed directly into it. Will be used to post rsrc removal
completions, which don't have an associated request.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 55 ++++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a1f89340e844..23f052a1d964 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1025,7 +1025,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 static void io_uring_cancel_sqpoll(struct io_sq_data *sqd);
 static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
 
-static bool io_cqring_fill_event(struct io_kiocb *req, long res, unsigned cflags);
+static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+				 long res, unsigned int cflags);
 static void io_put_req(struct io_kiocb *req);
 static void io_put_req_deferred(struct io_kiocb *req, int nr);
 static void io_dismantle_req(struct io_kiocb *req);
@@ -1266,7 +1267,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
-		io_cqring_fill_event(req, status, 0);
+		io_cqring_fill_event(req->ctx, req->user_data, status, 0);
 		io_put_req_deferred(req, 1);
 	}
 }
@@ -1500,10 +1501,9 @@ static inline void req_ref_get(struct io_kiocb *req)
 	atomic_inc(&req->refs);
 }
 
-static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
-				     unsigned int cflags)
+static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
+				     long res, unsigned int cflags)
 {
-	struct io_ring_ctx *ctx = req->ctx;
 	struct io_overflow_cqe *ocqe;
 
 	ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
@@ -1521,20 +1521,19 @@ static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
 		set_bit(0, &ctx->cq_check_overflow);
 		ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
 	}
-	ocqe->cqe.user_data = req->user_data;
+	ocqe->cqe.user_data = user_data;
 	ocqe->cqe.res = res;
 	ocqe->cqe.flags = cflags;
 	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
 	return true;
 }
 
-static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
-					     unsigned int cflags)
+static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+					  long res, unsigned int cflags)
 {
-	struct io_ring_ctx *ctx = req->ctx;
 	struct io_uring_cqe *cqe;
 
-	trace_io_uring_complete(ctx, req->user_data, res, cflags);
+	trace_io_uring_complete(ctx, user_data, res, cflags);
 
 	/*
 	 * If we can't get a cq entry, userspace overflowed the
@@ -1543,19 +1542,19 @@ static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
 	 */
 	cqe = io_get_cqring(ctx);
 	if (likely(cqe)) {
-		WRITE_ONCE(cqe->user_data, req->user_data);
+		WRITE_ONCE(cqe->user_data, user_data);
 		WRITE_ONCE(cqe->res, res);
 		WRITE_ONCE(cqe->flags, cflags);
 		return true;
 	}
-	return io_cqring_event_overflow(req, res, cflags);
+	return io_cqring_event_overflow(ctx, user_data, res, cflags);
 }
 
 /* not as hot to bloat with inlining */
-static noinline bool io_cqring_fill_event(struct io_kiocb *req, long res,
-					  unsigned int cflags)
+static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+					  long res, unsigned int cflags)
 {
-	return __io_cqring_fill_event(req, res, cflags);
+	return __io_cqring_fill_event(ctx, user_data, res, cflags);
 }
 
 static void io_req_complete_post(struct io_kiocb *req, long res,
@@ -1565,7 +1564,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	__io_cqring_fill_event(req, res, cflags);
+	__io_cqring_fill_event(ctx, req->user_data, res, cflags);
 	/*
 	 * If we're the last reference to this request, add to our locked
 	 * free_list cache.
@@ -1776,7 +1775,8 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
 		io_remove_next_linked(req);
 		link->timeout.head = NULL;
 		if (hrtimer_try_to_cancel(&io->timer) != -1) {
-			io_cqring_fill_event(link, -ECANCELED, 0);
+			io_cqring_fill_event(link->ctx, link->user_data,
+					     -ECANCELED, 0);
 			io_put_req_deferred(link, 1);
 			return true;
 		}
@@ -1795,7 +1795,7 @@ static void io_fail_links(struct io_kiocb *req)
 		link->link = NULL;
 
 		trace_io_uring_fail_link(req, link);
-		io_cqring_fill_event(link, -ECANCELED, 0);
+		io_cqring_fill_event(link->ctx, link->user_data, -ECANCELED, 0);
 		io_put_req_deferred(link, 2);
 		link = nxt;
 	}
@@ -2116,7 +2116,8 @@ static void io_submit_flush_completions(struct io_comp_state *cs,
 	spin_lock_irq(&ctx->completion_lock);
 	for (i = 0; i < nr; i++) {
 		req = cs->reqs[i];
-		__io_cqring_fill_event(req, req->result, req->compl.cflags);
+		__io_cqring_fill_event(ctx, req->user_data, req->result,
+					req->compl.cflags);
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
@@ -2256,7 +2257,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		if (req->flags & REQ_F_BUFFER_SELECTED)
 			cflags = io_put_rw_kbuf(req);
 
-		__io_cqring_fill_event(req, req->result, cflags);
+		__io_cqring_fill_event(ctx, req->user_data, req->result, cflags);
 		(*nr_events)++;
 
 		if (req_ref_put_and_test(req))
@@ -4869,7 +4870,7 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	}
 	if (req->poll.events & EPOLLONESHOT)
 		flags = 0;
-	if (!io_cqring_fill_event(req, error, flags)) {
+	if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
 		io_poll_remove_waitqs(req);
 		req->poll.done = true;
 		flags = 0;
@@ -5197,7 +5198,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 
 	do_complete = io_poll_remove_waitqs(req);
 	if (do_complete) {
-		io_cqring_fill_event(req, -ECANCELED, 0);
+		io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
 		io_commit_cqring(req->ctx);
 		req_set_fail_links(req);
 		io_put_req_deferred(req, 1);
@@ -5449,7 +5450,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 	atomic_set(&req->ctx->cq_timeouts,
 		atomic_read(&req->ctx->cq_timeouts) + 1);
 
-	io_cqring_fill_event(req, -ETIME, 0);
+	io_cqring_fill_event(ctx, req->user_data, -ETIME, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -5491,7 +5492,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 		return PTR_ERR(req);
 
 	req_set_fail_links(req);
-	io_cqring_fill_event(req, -ECANCELED, 0);
+	io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
 	io_put_req_deferred(req, 1);
 	return 0;
 }
@@ -5564,7 +5565,7 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
 		ret = io_timeout_update(ctx, tr->addr, &tr->ts,
 					io_translate_timeout_mode(tr->flags));
 
-	io_cqring_fill_event(req, ret, 0);
+	io_cqring_fill_event(ctx, req->user_data, ret, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
@@ -5716,7 +5717,7 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
 done:
 	if (!ret)
 		ret = success_ret;
-	io_cqring_fill_event(req, ret, 0);
+	io_cqring_fill_event(ctx, req->user_data, ret, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);
@@ -5773,7 +5774,7 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 
 	spin_lock_irq(&ctx->completion_lock);
 done:
-	io_cqring_fill_event(req, ret, 0);
+	io_cqring_fill_event(ctx, req->user_data, ret, 0);
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 04/12] io_uring: preparation for rsrc tagging
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (2 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 03/12] io_uring: decouple CQE filling from requests Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 05/12] io_uring: add generic path for rsrc update Pavel Begunkov
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

We need a way to notify userspace when a lazily removed resource has
actually been released. This will be done by associating a tag, which is
a u64 exactly like req->user_data, with each rsrc (e.g. buffer or file).
A CQE will be posted once the resource is actually put down.

Tag 0 is a special value set by default, for which no CQE is generated,
preserving the old behaviour.

Don't expose it to userspace yet, but prepare internally: allocate
buffers, add all the posting hooks, etc.
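
Once later patches expose tags to userspace, an application can tell
these notifications apart from request completions purely by the tag
values it chooses. A rough illustration, assuming a hypothetical
tag-numbering convention that is not part of this series:

#include <linux/io_uring.h>
#include <stdio.h>

/* hypothetical convention: tags have the top bit set, user_data never does */
#define RSRC_TAG_BIT	(1ULL << 63)

static void handle_cqe(struct io_uring_cqe *cqe)
{
	if (cqe->user_data & RSRC_TAG_BIT) {
		/* the tagged resource has now been fully put; res is 0 */
		printf("rsrc slot %llu released\n",
		       (unsigned long long)(cqe->user_data & ~RSRC_TAG_BIT));
	} else {
		/* ordinary request completion */
		printf("req %llu completed, res %d\n",
		       (unsigned long long)cqe->user_data, cqe->res);
	}
}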

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 23f052a1d964..8cc593da5cc4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -214,6 +214,7 @@ struct io_fixed_file {
 
 struct io_rsrc_put {
 	struct list_head list;
+	u64 tag;
 	union {
 		void *rsrc;
 		struct file *file;
@@ -239,6 +240,7 @@ typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
 struct io_rsrc_data {
 	struct io_ring_ctx		*ctx;
 
+	u64				*tags;
 	rsrc_put_fn			*do_put;
 	atomic_t			refs;
 	struct completion		done;
@@ -7112,11 +7114,13 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
 
 static void io_rsrc_data_free(struct io_rsrc_data *data)
 {
+	kvfree(data->tags);
 	kfree(data);
 }
 
 static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
-					       rsrc_put_fn *do_put)
+					       rsrc_put_fn *do_put,
+					       unsigned nr)
 {
 	struct io_rsrc_data *data;
 
@@ -7124,6 +7128,12 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 	if (!data)
 		return NULL;
 
+	data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL);
+	if (!data->tags) {
+		kfree(data);
+		return NULL;
+	}
+
 	atomic_set(&data->refs, 1);
 	data->ctx = ctx;
 	data->do_put = do_put;
@@ -7488,6 +7498,20 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 
 	list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
 		list_del(&prsrc->list);
+
+		if (prsrc->tag) {
+			bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
+			unsigned long flags;
+
+			io_ring_submit_lock(ctx, lock_ring);
+			spin_lock_irqsave(&ctx->completion_lock, flags);
+			io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+			io_commit_cqring(ctx);
+			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+			io_cqring_ev_posted(ctx);
+			io_ring_submit_unlock(ctx, lock_ring);
+		}
+
 		rsrc_data->do_put(ctx, prsrc);
 		kfree(prsrc);
 	}
@@ -7577,7 +7601,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	if (ret)
 		return ret;
 
-	file_data = io_rsrc_data_alloc(ctx, io_rsrc_file_put);
+	file_data = io_rsrc_data_alloc(ctx, io_rsrc_file_put, nr_args);
 	if (!file_data)
 		return -ENOMEM;
 	ctx->file_data = file_data;
@@ -7678,7 +7702,7 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
 #endif
 }
 
-static int io_queue_rsrc_removal(struct io_rsrc_data *data,
+static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
 				 struct io_rsrc_node *node, void *rsrc)
 {
 	struct io_rsrc_put *prsrc;
@@ -7687,6 +7711,7 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data,
 	if (!prsrc)
 		return -ENOMEM;
 
+	prsrc->tag = data->tags[idx];
 	prsrc->rsrc = rsrc;
 	list_add(&prsrc->list, &node->rsrc_list);
 	return 0;
@@ -7727,7 +7752,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 
 		if (file_slot->file_ptr) {
 			file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-			err = io_queue_rsrc_removal(data, ctx->rsrc_node, file);
+			err = io_queue_rsrc_removal(data, up->offset + done,
+						    ctx->rsrc_node, file);
 			if (err)
 				break;
 			file_slot->file_ptr = 0;
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 05/12] io_uring: add generic path for rsrc update
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (3 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 04/12] io_uring: preparation for rsrc tagging Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 06/12] io_uring: enumerate dynamic resources Pavel Begunkov
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Extract some common parts of the rsrc update path; they will be used to
let registered buffers support dynamic (i.e. quiesce-free) management.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 79 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 46 insertions(+), 33 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8cc593da5cc4..0f79bb0362cd 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1035,9 +1035,9 @@ static void io_dismantle_req(struct io_kiocb *req);
 static void io_put_task(struct task_struct *task, int nr);
 static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
-static int __io_sqe_files_update(struct io_ring_ctx *ctx,
-				 struct io_uring_rsrc_update *ip,
-				 unsigned nr_args);
+static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
+				     struct io_uring_rsrc_update *up,
+				     unsigned nr_args);
 static void io_clean_op(struct io_kiocb *req);
 static struct file *io_file_get(struct io_submit_state *state,
 				struct io_kiocb *req, int fd, bool fixed);
@@ -5818,7 +5818,8 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 	up.data = req->rsrc_update.arg;
 
 	mutex_lock(&ctx->uring_lock);
-	ret = __io_sqe_files_update(ctx, &up, req->rsrc_update.nr_args);
+	ret = __io_register_rsrc_update(ctx, IORING_REGISTER_FILES_UPDATE,
+					&up, req->rsrc_update.nr_args);
 	mutex_unlock(&ctx->uring_lock);
 
 	if (ret < 0)
@@ -7721,25 +7722,20 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 struct io_uring_rsrc_update *up,
 				 unsigned nr_args)
 {
+	__s32 __user *fds = u64_to_user_ptr(up->data);
 	struct io_rsrc_data *data = ctx->file_data;
 	struct io_fixed_file *file_slot;
 	struct file *file;
-	__s32 __user *fds;
-	int fd, i, err;
-	__u32 done;
+	int fd, i, err = 0;
+	unsigned int done;
 	bool needs_switch = false;
 
-	if (check_add_overflow(up->offset, nr_args, &done))
-		return -EOVERFLOW;
-	if (done > ctx->nr_user_files)
+	if (!ctx->file_data)
+		return -ENXIO;
+	if (up->offset + nr_args > ctx->nr_user_files)
 		return -EINVAL;
-	err = io_rsrc_node_switch_start(ctx);
-	if (err)
-		return err;
 
-	fds = u64_to_user_ptr(up->data);
 	for (done = 0; done < nr_args; done++) {
-		err = 0;
 		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
 			err = -EFAULT;
 			break;
@@ -7793,23 +7789,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 	return done ? done : err;
 }
 
-static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
-			       unsigned nr_args)
-{
-	struct io_uring_rsrc_update up;
-
-	if (!ctx->file_data)
-		return -ENXIO;
-	if (!nr_args)
-		return -EINVAL;
-	if (copy_from_user(&up, arg, sizeof(up)))
-		return -EFAULT;
-	if (up.resv)
-		return -EINVAL;
-
-	return __io_sqe_files_update(ctx, &up, nr_args);
-}
-
 static struct io_wq_work *io_free_work(struct io_wq_work *work)
 {
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
@@ -9687,6 +9666,40 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
 	return 0;
 }
 
+static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
+				     struct io_uring_rsrc_update *up,
+				     unsigned nr_args)
+{
+	__u32 tmp;
+	int err;
+
+	if (check_add_overflow(up->offset, nr_args, &tmp))
+		return -EOVERFLOW;
+	err = io_rsrc_node_switch_start(ctx);
+	if (err)
+		return err;
+
+	switch (opcode) {
+	case IORING_REGISTER_FILES_UPDATE:
+		return __io_sqe_files_update(ctx, up, nr_args);
+	}
+	return -EINVAL;
+}
+
+static int io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
+				   void __user *arg, unsigned nr_args)
+{
+	struct io_uring_rsrc_update up;
+
+	if (!nr_args)
+		return -EINVAL;
+	if (copy_from_user(&up, arg, sizeof(up)))
+		return -EFAULT;
+	if (up.resv)
+		return -EINVAL;
+	return __io_register_rsrc_update(ctx, opcode, &up, nr_args);
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9777,7 +9790,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_sqe_files_unregister(ctx);
 		break;
 	case IORING_REGISTER_FILES_UPDATE:
-		ret = io_sqe_files_update(ctx, arg, nr_args);
+		ret = io_register_rsrc_update(ctx, opcode, arg, nr_args);
 		break;
 	case IORING_REGISTER_EVENTFD:
 	case IORING_REGISTER_EVENTFD_ASYNC:
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 06/12] io_uring: enumerate dynamic resources
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (4 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 05/12] io_uring: add generic path for rsrc update Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 07/12] io_uring: add IORING_REGISTER_RSRC Pavel Begunkov
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As resources gain more shared infrastructure, it will be more convenient
to enumerate resource types and use them for dispatching
(e.g. IORING_RSRC_FILE).

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 16 ++++++++--------
 include/uapi/linux/io_uring.h |  4 ++++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0f79bb0362cd..cfd5164952e8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1035,7 +1035,7 @@ static void io_dismantle_req(struct io_kiocb *req);
 static void io_put_task(struct task_struct *task, int nr);
 static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
-static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
+static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				     struct io_uring_rsrc_update *up,
 				     unsigned nr_args);
 static void io_clean_op(struct io_kiocb *req);
@@ -5818,7 +5818,7 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 	up.data = req->rsrc_update.arg;
 
 	mutex_lock(&ctx->uring_lock);
-	ret = __io_register_rsrc_update(ctx, IORING_REGISTER_FILES_UPDATE,
+	ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
 					&up, req->rsrc_update.nr_args);
 	mutex_unlock(&ctx->uring_lock);
 
@@ -9666,7 +9666,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
 	return 0;
 }
 
-static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
+static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				     struct io_uring_rsrc_update *up,
 				     unsigned nr_args)
 {
@@ -9679,14 +9679,14 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
 	if (err)
 		return err;
 
-	switch (opcode) {
-	case IORING_REGISTER_FILES_UPDATE:
+	switch (type) {
+	case IORING_RSRC_FILE:
 		return __io_sqe_files_update(ctx, up, nr_args);
 	}
 	return -EINVAL;
 }
 
-static int io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
+static int io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				   void __user *arg, unsigned nr_args)
 {
 	struct io_uring_rsrc_update up;
@@ -9697,7 +9697,7 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned opcode,
 		return -EFAULT;
 	if (up.resv)
 		return -EINVAL;
-	return __io_register_rsrc_update(ctx, opcode, &up, nr_args);
+	return __io_register_rsrc_update(ctx, type, &up, nr_args);
 }
 
 static bool io_register_op_must_quiesce(int op)
@@ -9790,7 +9790,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_sqe_files_unregister(ctx);
 		break;
 	case IORING_REGISTER_FILES_UPDATE:
-		ret = io_register_rsrc_update(ctx, opcode, arg, nr_args);
+		ret = io_register_rsrc_update(ctx, IORING_RSRC_FILE, arg, nr_args);
 		break;
 	case IORING_REGISTER_EVENTFD:
 	case IORING_REGISTER_EVENTFD_ASYNC:
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5beaa6bbc6db..d363e0c4fd21 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -316,6 +316,10 @@ struct io_uring_rsrc_update {
 	__aligned_u64 data;
 };
 
+enum {
+	IORING_RSRC_FILE		= 0,
+};
+
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 07/12] io_uring: add IORING_REGISTER_RSRC
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (5 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 06/12] io_uring: enumerate dynamic resources Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 08/12] io_uring: add generic rsrc update with tags Pavel Begunkov
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add a new io_uring_register() opcode for rsrc registration. Instead of
accepting a pointer to resources, fds or iovecs, @arg now points to a
struct io_uring_rsrc_register, and the second argument tells how large
that struct is, making it easily extendible by adding new fields.

All that is done mainly to be able to pass in a pointer with tags. Pass
it in and enable CQE posting for file resources. Setting tags on update
is not supported yet.

A design choice made here is not to post CQEs on rsrc de-registration,
but only when a resource is removed via a dynamic rsrc update.
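
As a usage sketch (not part of the patch), registering a sparse, tagged
fd table through the new opcode could look like the following; per the
code below, a sparse slot (fd == -1) must carry tag 0. It assumes the
same headers and raw-syscall convention as the sketch in the cover
letter, with the descriptors already opened by the caller.

static int register_tagged_files(int ring_fd, int sock_fd, int file_fd)
{
	int fds[3] = { sock_fd, -1, file_fd };	/* slot 1 left sparse */
	__u64 tags[3] = { 1, 0, 3 };		/* sparse slots must use tag 0 */
	struct io_uring_rsrc_register rr = {
		.type = IORING_RSRC_FILE,
		.nr   = 3,
		.data = (uintptr_t)fds,
		.tags = (uintptr_t)tags,
	};

	/* nr_args is the struct size, as checked by io_register_rsrc() */
	return syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_RSRC,
		       &rr, sizeof(rr));
}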

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++----
 include/uapi/linux/io_uring.h |  8 +++++++
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cfd5164952e8..3e7d96e25ec3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7584,7 +7584,7 @@ static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
 }
 
 static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
-				 unsigned nr_args)
+				 unsigned nr_args, u64 __user *tags)
 {
 	__s32 __user *fds = (__s32 __user *) arg;
 	struct file *file;
@@ -7611,17 +7611,24 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		goto out_free;
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
-		if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+		u64 tag = 0;
+
+		if ((tags && copy_from_user(&tag, &tags[i], sizeof(tag))) ||
+		    copy_from_user(&fd, &fds[i], sizeof(fd))) {
 			ret = -EFAULT;
 			goto out_fput;
 		}
 		/* allow sparse sets */
-		if (fd == -1)
+		if (fd == -1) {
+			ret = -EINVAL;
+			if (unlikely(tag))
+				goto out_fput;
 			continue;
+		}
 
 		file = fget(fd);
 		ret = -EBADF;
-		if (!file)
+		if (unlikely(!file))
 			goto out_fput;
 
 		/*
@@ -7635,6 +7642,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			fput(file);
 			goto out_fput;
 		}
+		ctx->file_data->tags[i] = tag;
 		io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
 	}
 
@@ -9700,6 +9708,29 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 	return __io_register_rsrc_update(ctx, type, &up, nr_args);
 }
 
+static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
+			    unsigned int size)
+{
+	struct io_uring_rsrc_register rr;
+
+	/* keep it extendible */
+	if (size != sizeof(rr))
+		return -EINVAL;
+
+	memset(&rr, 0, sizeof(rr));
+	if (copy_from_user(&rr, arg, size))
+		return -EFAULT;
+	if (!rr.nr)
+		return -EINVAL;
+
+	switch (rr.type) {
+	case IORING_RSRC_FILE:
+		return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
+					     rr.nr, u64_to_user_ptr(rr.tags));
+	}
+	return -EINVAL;
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9709,6 +9740,7 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_PROBE:
 	case IORING_REGISTER_PERSONALITY:
 	case IORING_UNREGISTER_PERSONALITY:
+	case IORING_REGISTER_RSRC:
 		return false;
 	default:
 		return true;
@@ -9781,7 +9813,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_sqe_buffers_unregister(ctx);
 		break;
 	case IORING_REGISTER_FILES:
-		ret = io_sqe_files_register(ctx, arg, nr_args);
+		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
 		break;
 	case IORING_UNREGISTER_FILES:
 		ret = -EINVAL;
@@ -9838,6 +9870,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_RESTRICTIONS:
 		ret = io_register_restrictions(ctx, arg, nr_args);
 		break;
+	case IORING_REGISTER_RSRC:
+		ret = io_register_rsrc(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d363e0c4fd21..ce7b2fce6713 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -298,6 +298,7 @@ enum {
 	IORING_UNREGISTER_PERSONALITY		= 10,
 	IORING_REGISTER_RESTRICTIONS		= 11,
 	IORING_REGISTER_ENABLE_RINGS		= 12,
+	IORING_REGISTER_RSRC			= 13,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
@@ -320,6 +321,13 @@ enum {
 	IORING_RSRC_FILE		= 0,
 };
 
+struct io_uring_rsrc_register {
+	__u32 type;
+	__u32 nr;
+	__aligned_u64 data;
+	__aligned_u64 tags;
+};
+
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 08/12] io_uring: add generic rsrc update with tags
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (6 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 07/12] io_uring: add IORING_REGISTER_RSRC Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 09/12] io_uring: keep table of pointers to ubufs Pavel Begunkov
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add IORING_REGISTER_RSRC_UPDATE, which also supports passing in rsrc
tags. Implement it for registered files.
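
A rough userspace sketch of the new update opcode, assuming the same
headers and raw-syscall usage as the earlier sketches (the helper name
is illustrative). If the replaced slot held a tagged file, its CQE
arrives once that file is finally put.

static int update_tagged_file(int ring_fd, unsigned int slot, int new_fd,
			      __u64 tag)
{
	struct io_uring_rsrc_update2 up;

	memset(&up, 0, sizeof(up));
	up.offset = slot;
	up.data   = (uintptr_t)&new_fd;	/* array of __s32 fds, length up.nr */
	up.tags   = (uintptr_t)&tag;	/* parallel array of u64 tags */
	up.type   = IORING_RSRC_FILE;
	up.nr     = 1;

	/* as with IORING_REGISTER_RSRC, nr_args carries the struct size */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_RSRC_UPDATE, &up, sizeof(up));
}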

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 52 +++++++++++++++++++++++++++--------
 include/uapi/linux/io_uring.h | 22 +++++++++++----
 2 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3e7d96e25ec3..5882303cc84a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1036,7 +1036,7 @@ static void io_put_task(struct task_struct *task, int nr);
 static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
 static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
-				     struct io_uring_rsrc_update *up,
+				     struct io_uring_rsrc_update2 *up,
 				     unsigned nr_args);
 static void io_clean_op(struct io_kiocb *req);
 static struct file *io_file_get(struct io_submit_state *state,
@@ -5808,7 +5808,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
 static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_uring_rsrc_update up;
+	struct io_uring_rsrc_update2 up;
 	int ret;
 
 	if (issue_flags & IO_URING_F_NONBLOCK)
@@ -5816,6 +5816,8 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 
 	up.offset = req->rsrc_update.offset;
 	up.data = req->rsrc_update.arg;
+	up.nr = 0;
+	up.tags = 0;
 
 	mutex_lock(&ctx->uring_lock);
 	ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
@@ -7727,9 +7729,10 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
 }
 
 static int __io_sqe_files_update(struct io_ring_ctx *ctx,
-				 struct io_uring_rsrc_update *up,
+				 struct io_uring_rsrc_update2 *up,
 				 unsigned nr_args)
 {
+	u64 __user *tags = u64_to_user_ptr(up->tags);
 	__s32 __user *fds = u64_to_user_ptr(up->data);
 	struct io_rsrc_data *data = ctx->file_data;
 	struct io_fixed_file *file_slot;
@@ -7744,10 +7747,17 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 		return -EINVAL;
 
 	for (done = 0; done < nr_args; done++) {
-		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+		u64 tag = 0;
+
+		if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) ||
+		    copy_from_user(&fd, &fds[done], sizeof(fd))) {
 			err = -EFAULT;
 			break;
 		}
+		if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) {
+			err = -EINVAL;
+			break;
+		}
 		if (fd == IORING_REGISTER_FILES_SKIP)
 			continue;
 
@@ -7782,6 +7792,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				err = -EBADF;
 				break;
 			}
+			data->tags[up->offset + done] = tag;
 			io_fixed_file_set(file_slot, file);
 			err = io_sqe_file_register(ctx, file, i);
 			if (err) {
@@ -9675,12 +9686,14 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
 }
 
 static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
-				     struct io_uring_rsrc_update *up,
+				     struct io_uring_rsrc_update2 *up,
 				     unsigned nr_args)
 {
 	__u32 tmp;
 	int err;
 
+	if (up->resv)
+		return -EINVAL;
 	if (check_add_overflow(up->offset, nr_args, &tmp))
 		return -EOVERFLOW;
 	err = io_rsrc_node_switch_start(ctx);
@@ -9694,18 +9707,31 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 	return -EINVAL;
 }
 
-static int io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
-				   void __user *arg, unsigned nr_args)
+static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
+				    unsigned nr_args)
 {
-	struct io_uring_rsrc_update up;
+	struct io_uring_rsrc_update2 up;
 
 	if (!nr_args)
 		return -EINVAL;
+	memset(&up, 0, sizeof(up));
+	if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
+		return -EFAULT;
+	return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
+}
+
+static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
+				   unsigned size)
+{
+	struct io_uring_rsrc_update2 up;
+
+	if (size != sizeof(up))
+		return -EINVAL;
 	if (copy_from_user(&up, arg, sizeof(up)))
 		return -EFAULT;
-	if (up.resv)
+	if (!up.nr)
 		return -EINVAL;
-	return __io_register_rsrc_update(ctx, type, &up, nr_args);
+	return __io_register_rsrc_update(ctx, up.type, &up, up.nr);
 }
 
 static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
@@ -9741,6 +9767,7 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_PERSONALITY:
 	case IORING_UNREGISTER_PERSONALITY:
 	case IORING_REGISTER_RSRC:
+	case IORING_REGISTER_RSRC_UPDATE:
 		return false;
 	default:
 		return true;
@@ -9822,7 +9849,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_sqe_files_unregister(ctx);
 		break;
 	case IORING_REGISTER_FILES_UPDATE:
-		ret = io_register_rsrc_update(ctx, IORING_RSRC_FILE, arg, nr_args);
+		ret = io_register_files_update(ctx, arg, nr_args);
 		break;
 	case IORING_REGISTER_EVENTFD:
 	case IORING_REGISTER_EVENTFD_ASYNC:
@@ -9873,6 +9900,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_RSRC:
 		ret = io_register_rsrc(ctx, arg, nr_args);
 		break;
+	case IORING_REGISTER_RSRC_UPDATE:
+		ret = io_register_rsrc_update(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ce7b2fce6713..6d8360b5b9c5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -299,6 +299,7 @@ enum {
 	IORING_REGISTER_RESTRICTIONS		= 11,
 	IORING_REGISTER_ENABLE_RINGS		= 12,
 	IORING_REGISTER_RSRC			= 13,
+	IORING_REGISTER_RSRC_UPDATE		= 14,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
@@ -311,12 +312,6 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
-struct io_uring_rsrc_update {
-	__u32 offset;
-	__u32 resv;
-	__aligned_u64 data;
-};
-
 enum {
 	IORING_RSRC_FILE		= 0,
 };
@@ -328,6 +323,21 @@ struct io_uring_rsrc_register {
 	__aligned_u64 tags;
 };
 
+struct io_uring_rsrc_update {
+	__u32 offset;
+	__u32 resv;
+	__aligned_u64 data;
+};
+
+struct io_uring_rsrc_update2 {
+	__u32 offset;
+	__u32 resv;
+	__aligned_u64 data;
+	__aligned_u64 tags;
+	__u32 type;
+	__u32 nr;
+};
+
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 09/12] io_uring: keep table of pointers to ubufs
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (7 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 08/12] io_uring: add generic rsrc update with tags Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 10/12] io_uring: prepare fixed rw for dynamic buffers Pavel Begunkov
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Instead of keeping a table of ubufs, convert it into a table of pointers
to ubuf, so we can atomically read one pointer and be sure that the
content of the ubuf won't change.

Because we were already dynamically allocating imu->bvec, throw both imu
and bvec into a single structure so they can be allocated together.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5882303cc84a..ea725c0cbf79 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -195,9 +195,9 @@ enum io_uring_cmd_flags {
 struct io_mapped_ubuf {
 	u64		ubuf;
 	u64		ubuf_end;
-	struct		bio_vec *bvec;
 	unsigned int	nr_bvecs;
 	unsigned long	acct_pages;
+	struct bio_vec	bvec[];
 };
 
 struct io_ring_ctx;
@@ -405,7 +405,7 @@ struct io_ring_ctx {
 
 	/* if used, fixed mapped user buffers */
 	unsigned		nr_user_bufs;
-	struct io_mapped_ubuf	*user_bufs;
+	struct io_mapped_ubuf	**user_bufs;
 
 	struct user_struct	*user;
 
@@ -2760,7 +2760,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
 	if (unlikely(buf_index >= ctx->nr_user_bufs))
 		return -EFAULT;
 	index = array_index_nospec(buf_index, ctx->nr_user_bufs);
-	imu = &ctx->user_bufs[index];
+	imu = ctx->user_bufs[index];
 	buf_addr = req->rw.addr;
 
 	if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
@@ -8076,16 +8076,17 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
 	return off;
 }
 
-static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
+static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
 {
+	struct io_mapped_ubuf *imu = *slot;
 	unsigned int i;
 
 	for (i = 0; i < imu->nr_bvecs; i++)
 		unpin_user_page(imu->bvec[i].bv_page);
 	if (imu->acct_pages)
 		io_unaccount_mem(ctx, imu->acct_pages);
-	kvfree(imu->bvec);
-	imu->nr_bvecs = 0;
+	kvfree(imu);
+	*slot = NULL;
 }
 
 static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
@@ -8152,7 +8153,7 @@ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
 
 	/* check previously registered pages */
 	for (i = 0; i < ctx->nr_user_bufs; i++) {
-		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
+		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
 
 		for (j = 0; j < imu->nr_bvecs; j++) {
 			if (!PageCompound(imu->bvec[j].bv_page))
@@ -8197,9 +8198,10 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 }
 
 static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
-				  struct io_mapped_ubuf *imu,
+				  struct io_mapped_ubuf **pimu,
 				  struct page **last_hpage)
 {
+	struct io_mapped_ubuf *imu = NULL;
 	struct vm_area_struct **vmas = NULL;
 	struct page **pages = NULL;
 	unsigned long off, start, end, ubuf;
@@ -8211,6 +8213,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	start = ubuf >> PAGE_SHIFT;
 	nr_pages = end - start;
 
+	*pimu = NULL;
 	ret = -ENOMEM;
 
 	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
@@ -8222,8 +8225,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	if (!vmas)
 		goto done;
 
-	imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
-				   GFP_KERNEL);
+	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
 	if (!imu->bvec)
 		goto done;
 
@@ -8253,14 +8255,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 		 */
 		if (pret > 0)
 			unpin_user_pages(pages, pret);
-		kvfree(imu->bvec);
 		goto done;
 	}
 
 	ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
 	if (ret) {
 		unpin_user_pages(pages, pret);
-		kvfree(imu->bvec);
 		goto done;
 	}
 
@@ -8280,8 +8280,11 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	imu->ubuf = ubuf;
 	imu->ubuf_end = ubuf + iov->iov_len;
 	imu->nr_bvecs = nr_pages;
+	*pimu = imu;
 	ret = 0;
 done:
+	if (ret)
+		kvfree(imu);
 	kvfree(pages);
 	kvfree(vmas);
 	return ret;
@@ -8331,15 +8334,15 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		return ret;
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
-		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
-
 		ret = io_copy_iov(ctx, &iov, arg, i);
 		if (ret)
 			break;
 		ret = io_buffer_validate(&iov);
 		if (ret)
 			break;
-		ret = io_sqe_buffer_register(ctx, &iov, imu, &last_hpage);
+
+		ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
+					     &last_hpage);
 		if (ret)
 			break;
 	}
@@ -9248,7 +9251,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	}
 	seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
 	for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
-		struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
+		struct io_mapped_ubuf *buf = ctx->user_bufs[i];
 		unsigned int len = buf->ubuf_end - buf->ubuf;
 
 		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 10/12] io_uring: prepare fixed rw for dynamic buffers
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (8 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 09/12] io_uring: keep table of pointers to ubufs Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 11/12] io_uring: implement fixed buffers registration similar to fixed files Pavel Begunkov
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

With dynamic buffer updates, registered buffers in the table may change
at any moment. First of all, we want to prevent future races between
updating and importing (i.e. io_import_fixed()), where the latter may
happen without uring_lock held, e.g. from io-wq.

Save the io_mapped_ubuf loaded on the first import and reuse it on
re-import.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ea725c0cbf79..083917bd7aa6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -839,6 +839,8 @@ struct io_kiocb {
 	struct hlist_node		hash_node;
 	struct async_poll		*apoll;
 	struct io_wq_work		work;
+	/* store used ubuf, so we can prevent reloading */
+	struct io_mapped_ubuf		*imu;
 };
 
 struct io_tctx_node {
@@ -2683,6 +2685,12 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		kiocb->ki_complete = io_complete_rw;
 	}
 
+	if (req->opcode == IORING_OP_READ_FIXED ||
+	    req->opcode == IORING_OP_WRITE_FIXED) {
+		req->imu = NULL;
+		io_req_set_rsrc_node(req);
+	}
+
 	req->rw.addr = READ_ONCE(sqe->addr);
 	req->rw.len = READ_ONCE(sqe->len);
 	req->buf_index = READ_ONCE(sqe->buf_index);
@@ -2748,21 +2756,13 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 	}
 }
 
-static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
+static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
+			     struct io_mapped_ubuf *imu)
 {
-	struct io_ring_ctx *ctx = req->ctx;
 	size_t len = req->rw.len;
-	struct io_mapped_ubuf *imu;
-	u16 index, buf_index = req->buf_index;
 	u64 buf_end, buf_addr = req->rw.addr;
 	size_t offset;
 
-	if (unlikely(buf_index >= ctx->nr_user_bufs))
-		return -EFAULT;
-	index = array_index_nospec(buf_index, ctx->nr_user_bufs);
-	imu = ctx->user_bufs[index];
-	buf_addr = req->rw.addr;
-
 	if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
 		return -EFAULT;
 	/* not inside the mapped region */
@@ -2814,6 +2814,22 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
 	return 0;
 }
 
+static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_mapped_ubuf *imu = req->imu;
+	u16 index, buf_index = req->buf_index;
+
+	if (likely(!imu)) {
+		if (unlikely(buf_index >= ctx->nr_user_bufs))
+			return -EFAULT;
+		index = array_index_nospec(buf_index, ctx->nr_user_bufs);
+		imu = READ_ONCE(ctx->user_bufs[index]);
+		req->imu = imu;
+	}
+	return __io_import_fixed(req, rw, iter, imu);
+}
+
 static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
 {
 	if (needs_lock)
@@ -9463,6 +9479,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	ret = io_sq_offload_create(ctx, p);
 	if (ret)
 		goto err;
+	/* always set a rsrc node */
+	io_rsrc_node_switch_start(ctx);
+	io_rsrc_node_switch(ctx, NULL);
 
 	memset(&p->sq_off, 0, sizeof(p->sq_off));
 	p->sq_off.head = offsetof(struct io_rings, sq.head);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 11/12] io_uring: implement fixed buffers registration similar to fixed files
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (9 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 10/12] io_uring: prepare fixed rw for dynamic buffers Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 13:32 ` [PATCH v2 12/12] io_uring: add full-fledged dynamic buffers support Pavel Begunkov
  2021-04-25 16:15 ` [RFC v2 00/12] dynamic buffers + rsrc tagging Jens Axboe
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: Bijan Mottahedeh

From: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>

Apply fixed_rsrc functionality for fixed buffers support.

Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
[rebase, remove multi-level tables, fix unregister on exit]
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 71 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 083917bd7aa6..30f0563349db 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -218,6 +218,7 @@ struct io_rsrc_put {
 	union {
 		void *rsrc;
 		struct file *file;
+		struct io_mapped_ubuf *buf;
 	};
 };
 
@@ -404,6 +405,7 @@ struct io_ring_ctx {
 	unsigned		nr_user_files;
 
 	/* if used, fixed mapped user buffers */
+	struct io_rsrc_data	*buf_data;
 	unsigned		nr_user_bufs;
 	struct io_mapped_ubuf	**user_bufs;
 
@@ -5921,7 +5923,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
 			req->opcode);
-	return-EINVAL;
+	return -EINVAL;
 }
 
 static int io_req_prep_async(struct io_kiocb *req)
@@ -8105,19 +8107,36 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 	*slot = NULL;
 }
 
-static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
 {
-	unsigned int i;
+	/* no updates yet, so not used */
+	WARN_ON_ONCE(1);
+}
 
-	if (!ctx->user_bufs)
-		return -ENXIO;
+static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+{
+	unsigned int i;
 
 	for (i = 0; i < ctx->nr_user_bufs; i++)
 		io_buffer_unmap(ctx, &ctx->user_bufs[i]);
 	kfree(ctx->user_bufs);
+	kfree(ctx->buf_data);
 	ctx->user_bufs = NULL;
+	ctx->buf_data = NULL;
 	ctx->nr_user_bufs = 0;
-	return 0;
+}
+
+static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+{
+	int ret;
+
+	if (!ctx->buf_data)
+		return -ENXIO;
+
+	ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+	if (!ret)
+		__io_sqe_buffers_unregister(ctx);
+	return ret;
 }
 
 static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
@@ -8337,17 +8356,26 @@ static int io_buffer_validate(struct iovec *iov)
 static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 				   unsigned int nr_args)
 {
+	struct page *last_hpage = NULL;
+	struct io_rsrc_data *data;
 	int i, ret;
 	struct iovec iov;
-	struct page *last_hpage = NULL;
 
 	if (ctx->user_bufs)
 		return -EBUSY;
 	if (!nr_args || nr_args > UIO_MAXIOV)
 		return -EINVAL;
-	ret = io_buffers_map_alloc(ctx, nr_args);
+	ret = io_rsrc_node_switch_start(ctx);
 	if (ret)
 		return ret;
+	data = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, nr_args);
+	if (!data)
+		return -ENOMEM;
+	ret = io_buffers_map_alloc(ctx, nr_args);
+	if (ret) {
+		kfree(data);
+		return ret;
+	}
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
 		ret = io_copy_iov(ctx, &iov, arg, i);
@@ -8363,9 +8391,13 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			break;
 	}
 
-	if (ret)
-		io_sqe_buffers_unregister(ctx);
+	WARN_ON_ONCE(ctx->buf_data);
 
+	ctx->buf_data = data;
+	if (ret)
+		__io_sqe_buffers_unregister(ctx);
+	else
+		io_rsrc_node_switch(ctx, NULL);
 	return ret;
 }
 
@@ -8440,10 +8472,18 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
+static bool io_wait_rsrc_data(struct io_rsrc_data *data)
+{
+	if (!data)
+		return false;
+	if (!atomic_dec_and_test(&data->refs))
+		wait_for_completion(&data->done);
+	return true;
+}
+
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	io_sq_thread_finish(ctx);
-	io_sqe_buffers_unregister(ctx);
 
 	if (ctx->mm_account) {
 		mmdrop(ctx->mm_account);
@@ -8451,11 +8491,10 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	}
 
 	mutex_lock(&ctx->uring_lock);
-	if (ctx->file_data) {
-		if (!atomic_dec_and_test(&ctx->file_data->refs))
-			wait_for_completion(&ctx->file_data->done);
+	if (io_wait_rsrc_data(ctx->buf_data))
+		__io_sqe_buffers_unregister(ctx);
+	if (io_wait_rsrc_data(ctx->file_data))
 		__io_sqe_files_unregister(ctx);
-	}
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);
 	mutex_unlock(&ctx->uring_lock);
@@ -9782,6 +9821,8 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
+	case IORING_REGISTER_BUFFERS:
+	case IORING_UNREGISTER_BUFFERS:
 	case IORING_REGISTER_FILES:
 	case IORING_UNREGISTER_FILES:
 	case IORING_REGISTER_FILES_UPDATE:
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 12/12] io_uring: add full-fledged dynamic buffers support
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (10 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 11/12] io_uring: implement fixed buffers registration similar to fixed files Pavel Begunkov
@ 2021-04-25 13:32 ` Pavel Begunkov
  2021-04-25 16:15 ` [RFC v2 00/12] dynamic buffers + rsrc tagging Jens Axboe
  12 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2021-04-25 13:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: Bijan Mottahedeh

Hook buffers into all rsrc infrastructure, including tagging and
updates.
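
For illustration, a dynamic buffer update from userspace might look like
the sketch below (same assumptions as the earlier sketches: patched uapi
header, raw syscall, illustrative helper name). Per
__io_sqe_buffers_update(), passing a zeroed iovec removes the slot
without installing a replacement, and the old buffer's tag CQE is posted
once its last reference is dropped.

static int update_tagged_buffer(int ring_fd, unsigned int slot,
				void *base, size_t len, __u64 tag)
{
	struct iovec iov = { .iov_base = base, .iov_len = len };
	struct io_uring_rsrc_update2 up;

	memset(&up, 0, sizeof(up));
	up.offset = slot;
	up.data   = (uintptr_t)&iov;	/* iovec array; all-zero iovec == remove */
	up.tags   = (uintptr_t)&tag;
	up.type   = IORING_RSRC_BUFFER;
	up.nr     = 1;

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_RSRC_UPDATE, &up, sizeof(up));
}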

Suggested-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 76 +++++++++++++++++++++++++++++++++--
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 30f0563349db..fd953a96f5af 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8109,8 +8109,8 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 
 static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
 {
-	/* no updates yet, so not used */
-	WARN_ON_ONCE(1);
+	io_buffer_unmap(ctx, &prsrc->buf);
+	prsrc->buf = NULL;
 }
 
 static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
@@ -8354,7 +8354,7 @@ static int io_buffer_validate(struct iovec *iov)
 }
 
 static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
-				   unsigned int nr_args)
+				   unsigned int nr_args, u64 __user *tags)
 {
 	struct page *last_hpage = NULL;
 	struct io_rsrc_data *data;
@@ -8378,6 +8378,12 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	}
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
+		u64 tag = 0;
+
+		if (tags && copy_from_user(&tag, &tags[i], sizeof(tag))) {
+			ret = -EFAULT;
+			break;
+		}
 		ret = io_copy_iov(ctx, &iov, arg, i);
 		if (ret)
 			break;
@@ -8389,6 +8395,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 					     &last_hpage);
 		if (ret)
 			break;
+		data->tags[i] = tag;
 	}
 
 	WARN_ON_ONCE(ctx->buf_data);
@@ -8401,6 +8408,62 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }
 
+static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
+				   struct io_uring_rsrc_update2 *up,
+				   unsigned int nr_args)
+{
+	u64 __user *tags = u64_to_user_ptr(up->tags);
+	struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
+	struct io_mapped_ubuf *imu;
+	struct page *last_hpage = NULL;
+	bool needs_switch = false;
+	__u32 done;
+	int i, err;
+
+	if (!ctx->buf_data)
+		return -ENXIO;
+	if (up->offset + nr_args > ctx->nr_user_bufs)
+		return -EINVAL;
+
+	for (done = 0; done < nr_args; done++) {
+		u64 tag = 0;
+
+		err = io_copy_iov(ctx, &iov, iovs, done);
+		if (err)
+			break;
+		if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) {
+			err = -EFAULT;
+			break;
+		}
+
+		i = array_index_nospec(up->offset + done, ctx->nr_user_bufs);
+		imu = ctx->user_bufs[i];
+		if (imu) {
+			err = io_queue_rsrc_removal(ctx->buf_data, up->offset + done,
+						    ctx->rsrc_node, imu);
+			if (err)
+				break;
+			ctx->user_bufs[i] = NULL;
+			needs_switch = true;
+		}
+
+		if (iov.iov_base || iov.iov_len) {
+			err = io_buffer_validate(&iov);
+			if (err)
+				break;
+			err = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
+						     &last_hpage);
+			if (err)
+				break;
+			ctx->buf_data->tags[up->offset + done] = tag;
+		}
+	}
+
+	if (needs_switch)
+		io_rsrc_node_switch(ctx, ctx->buf_data);
+	return done ? done : err;
+}
+
 static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
 {
 	__s32 __user *fds = arg;
@@ -9764,6 +9827,8 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 	switch (type) {
 	case IORING_RSRC_FILE:
 		return __io_sqe_files_update(ctx, up, nr_args);
+	case IORING_RSRC_BUFFER:
+		return __io_sqe_buffers_update(ctx, up, nr_args);
 	}
 	return -EINVAL;
 }
@@ -9814,6 +9879,9 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	case IORING_RSRC_FILE:
 		return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
 					     rr.nr, u64_to_user_ptr(rr.tags));
+	case IORING_RSRC_BUFFER:
+		return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
+					       rr.nr, u64_to_user_ptr(rr.tags));
 	}
 	return -EINVAL;
 }
@@ -9894,7 +9962,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
 	switch (opcode) {
 	case IORING_REGISTER_BUFFERS:
-		ret = io_sqe_buffers_register(ctx, arg, nr_args);
+		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
 		break;
 	case IORING_UNREGISTER_BUFFERS:
 		ret = -EINVAL;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6d8360b5b9c5..e1ae46683301 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -314,6 +314,7 @@ struct io_uring_files_update {
 
 enum {
 	IORING_RSRC_FILE		= 0,
+	IORING_RSRC_BUFFER		= 1,
 };
 
 struct io_uring_rsrc_register {
-- 
2.31.1



* Re: [RFC v2 00/12] dynamic buffers + rsrc tagging
  2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
                   ` (11 preceding siblings ...)
  2021-04-25 13:32 ` [PATCH v2 12/12] io_uring: add full-fledged dynamic buffers support Pavel Begunkov
@ 2021-04-25 16:15 ` Jens Axboe
  12 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2021-04-25 16:15 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 4/25/21 7:32 AM, Pavel Begunkov wrote:
> 1) support dynamic management for registered buffers, including
> update.
> 
> 2) add new IORING_REGISTER* opcodes for rsrc register and rsrc update,
> which just dispatch files/buffers to the right callbacks. Needed
> because the old ones are not nicely extensible. The downside is that
> restrictions can't cover them with fine granularity.
> 
> 3) add rsrc tagging, with tag=0 ignoring CQE posting.
> It doesn't post CQEs on unregister, but that can easily be changed.
> 
> v2: instead of async_data importing for fixed rw, save
>     used io_mapped_ubuf and use it on re-import.
>     Add patch 9/12 as a preparation for that.
> 
>     Fix prep rw getting a rsrc node ref for fixed files without
>     having a rsrc node.

This looks Good Enough for me, let's get it queued up.

-- 
Jens Axboe



end of thread

Thread overview: 14+ messages
2021-04-25 13:32 [RFC v2 00/12] dynamic buffers + rsrc tagging Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 01/12] io_uring: move __io_sqe_files_unregister Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 02/12] io_uring: return back rsrc data free helper Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 03/12] io_uring: decouple CQE filling from requests Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 04/12] io_uring: preparation for rsrc tagging Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 05/12] io_uring: add generic path for rsrc update Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 06/12] io_uring: enumerate dynamic resources Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 07/12] io_uring: add IORING_REGISTER_RSRC Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 08/12] io_uring: add generic rsrc update with tags Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 09/12] io_uring: keep table of pointers to ubufs Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 10/12] io_uring: prepare fixed rw for dynanic buffers Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 11/12] io_uring: implement fixed buffers registration similar to fixed files Pavel Begunkov
2021-04-25 13:32 ` [PATCH v2 12/12] io_uring: add full-fledged dynamic buffers support Pavel Begunkov
2021-04-25 16:15 ` [RFC v2 00/12] dynamic buffers + rsrc tagging Jens Axboe
