IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCHSET 0/2] io_uring: close lookup gap for dependent work
@ 2019-11-20 20:09 Jens Axboe
  2019-11-20 20:09 ` [PATCH 1/7] io-wq: wait for io_wq_create() to setup necessary workers Jens Axboe
                   ` (9 more replies)
  0 siblings, 10 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

As discussed earlier today on this list, there's a gap between finding
dependent work and ensuring we can look it up for cancellation purposes.
On top of that, we also currently NEVER find dependent work due to how
we do lookups of it, so that is fixed in patch 1 while patch 2
implements the fix for the lookup gap.

Patches are against for-5.5/io_uring-post

 fs/io-wq.c    |  3 +++
 fs/io-wq.h    | 12 +++++++++++-
 fs/io_uring.c | 34 +++++++++++++++++++++++++++-------
 3 files changed, 41 insertions(+), 8 deletions(-)

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/7] io-wq: wait for io_wq_create() to setup necessary workers
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 1/2] io_uring: allow finding next link independent of req reference count Jens Axboe
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe, syzbot+0f1cc17f85154f400465

We currently have a race where if setup is really slow, we can be
calling io_wq_destroy() before we're done setting up. This will cause
the caller to get stuck waiting for the manager to set things up, but
the manager already exited.

Fix this by doing a sync setup of the manager. This also fixes the case
where if we failed creating workers, we'd also get stuck.

In practice this race window was really small, as we already wait for
the manager to start. Hence someone would have to call io_wq_destroy()
after the task has started, but before it started the first loop. The
reported test case forked tons of these, which is why it became an
issue.

Reported-by: syzbot+0f1cc17f85154f400465@syzkaller.appspotmail.com
Fixes: 771b53d033e8 ("io-wq: small threadpool implementation for io_uring")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 9174007ce107..1f640c489f7c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -33,6 +33,7 @@ enum {
 enum {
 	IO_WQ_BIT_EXIT		= 0,	/* wq exiting */
 	IO_WQ_BIT_CANCEL	= 1,	/* cancel work on list */
+	IO_WQ_BIT_ERROR		= 2,	/* error on setup */
 };
 
 enum {
@@ -562,14 +563,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 	spin_unlock_irq(&wqe->lock);
 }
 
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 {
 	struct io_wqe_acct *acct =&wqe->acct[index];
 	struct io_worker *worker;
 
 	worker = kcalloc_node(1, sizeof(*worker), GFP_KERNEL, wqe->node);
 	if (!worker)
-		return;
+		return false;
 
 	refcount_set(&worker->ref, 1);
 	worker->nulls_node.pprev = NULL;
@@ -581,7 +582,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 				"io_wqe_worker-%d/%d", index, wqe->node);
 	if (IS_ERR(worker->task)) {
 		kfree(worker);
-		return;
+		return false;
 	}
 
 	spin_lock_irq(&wqe->lock);
@@ -599,6 +600,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 		atomic_inc(&wq->user->processes);
 
 	wake_up_process(worker->task);
+	return true;
 }
 
 static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
@@ -606,9 +608,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
 {
 	struct io_wqe_acct *acct = &wqe->acct[index];
 
-	/* always ensure we have one bounded worker */
-	if (index == IO_WQ_ACCT_BOUND && !acct->nr_workers)
-		return true;
 	/* if we have available workers or no work, no need */
 	if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
 		return false;
@@ -621,10 +620,19 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
 static int io_wq_manager(void *data)
 {
 	struct io_wq *wq = data;
+	int i;
 
-	while (!kthread_should_stop()) {
-		int i;
+	/* create fixed workers */
+	for (i = 0; i < wq->nr_wqes; i++) {
+		if (create_io_worker(wq, wq->wqes[i], IO_WQ_ACCT_BOUND))
+			continue;
+		goto err;
+	}
 
+	refcount_set(&wq->refs, wq->nr_wqes);
+	complete(&wq->done);
+
+	while (!kthread_should_stop()) {
 		for (i = 0; i < wq->nr_wqes; i++) {
 			struct io_wqe *wqe = wq->wqes[i];
 			bool fork_worker[2] = { false, false };
@@ -644,6 +652,10 @@ static int io_wq_manager(void *data)
 		schedule_timeout(HZ);
 	}
 
+	return 0;
+err:
+	set_bit(IO_WQ_BIT_ERROR, &wq->state);
+	complete(&wq->done);
 	return 0;
 }
 
@@ -982,7 +994,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
 	wq->user = user;
 
 	i = 0;
-	refcount_set(&wq->refs, wq->nr_wqes);
 	for_each_online_node(node) {
 		struct io_wqe *wqe;
 
@@ -1020,6 +1031,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
 	wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
 	if (!IS_ERR(wq->manager)) {
 		wake_up_process(wq->manager);
+		wait_for_completion(&wq->done);
+		if (test_bit(IO_WQ_BIT_ERROR, &wq->state))
+			goto err;
+		reinit_completion(&wq->done);
 		return wq;
 	}
 
@@ -1041,10 +1056,9 @@ void io_wq_destroy(struct io_wq *wq)
 {
 	int i;
 
-	if (wq->manager) {
-		set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	if (wq->manager)
 		kthread_stop(wq->manager);
-	}
 
 	rcu_read_lock();
 	for (i = 0; i < wq->nr_wqes; i++) {
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] io_uring: allow finding next link independent of req reference count
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
  2019-11-20 20:09 ` [PATCH 1/7] io-wq: wait for io_wq_create() to setup necessary workers Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 2/7] io-wq: remove extra space characters Jens Axboe
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

We currently try and start the next link when we put the request, and
only if we were going to free it. This means that the optimization to
continue executing requests from the same context often fails, as we're
not putting the final reference.

Add REQ_F_LINK_NEXT to keep track of this, and allow io_uring to find the
next request more efficiently.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 066b59ffb54e..132a890368bf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -340,6 +340,7 @@ struct io_kiocb {
 #define REQ_F_NOWAIT		1	/* must not punt to workers */
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
+#define REQ_F_LINK_NEXT		8	/* already grabbed next link */
 #define REQ_F_IO_DRAIN		16	/* drain existing IO first */
 #define REQ_F_IO_DRAINED	32	/* drain done */
 #define REQ_F_LINK		64	/* linked sqes */
@@ -874,6 +875,10 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 	struct io_kiocb *nxt;
 	bool wake_ev = false;
 
+	/* Already got next link */
+	if (req->flags & REQ_F_LINK_NEXT)
+		return;
+
 	/*
 	 * The list should never be empty when we are called here. But could
 	 * potentially happen if the chain is messed up, check to be on the
@@ -910,6 +915,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 		break;
 	}
 
+	req->flags |= REQ_F_LINK_NEXT;
 	if (wake_ev)
 		io_cqring_ev_posted(ctx);
 }
@@ -946,12 +952,10 @@ static void io_fail_links(struct io_kiocb *req)
 	io_cqring_ev_posted(ctx);
 }
 
-static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+static void io_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
 {
-	if (likely(!(req->flags & REQ_F_LINK))) {
-		__io_free_req(req);
+	if (likely(!(req->flags & REQ_F_LINK)))
 		return;
-	}
 
 	/*
 	 * If LINK is set, we have dependent requests in this chain. If we
@@ -977,7 +981,11 @@ static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
 	} else {
 		io_req_link_next(req, nxt);
 	}
+}
 
+static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+	io_req_find_next(req, nxt);
 	__io_free_req(req);
 }
 
@@ -994,8 +1002,10 @@ static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 {
 	struct io_kiocb *nxt = NULL;
 
+	io_req_find_next(req, &nxt);
+
 	if (refcount_dec_and_test(&req->refs))
-		io_free_req_find_next(req, &nxt);
+		__io_free_req(req);
 
 	if (nxt) {
 		if (nxtptr)
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/7] io-wq: remove extra space characters
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
  2019-11-20 20:09 ` [PATCH 1/7] io-wq: wait for io_wq_create() to setup necessary workers Jens Axboe
  2019-11-20 20:09 ` [PATCH 1/2] io_uring: allow finding next link independent of req reference count Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 2/2] io_uring: close lookup gap for dependent next work Jens Axboe
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Dan Carpenter, Jens Axboe

From: Dan Carpenter <dan.carpenter@oracle.com>

These lines are indented an extra space character.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 1f640c489f7c..81b2a456d1ce 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -328,9 +328,9 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 	 * If worker is moving from bound to unbound (or vice versa), then
 	 * ensure we update the running accounting.
 	 */
-	 worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
-	 work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
-	 if (worker_bound != work_bound) {
+	worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
+	work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
+	if (worker_bound != work_bound) {
 		io_wqe_dec_running(wqe, worker);
 		if (work_bound) {
 			worker->flags |= IO_WORKER_F_BOUND;
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/2] io_uring: close lookup gap for dependent next work
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (2 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 2/7] io-wq: remove extra space characters Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 3/7] io_uring: break links for failed defer Jens Axboe
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

When we find new work to process within the work handler, we queue the
linked timeout before we have issued the new work. This can be
problematic for very short timeouts, as we have a window where the new
work isn't visible.

Allow the work handler to store a callback function for this in the work
item, and flag it with IO_WQ_WORK_CB if the caller has done so. If that
is set, then io-wq will call the callback when it has setup the new work
item.

Reported-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c    |  3 +++
 fs/io-wq.h    | 12 +++++++++++-
 fs/io_uring.c | 14 ++++++++++++--
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index b4bc377dda61..2666384aaf44 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -427,6 +427,9 @@ static void io_worker_handle_work(struct io_worker *worker)
 		worker->cur_work = work;
 		spin_unlock_irq(&worker->lock);
 
+		if (work->flags & IO_WQ_WORK_CB)
+			work->cb.fn(work->cb.data);
+
 		if ((work->flags & IO_WQ_WORK_NEEDS_FILES) &&
 		    current->files != work->files) {
 			task_lock(current);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 4b29f922f80c..892989f3e41e 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -11,6 +11,7 @@ enum {
 	IO_WQ_WORK_NEEDS_FILES	= 16,
 	IO_WQ_WORK_UNBOUND	= 32,
 	IO_WQ_WORK_INTERNAL	= 64,
+	IO_WQ_WORK_CB		= 128,
 
 	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
 };
@@ -21,8 +22,17 @@ enum io_wq_cancel {
 	IO_WQ_CANCEL_NOTFOUND,	/* work not found */
 };
 
+struct io_wq_work;
+struct io_wq_work_cb {
+	void (*fn)(void *data);
+	void *data;
+};
+
 struct io_wq_work {
-	struct list_head list;
+	union {
+		struct list_head list;
+		struct io_wq_work_cb cb;
+	};
 	void (*func)(struct io_wq_work **);
 	unsigned flags;
 	struct files_struct *files;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 132a890368bf..6175e2e195c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2664,6 +2664,13 @@ static int __io_submit_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
 	return 0;
 }
 
+static void io_link_work_cb(void *data)
+{
+	struct io_kiocb *link = data;
+
+	io_queue_linked_timeout(link);
+}
+
 static void io_wq_submit_work(struct io_wq_work **workptr)
 {
 	struct io_wq_work *work = *workptr;
@@ -2710,8 +2717,11 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 
 		io_prep_async_work(nxt, &link);
 		*workptr = &nxt->work;
-		if (link)
-			io_queue_linked_timeout(link);
+		if (link) {
+			nxt->work.flags |= IO_WQ_WORK_CB;
+			nxt->work.cb.fn = io_link_work_cb;
+			nxt->work.cb.data = link;
+		}
 	}
 }
 
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/7] io_uring: break links for failed defer
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (3 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 2/2] io_uring: close lookup gap for dependent next work Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 4/7] io_uring: remove redundant check Jens Axboe
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

From: Pavel Begunkov <asml.silence@gmail.com>

If io_req_defer() failed, it needs to cancel a dependant link.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ebc58f896088..7b9bd6ad4fb9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2943,6 +2943,8 @@ static void io_queue_sqe(struct io_kiocb *req)
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
 			io_cqring_add_event(req, ret);
+			if (req->flags & REQ_F_LINK)
+				req->flags |= REQ_F_FAIL_LINK;
 			io_double_put_req(req);
 		}
 	} else
@@ -2975,6 +2977,8 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
 		if (ret != -EIOCBQUEUED) {
 err:
 			io_cqring_add_event(req, ret);
+			if (req->flags & REQ_F_LINK)
+				req->flags |= REQ_F_FAIL_LINK;
 			io_double_put_req(req);
 			if (shadow)
 				__io_free_req(shadow);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 4/7] io_uring: remove redundant check
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (4 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 3/7] io_uring: break links for failed defer Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 5/7] io_uring: Fix leaking linked timeouts Jens Axboe
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

From: Pavel Begunkov <asml.silence@gmail.com>

Pass any IORING_OP_LINK_TIMEOUT request further, where it will
eventually fail in io_issue_sqe().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7b9bd6ad4fb9..8d25e157b4d8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3064,10 +3064,6 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 
 		INIT_LIST_HEAD(&req->link_list);
 		*link = req;
-	} else if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
-		/* Only valid as a linked SQE */
-		ret = -EINVAL;
-		goto err_req;
 	} else {
 		io_queue_sqe(req);
 	}
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 5/7] io_uring: Fix leaking linked timeouts
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (5 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 4/7] io_uring: remove redundant check Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 6/7] io_uring: io_fail_links() should only consider first linked timeout Jens Axboe
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

From: Pavel Begunkov <asml.silence@gmail.com>

let have a dependant link: REQ -> LINK_TIMEOUT -> LINK_TIMEOUT

1. submission stage: submission references for REQ and LINK_TIMEOUT
are dropped. So, references respectively (1,1,2)

2. io_put(REQ) + FAIL_LINKS stage: calls io_fail_links(), which for all
linked timeouts will call cancel_timeout() and drop 1 reference.
So, references after: (0,0,1). That's a leak.

Make it treat only the first linked timeout as such, and pass others
through __io_double_put_req().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8d25e157b4d8..a79ef43367b1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -937,6 +937,7 @@ static void io_fail_links(struct io_kiocb *req)
 		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
 		    link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
 			io_link_cancel_timeout(link);
+			req->flags &= ~REQ_F_LINK_TIMEOUT;
 		} else {
 			io_cqring_fill_event(link, -ECANCELED);
 			__io_double_put_req(link);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 6/7] io_uring: io_fail_links() should only consider first linked timeout
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (6 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 5/7] io_uring: Fix leaking linked timeouts Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:09 ` [PATCH 7/7] io_uring: Always REQ_F_FREE_SQE for allocated sqe Jens Axboe
  2019-11-20 20:11 ` [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

We currently clear the linked timeout field if we cancel such a timeout,
but we should only attempt to cancel if it's the first one we see.
Others should simply be freed like other requests, as they haven't
been started yet.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a79ef43367b1..d1085e4e8ae9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -937,12 +937,12 @@ static void io_fail_links(struct io_kiocb *req)
 		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
 		    link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
 			io_link_cancel_timeout(link);
-			req->flags &= ~REQ_F_LINK_TIMEOUT;
 		} else {
 			io_cqring_fill_event(link, -ECANCELED);
 			__io_double_put_req(link);
 		}
 		kfree(sqe_to_free);
+		req->flags &= ~REQ_F_LINK_TIMEOUT;
 	}
 
 	io_commit_cqring(ctx);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 7/7] io_uring: Always REQ_F_FREE_SQE for allocated sqe
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (7 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 6/7] io_uring: io_fail_links() should only consider first linked timeout Jens Axboe
@ 2019-11-20 20:09 ` Jens Axboe
  2019-11-20 20:11 ` [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:09 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

From: Pavel Begunkov <asml.silence@gmail.com>

Always mark requests with allocated sqe and deallocate it in
__io_free_req(). It's easier to follow and doesn't add edge cases.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 49 ++++++++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d1085e4e8ae9..df7f5ce5bb06 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -829,6 +829,8 @@ static void __io_free_req(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
+	if (req->flags & REQ_F_FREE_SQE)
+		kfree(req->submit.sqe);
 	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
 		fput(req->file);
 	if (req->flags & REQ_F_INFLIGHT) {
@@ -924,16 +926,11 @@ static void io_fail_links(struct io_kiocb *req)
 	spin_lock_irqsave(&ctx->completion_lock, flags);
 
 	while (!list_empty(&req->link_list)) {
-		const struct io_uring_sqe *sqe_to_free = NULL;
-
 		link = list_first_entry(&req->link_list, struct io_kiocb, list);
 		list_del_init(&link->list);
 
 		trace_io_uring_fail_link(req, link);
 
-		if (link->flags & REQ_F_FREE_SQE)
-			sqe_to_free = link->submit.sqe;
-
 		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
 		    link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
 			io_link_cancel_timeout(link);
@@ -941,7 +938,6 @@ static void io_fail_links(struct io_kiocb *req)
 			io_cqring_fill_event(link, -ECANCELED);
 			__io_double_put_req(link);
 		}
-		kfree(sqe_to_free);
 		req->flags &= ~REQ_F_LINK_TIMEOUT;
 	}
 
@@ -1084,7 +1080,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			 * completions for those, only batch free for fixed
 			 * file and non-linked commands.
 			 */
-			if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+			if (((req->flags &
+				(REQ_F_FIXED_FILE|REQ_F_LINK|REQ_F_FREE_SQE)) ==
 			    REQ_F_FIXED_FILE) && !io_is_fallback_req(req)) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
@@ -2567,6 +2564,7 @@ static int io_req_defer(struct io_kiocb *req)
 	}
 
 	memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+	req->flags |= REQ_F_FREE_SQE;
 	req->submit.sqe = sqe_copy;
 
 	trace_io_uring_defer(ctx, req, false);
@@ -2661,7 +2659,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 	struct io_wq_work *work = *workptr;
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
 	struct sqe_submit *s = &req->submit;
-	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_kiocb *nxt = NULL;
 	int ret = 0;
 
@@ -2697,9 +2694,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 		io_put_req(req);
 	}
 
-	/* async context always use a copy of the sqe */
-	kfree(sqe);
-
 	/* if a dependent link is ready, pass it back */
 	if (!ret && nxt) {
 		struct io_kiocb *link;
@@ -2897,23 +2891,24 @@ static void __io_queue_sqe(struct io_kiocb *req)
 		struct io_uring_sqe *sqe_copy;
 
 		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
-		if (sqe_copy) {
-			s->sqe = sqe_copy;
-			if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
-				ret = io_grab_files(req);
-				if (ret) {
-					kfree(sqe_copy);
-					goto err;
-				}
-			}
+		if (!sqe_copy)
+			goto err;
 
-			/*
-			 * Queued up for async execution, worker will release
-			 * submit reference when the iocb is actually submitted.
-			 */
-			io_queue_async_work(req);
-			return;
+		s->sqe = sqe_copy;
+		req->flags |= REQ_F_FREE_SQE;
+
+		if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
+			ret = io_grab_files(req);
+			if (ret)
+				goto err;
 		}
+
+		/*
+		 * Queued up for async execution, worker will release
+		 * submit reference when the iocb is actually submitted.
+		 */
+		io_queue_async_work(req);
+		return;
 	}
 
 err:
@@ -3008,7 +3003,6 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
 static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 			  struct io_kiocb **link)
 {
-	struct io_uring_sqe *sqe_copy;
 	struct sqe_submit *s = &req->submit;
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -3038,6 +3032,7 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 	 */
 	if (*link) {
 		struct io_kiocb *prev = *link;
+		struct io_uring_sqe *sqe_copy;
 
 		if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
 			ret = io_timeout_setup(req);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCHSET 0/2] io_uring: close lookup gap for dependent work
  2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
                   ` (8 preceding siblings ...)
  2019-11-20 20:09 ` [PATCH 7/7] io_uring: Always REQ_F_FREE_SQE for allocated sqe Jens Axboe
@ 2019-11-20 20:11 ` Jens Axboe
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:11 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

On 11/20/19 1:09 PM, Jens Axboe wrote:
> As discussed earlier today on this list, there's a gap between finding
> dependent work and ensuring we can look it up for cancellation purposes.
> On top of that, we also currently NEVER find dependent work due to how
> we do lookups of it, so that is fixed in patch 1 while patch 2
> implements the fix for the lookup gap.
> 
> Patches are against for-5.5/io_uring-post
> 
>   fs/io-wq.c    |  3 +++
>   fs/io-wq.h    | 12 +++++++++++-
>   fs/io_uring.c | 34 +++++++++++++++++++++++++++-------
>   3 files changed, 41 insertions(+), 8 deletions(-)

Gah, disregard this posting, there were older patches in that dir too.
Re-sending a correct one.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] io_uring: allow finding next link independent of req reference count
  2019-11-20 20:12 [PATCHSET v2 " Jens Axboe
@ 2019-11-20 20:12 ` Jens Axboe
  0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2019-11-20 20:12 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Jens Axboe

We currently try and start the next link when we put the request, and
only if we were going to free it. This means that the optimization to
continue executing requests from the same context often fails, as we're
not putting the final reference.

Add REQ_F_LINK_NEXT to keep track of this, and allow io_uring to find the
next request more efficiently.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 066b59ffb54e..132a890368bf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -340,6 +340,7 @@ struct io_kiocb {
 #define REQ_F_NOWAIT		1	/* must not punt to workers */
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
+#define REQ_F_LINK_NEXT		8	/* already grabbed next link */
 #define REQ_F_IO_DRAIN		16	/* drain existing IO first */
 #define REQ_F_IO_DRAINED	32	/* drain done */
 #define REQ_F_LINK		64	/* linked sqes */
@@ -874,6 +875,10 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 	struct io_kiocb *nxt;
 	bool wake_ev = false;
 
+	/* Already got next link */
+	if (req->flags & REQ_F_LINK_NEXT)
+		return;
+
 	/*
 	 * The list should never be empty when we are called here. But could
 	 * potentially happen if the chain is messed up, check to be on the
@@ -910,6 +915,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 		break;
 	}
 
+	req->flags |= REQ_F_LINK_NEXT;
 	if (wake_ev)
 		io_cqring_ev_posted(ctx);
 }
@@ -946,12 +952,10 @@ static void io_fail_links(struct io_kiocb *req)
 	io_cqring_ev_posted(ctx);
 }
 
-static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+static void io_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
 {
-	if (likely(!(req->flags & REQ_F_LINK))) {
-		__io_free_req(req);
+	if (likely(!(req->flags & REQ_F_LINK)))
 		return;
-	}
 
 	/*
 	 * If LINK is set, we have dependent requests in this chain. If we
@@ -977,7 +981,11 @@ static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
 	} else {
 		io_req_link_next(req, nxt);
 	}
+}
 
+static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+	io_req_find_next(req, nxt);
 	__io_free_req(req);
 }
 
@@ -994,8 +1002,10 @@ static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 {
 	struct io_kiocb *nxt = NULL;
 
+	io_req_find_next(req, &nxt);
+
 	if (refcount_dec_and_test(&req->refs))
-		io_free_req_find_next(req, &nxt);
+		__io_free_req(req);
 
 	if (nxt) {
 		if (nxtptr)
-- 
2.24.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, back to index

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-20 20:09 [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
2019-11-20 20:09 ` [PATCH 1/7] io-wq: wait for io_wq_create() to setup necessary workers Jens Axboe
2019-11-20 20:09 ` [PATCH 1/2] io_uring: allow finding next link independent of req reference count Jens Axboe
2019-11-20 20:09 ` [PATCH 2/7] io-wq: remove extra space characters Jens Axboe
2019-11-20 20:09 ` [PATCH 2/2] io_uring: close lookup gap for dependent next work Jens Axboe
2019-11-20 20:09 ` [PATCH 3/7] io_uring: break links for failed defer Jens Axboe
2019-11-20 20:09 ` [PATCH 4/7] io_uring: remove redundant check Jens Axboe
2019-11-20 20:09 ` [PATCH 5/7] io_uring: Fix leaking linked timeouts Jens Axboe
2019-11-20 20:09 ` [PATCH 6/7] io_uring: io_fail_links() should only consider first linked timeout Jens Axboe
2019-11-20 20:09 ` [PATCH 7/7] io_uring: Always REQ_F_FREE_SQE for allocated sqe Jens Axboe
2019-11-20 20:11 ` [PATCHSET 0/2] io_uring: close lookup gap for dependent work Jens Axboe
2019-11-20 20:12 [PATCHSET v2 " Jens Axboe
2019-11-20 20:12 ` [PATCH 1/2] io_uring: allow finding next link independent of req reference count Jens Axboe

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git