IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] io_uring: prune request from overflow list on flush
@ 2020-02-14  0:17 Jens Axboe
  2020-02-14  0:51 ` Jens Axboe
  0 siblings, 1 reply; 2+ messages in thread
From: Jens Axboe @ 2020-02-14  0:17 UTC (permalink / raw)
  To: io-uring, 李通洲

Carter reported an issue where he could produce a stall on ring exit,
when we're cleaning up requests that match the given file table. For
this particular test case, a combination of a few things caused the
issue:

- The cq ring had overflowed
- The request being canceled was in the overflow list

The combination of the above means that the cq overflow list holds a
reference to the request. The request is canceled correctly, but since
the overflow list holds a reference to it, the final put won't happen.
Since the final put doesn't happen, the request remains in the inflight
list. Hence we never finish the cancelation flush.

Fix this by removing requests from the overflow list if we're canceling
them.

Reported-by: Carter Li 李通洲 <carter.li@eoitek.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9f00f30e1790..d967a17c5923 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -481,6 +481,7 @@ enum {
 	REQ_F_TIMEOUT_NOSEQ_BIT,
 	REQ_F_COMP_LOCKED_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
+	REQ_F_OVERFLOW_BIT,
 };
 
 enum {
@@ -521,6 +522,8 @@ enum {
 	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
 	/* needs cleanup */
 	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
+	/* in overflow list */
+	REQ_F_OVERFLOW		= BIT(REQ_F_OVERFLOW_BIT),
 };
 
 /*
@@ -1103,6 +1106,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
 						list);
 		list_move(&req->list, &list);
+		req->flags &= ~REQ_F_OVERFLOW;
 		if (cqe) {
 			WRITE_ONCE(cqe->user_data, req->user_data);
 			WRITE_ONCE(cqe->res, req->result);
@@ -1155,6 +1159,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
 			set_bit(0, &ctx->sq_check_overflow);
 			set_bit(0, &ctx->cq_check_overflow);
 		}
+		req->flags |= REQ_F_OVERFLOW;
 		refcount_inc(&req->refs);
 		req->result = res;
 		list_add_tail(&req->list, &ctx->cq_overflow_list);
@@ -6536,6 +6541,26 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 		if (!cancel_req)
 			break;
 
+		if (cancel_req->flags & REQ_F_OVERFLOW) {
+			spin_lock_irq(&ctx->completion_lock);
+			list_del(&cancel_req->list);
+			cancel_req->flags &= ~REQ_F_OVERFLOW;
+			if (list_empty(&ctx->cq_overflow_list)) {
+				clear_bit(0, &ctx->sq_check_overflow);
+				clear_bit(0, &ctx->cq_check_overflow);
+			}
+			spin_unlock_irq(&ctx->completion_lock);
+
+			/*
+			 * Put inflight ref and overflow ref. If that's
+			 * all we had, then we're done with this request.
+			 */
+			if (refcount_sub_and_test(2, &cancel_req->refs)) {
+				io_put_req(cancel_req);
+				continue;
+			}
+		}
+
 		io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
 		io_put_req(cancel_req);
 		schedule();

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] io_uring: prune request from overflow list on flush
  2020-02-14  0:17 [PATCH] io_uring: prune request from overflow list on flush Jens Axboe
@ 2020-02-14  0:51 ` Jens Axboe
  0 siblings, 0 replies; 2+ messages in thread
From: Jens Axboe @ 2020-02-14  0:51 UTC (permalink / raw)
  To: io-uring, 李通洲

On 2/13/20 5:17 PM, Jens Axboe wrote:
> Carter reported an issue where he could produce a stall on ring exit,
> when we're cleaning up requests that match the given file table. For
> this particular test case, a combination of a few things caused the
> issue:
> 
> - The cq ring had overflowed
> - The request being canceled was in the overflow list
> 
> The combination of the above means that the cq overflow list holds a
> reference to the request. The request is canceled correctly, but since
> the overflow list holds a reference to it, the final put won't happen.
> Since the final put doesn't happen, the request remains in the inflight
> list. Hence we never finish the cancelation flush.
> 
> Fix this by removing requests from the overflow list if we're canceling
> them.

What I queued up was a v2; the only difference is that we now increment
the overflow counter when we prune a request from the overflow list.
Below for reference:


commit 2ca10259b4189a433c309054496dd6af1415f992
Author: Jens Axboe <axboe@kernel.dk>
Date:   Thu Feb 13 17:17:35 2020 -0700

    io_uring: prune request from overflow list on flush
    
    Carter reported an issue where he could produce a stall on ring exit,
    when we're cleaning up requests that match the given file table. For
    this particular test case, a combination of a few things caused the
    issue:
    
    - The cq ring had overflowed
    - The request being canceled was in the overflow list
    
    The combination of the above means that the cq overflow list holds a
    reference to the request. The request is canceled correctly, but since
    the overflow list holds a reference to it, the final put won't happen.
    Since the final put doesn't happen, the request remains in the inflight
    list. Hence we never finish the cancelation flush.
    
    Fix this by removing requests from the overflow list if we're canceling
    them.
    
    Cc: stable@vger.kernel.org # 5.5
    Reported-by: Carter Li 李通洲 <carter.li@eoitek.com>
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6d4e20d59729..5a826017ebb8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -481,6 +481,7 @@ enum {
 	REQ_F_TIMEOUT_NOSEQ_BIT,
 	REQ_F_COMP_LOCKED_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
+	REQ_F_OVERFLOW_BIT,
 };
 
 enum {
@@ -521,6 +522,8 @@ enum {
 	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
 	/* needs cleanup */
 	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
+	/* in overflow list */
+	REQ_F_OVERFLOW		= BIT(REQ_F_OVERFLOW_BIT),
 };
 
 /*
@@ -1103,6 +1106,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
 						list);
 		list_move(&req->list, &list);
+		req->flags &= ~REQ_F_OVERFLOW;
 		if (cqe) {
 			WRITE_ONCE(cqe->user_data, req->user_data);
 			WRITE_ONCE(cqe->res, req->result);
@@ -1155,6 +1159,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
 			set_bit(0, &ctx->sq_check_overflow);
 			set_bit(0, &ctx->cq_check_overflow);
 		}
+		req->flags |= REQ_F_OVERFLOW;
 		refcount_inc(&req->refs);
 		req->result = res;
 		list_add_tail(&req->list, &ctx->cq_overflow_list);
@@ -6463,6 +6468,29 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 		if (!cancel_req)
 			break;
 
+		if (cancel_req->flags & REQ_F_OVERFLOW) {
+			spin_lock_irq(&ctx->completion_lock);
+			list_del(&cancel_req->list);
+			cancel_req->flags &= ~REQ_F_OVERFLOW;
+			if (list_empty(&ctx->cq_overflow_list)) {
+				clear_bit(0, &ctx->sq_check_overflow);
+				clear_bit(0, &ctx->cq_check_overflow);
+			}
+			spin_unlock_irq(&ctx->completion_lock);
+
+			WRITE_ONCE(ctx->rings->cq_overflow,
+				atomic_inc_return(&ctx->cached_cq_overflow));
+
+			/*
+			 * Put inflight ref and overflow ref. If that's
+			 * all we had, then we're done with this request.
+			 */
+			if (refcount_sub_and_test(2, &cancel_req->refs)) {
+				io_put_req(cancel_req);
+				continue;
+			}
+		}
+
 		io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
 		io_put_req(cancel_req);
 		schedule();

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-14  0:17 [PATCH] io_uring: prune request from overflow list on flush Jens Axboe
2020-02-14  0:51 ` Jens Axboe

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git