* [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs
@ 2020-02-12 20:25 Jens Axboe
  2020-02-12 20:25 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC
  To: io-uring

Here's v2 of the "let's make POLL_ADD work on everything" patchset. As
before, patches 1-2 are just basic prep patches, and should not have any
functional changes in them. Patch 3 adds support for allocating a new
io_poll_iocb unit if we get multiple additions through our queue proc
for the wait queues. This new 'poll' addition is queued up as well, and
it grabs a reference to the original poll request.
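
For reference, the case that needs all this is a ->poll() handler that
registers on more than one waitqueue, along the lines of the sketch
below (mydev and the *_available() helpers are hypothetical):

	static __poll_t mydev_poll(struct file *file, poll_table *wait)
	{
		struct mydev *dev = file->private_data;
		__poll_t mask = 0;

		/* each poll_wait() funnels into our queue proc */
		poll_wait(file, &dev->rd_wait, wait);
		poll_wait(file, &dev->wr_wait, wait);

		if (data_available(dev))
			mask |= EPOLLIN | EPOLLRDNORM;
		if (space_available(dev))
			mask |= EPOLLOUT | EPOLLWRNORM;
		return mask;
	}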

Changes since v1:

- Fix unused 'ret' variable in io_poll_double_wake()
- Fail if we get an attempt at a third waitqueue addition (Pavel)

-- 
Jens Axboe




* [PATCH 1/3] io_uring: store io_kiocb in wait->private
  2020-02-12 20:25 [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
@ 2020-02-12 20:25 ` Jens Axboe
  2020-02-12 20:25 ` [PATCH 2/3] io_uring: abstract out main poll wake handler Jens Axboe
  2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
  2 siblings, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC
  To: io-uring; +Cc: Jens Axboe

Store the io_kiocb in the private field instead of the poll entry; this
is in preparation for allowing multiple waitqueues.

No functional changes in this patch.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6d4e20d59729..08ffeb7df4f5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3625,8 +3625,8 @@ static void io_poll_trigger_evfd(struct io_wq_work **workptr)
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			void *key)
 {
-	struct io_poll_iocb *poll = wait->private;
-	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
+	struct io_kiocb *req = wait->private;
+	struct io_poll_iocb *poll = &req->poll;
 	struct io_ring_ctx *ctx = req->ctx;
 	__poll_t mask = key_to_poll(key);
 
@@ -3749,7 +3749,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
 	/* initialize the list so that we can do list_empty checks */
 	INIT_LIST_HEAD(&poll->wait.entry);
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
-	poll->wait.private = poll;
+	poll->wait.private = req;
 
 	INIT_LIST_HEAD(&req->list);
 
-- 
2.25.0



* [PATCH 2/3] io_uring: abstract out main poll wake handler
  2020-02-12 20:25 [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
  2020-02-12 20:25 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
@ 2020-02-12 20:25 ` Jens Axboe
  2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
  2 siblings, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC
  To: io-uring; +Cc: Jens Axboe

In preparation for having multiple poll waitqueues, abstract out the
main wake handler so we can call it with the desired data.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 74 +++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 35 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 08ffeb7df4f5..9cd2ce3b8ad9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3622,17 +3622,11 @@ static void io_poll_trigger_evfd(struct io_wq_work **workptr)
 	io_put_req(req);
 }
 
-static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
-			void *key)
+static void __io_poll_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+			   __poll_t mask)
 {
-	struct io_kiocb *req = wait->private;
-	struct io_poll_iocb *poll = &req->poll;
 	struct io_ring_ctx *ctx = req->ctx;
-	__poll_t mask = key_to_poll(key);
-
-	/* for instances that support it check for an event match first: */
-	if (mask && !(mask & poll->events))
-		return 0;
+	unsigned long flags;
 
 	list_del_init(&poll->wait.entry);
 
@@ -3642,40 +3636,50 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	 * If we have a link timeout we're going to need the completion_lock
 	 * for finalizing the request, mark us as having grabbed that already.
 	 */
-	if (mask) {
-		unsigned long flags;
+	if (llist_empty(&ctx->poll_llist) && !req->io &&
+	    spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+		bool trigger_ev;
 
-		if (llist_empty(&ctx->poll_llist) &&
-		    spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-			bool trigger_ev;
-
-			hash_del(&req->hash_node);
-			io_poll_complete(req, mask, 0);
+		hash_del(&req->hash_node);
+		io_poll_complete(req, mask, 0);
 
-			trigger_ev = io_should_trigger_evfd(ctx);
-			if (trigger_ev && eventfd_signal_count()) {
-				trigger_ev = false;
-				req->work.func = io_poll_trigger_evfd;
-			} else {
-				req->flags |= REQ_F_COMP_LOCKED;
-				io_put_req(req);
-				req = NULL;
-			}
-			spin_unlock_irqrestore(&ctx->completion_lock, flags);
-			__io_cqring_ev_posted(ctx, trigger_ev);
+		trigger_ev = io_should_trigger_evfd(ctx);
+		if (trigger_ev && eventfd_signal_count()) {
+			trigger_ev = false;
+			req->work.func = io_poll_trigger_evfd;
 		} else {
-			req->result = mask;
-			req->llist_node.next = NULL;
-			/* if the list wasn't empty, we're done */
-			if (!llist_add(&req->llist_node, &ctx->poll_llist))
-				req = NULL;
-			else
-				req->work.func = io_poll_flush;
+			req->flags |= REQ_F_COMP_LOCKED;
+			io_put_req(req);
+			req = NULL;
 		}
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+		__io_cqring_ev_posted(ctx, trigger_ev);
+	} else {
+		req->result = mask;
+		req->llist_node.next = NULL;
+		/* if the list wasn't empty, we're done */
+		if (!llist_add(&req->llist_node, &ctx->poll_llist))
+			req = NULL;
+		else
+			req->work.func = io_poll_flush;
 	}
+
 	if (req)
 		io_queue_async_work(req);
+}
+
+static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+			void *key)
+{
+	struct io_kiocb *req = wait->private;
+	struct io_poll_iocb *poll = &req->poll;
+	__poll_t mask = key_to_poll(key);
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & poll->events))
+		return 0;
 
+	__io_poll_wake(req, &req->poll, mask);
 	return 1;
 }
 
-- 
2.25.0



* [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
  2020-02-12 20:25 [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
  2020-02-12 20:25 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
  2020-02-12 20:25 ` [PATCH 2/3] io_uring: abstract out main poll wake handler Jens Axboe
@ 2020-02-12 20:25 ` Jens Axboe
  2020-02-13 15:50   ` Pavel Begunkov
  2 siblings, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC
  To: io-uring; +Cc: Jens Axboe

Some file descriptors use separate waitqueues for their f_ops->poll()
handler, most commonly one for read and one for write. The io_uring
poll implementation doesn't work with that, as the second poll_wait()
call will cause the io_uring poll request to fail with -EINVAL.

This is particularly a problem now that pipes were switched to using
multiple wait queues (commit 0ddad21d3e99), but it also affects tty
devices and /dev/random. This is a big problem for event loops where
some file descriptors work and others don't.

With this fix, io_uring handles multiple waitqueues.
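
As an example of what this enables, a one-shot poll on a pipe fd now
completes instead of erroring. A minimal liburing sketch (untested,
error handling omitted):

	#include <liburing.h>
	#include <poll.h>
	#include <unistd.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		int fds[2];

		pipe(fds);
		io_uring_queue_init(8, &ring, 0);

		/* one-shot poll on the read side of the pipe */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_poll_add(sqe, fds[0], POLLIN);
		io_uring_submit(&ring);

		write(fds[1], "x", 1);	/* make the pipe readable */

		io_uring_wait_cqe(&ring, &cqe);
		/* on success, cqe->res holds the signaled mask (POLLIN) */
		io_uring_cqe_seen(&ring, cqe);
		io_uring_queue_exit(&ring);
		return 0;
	}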

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 74 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9cd2ce3b8ad9..9f00f30e1790 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3440,10 +3440,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
 #endif
 }
 
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+
+	if (poll && poll->head) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&poll->head->lock, flags);
+		list_del_init(&poll->wait.entry);
+		if (poll->wait.private)
+			refcount_dec(&req->refs);
+		spin_unlock_irqrestore(&poll->head->lock, flags);
+	}
+}
+
 static void io_poll_remove_one(struct io_kiocb *req)
 {
 	struct io_poll_iocb *poll = &req->poll;
 
+	io_poll_remove_double(req);
+
 	spin_lock(&poll->head->lock);
 	WRITE_ONCE(poll->canceled, true);
 	if (!list_empty(&poll->wait.entry)) {
@@ -3679,10 +3696,38 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (mask && !(mask & poll->events))
 		return 0;
 
+	io_poll_remove_double(req);
 	__io_poll_wake(req, &req->poll, mask);
 	return 1;
 }
 
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+			       int sync, void *key)
+{
+	struct io_kiocb *req = wait->private;
+	struct io_poll_iocb *poll = (void *) req->io;
+	__poll_t mask = key_to_poll(key);
+	bool done = true;
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & poll->events))
+		return 0;
+
+	if (req->poll.head) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&req->poll.head->lock, flags);
+		done = list_empty(&req->poll.wait.entry);
+		if (!done)
+			list_del_init(&req->poll.wait.entry);
+		spin_unlock_irqrestore(&req->poll.head->lock, flags);
+	}
+	if (!done)
+		__io_poll_wake(req, poll, mask);
+	refcount_dec(&req->refs);
+	return 1;
+}
+
 struct io_poll_table {
 	struct poll_table_struct pt;
 	struct io_kiocb *req;
@@ -3693,15 +3738,38 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 			       struct poll_table_struct *p)
 {
 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+	struct io_kiocb *req = pt->req;
+	struct io_poll_iocb *poll = &req->poll;
 
-	if (unlikely(pt->req->poll.head)) {
-		pt->error = -EINVAL;
-		return;
+	/*
+	 * If poll->head is already set, it's because the file being polled
+	 * uses multiple waitqueues for poll handling (e.g. one for read, one
+	 * for write). Set up a separate io_poll_iocb if this happens.
+	 */
+	if (unlikely(poll->head)) {
+		/* already have a 2nd entry, fail a third attempt */
+		if (req->io) {
+			pt->error = -EINVAL;
+			return;
+		}
+		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+		if (!poll) {
+			pt->error = -ENOMEM;
+			return;
+		}
+		poll->done = false;
+		poll->canceled = false;
+		poll->events = req->poll.events;
+		INIT_LIST_HEAD(&poll->wait.entry);
+		init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
+		refcount_inc(&req->refs);
+		poll->wait.private = req;
+		req->io = (void *) poll;
 	}
 
 	pt->error = 0;
-	pt->req->poll.head = head;
-	add_wait_queue(head, &pt->req->poll.wait);
+	poll->head = head;
+	add_wait_queue(head, &poll->wait);
 }
 
 static void io_poll_req_insert(struct io_kiocb *req)
@@ -3778,6 +3846,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
 	}
 	if (mask) { /* no async, we'd stolen it */
 		ipt.error = 0;
+		io_poll_remove_double(req);
 		io_poll_complete(req, mask, 0);
 	}
 	spin_unlock_irq(&ctx->completion_lock);
-- 
2.25.0



* Re: [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
  2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
@ 2020-02-13 15:50   ` Pavel Begunkov
  0 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2020-02-13 15:50 UTC
  To: Jens Axboe, io-uring

Hi,

There are a couple of comments below.

On 2/12/2020 11:25 PM, Jens Axboe wrote:
> Some file descriptors use separate waitqueues for their f_ops->poll()
> handler, most commonly one for read and one for write. The io_uring
> poll implementation doesn't work with that, as the second poll_wait()
> call will cause the io_uring poll request to fail with -EINVAL.
> 
> This is particularly a problem now that pipes were switched to using
> multiple wait queues (commit 0ddad21d3e99), but it also affects tty
> devices and /dev/random. This is a big problem for event loops where
> some file descriptors work and others don't.
> 
> With this fix, io_uring handles multiple waitqueues.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/io_uring.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 74 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 9cd2ce3b8ad9..9f00f30e1790 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -3440,10 +3440,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
>  #endif
>  }
>  
> +static void io_poll_remove_double(struct io_kiocb *req)
> +{
> +	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
> +
> +	if (poll && poll->head) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&poll->head->lock, flags);
> +		list_del_init(&poll->wait.entry);
> +		if (poll->wait.private)
> +			refcount_dec(&req->refs);
> +		spin_unlock_irqrestore(&poll->head->lock, flags);
> +	}
> +}
> +
>  static void io_poll_remove_one(struct io_kiocb *req)
>  {
>  	struct io_poll_iocb *poll = &req->poll;
>  
> +	io_poll_remove_double(req);
> +
>  	spin_lock(&poll->head->lock);
>  	WRITE_ONCE(poll->canceled, true);
>  	if (!list_empty(&poll->wait.entry)) {
> @@ -3679,10 +3696,38 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
>  	if (mask && !(mask & poll->events))
>  		return 0;
>  
> +	io_poll_remove_double(req);
>  	__io_poll_wake(req, &req->poll, mask);
>  	return 1;
>  }
>  
> +static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
> +			       int sync, void *key)
> +{
> +	struct io_kiocb *req = wait->private;
> +	struct io_poll_iocb *poll = (void *) req->io;
> +	__poll_t mask = key_to_poll(key);
> +	bool done = true;
> +
> +	/* for instances that support it check for an event match first: */
> +	if (mask && !(mask & poll->events))
> +		return 0;
> +
> +	if (req->poll.head) {

Can there be concurrency problems?

1. io_poll_wake() -> io_poll_remove_double() is running
   while the second io_poll_queue_proc() is called.
   Then there will be a race on req->io

2. concurrent io_poll_wake() and io_poll_double_wake()


> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&req->poll.head->lock, flags);
> +		done = list_empty(&req->poll.wait.entry);
> +		if (!done)
> +			list_del_init(&req->poll.wait.entry);
> +		spin_unlock_irqrestore(&req->poll.head->lock, flags);
> +	}
> +	if (!done)
> +		__io_poll_wake(req, poll, mask);

It's always false if we didn't hit the block under `req->poll.head`, so
the call could be moved into that block along with the @done declaration.
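
I.e. something like this (untested):

	if (req->poll.head) {
		bool done;
		unsigned long flags;

		spin_lock_irqsave(&req->poll.head->lock, flags);
		done = list_empty(&req->poll.wait.entry);
		if (!done)
			list_del_init(&req->poll.wait.entry);
		spin_unlock_irqrestore(&req->poll.head->lock, flags);
		if (!done)
			__io_poll_wake(req, poll, mask);
	}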

> +	refcount_dec(&req->refs);
> +	return 1;
> +}
> +
>  struct io_poll_table {
>  	struct poll_table_struct pt;
>  	struct io_kiocb *req;
> @@ -3693,15 +3738,38 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>  			       struct poll_table_struct *p)
>  {

May this be called concurrently? (at least in theory)

>  	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
> +	struct io_kiocb *req = pt->req;
> +	struct io_poll_iocb *poll = &req->poll;
>  
> -	if (unlikely(pt->req->poll.head)) {
> -		pt->error = -EINVAL;
> -		return;
> +	/*
> +	 * If poll->head is already set, it's because the file being polled
> +	 * uses multiple waitqueues for poll handling (e.g. one for read, one
> +	 * for write). Set up a separate io_poll_iocb if this happens.
> +	 */
> +	if (unlikely(poll->head)) {
> +		/* already have a 2nd entry, fail a third attempt */
> +		if (req->io) {
> +			pt->error = -EINVAL;
> +			return;
> +		}
> +		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);

Don't see where this is freed

> +		if (!poll) {
> +			pt->error = -ENOMEM;
> +			return;
> +		}
> +		poll->done = false;
> +		poll->canceled = false;
> +		poll->events = req->poll.events;
> +		INIT_LIST_HEAD(&poll->wait.entry);
> +		init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
> +		refcount_inc(&req->refs);
> +		poll->wait.private = req;
> +		req->io = (void *) poll;
>  	}
>  
>  	pt->error = 0;
> -	pt->req->poll.head = head;
> -	add_wait_queue(head, &pt->req->poll.wait);
> +	poll->head = head;
> +	add_wait_queue(head, &poll->wait);
>  }
>  
>  static void io_poll_req_insert(struct io_kiocb *req)
> @@ -3778,6 +3846,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
>  	}
>  	if (mask) { /* no async, we'd stolen it */
>  		ipt.error = 0;
> +		io_poll_remove_double(req);
>  		io_poll_complete(req, mask, 0);
>  	}
>  	spin_unlock_irq(&ctx->completion_lock);
> 

-- 
Pavel Begunkov
