io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHSET 0/2] io_uring: use TWA_SIGNAL more carefully
@ 2020-08-08 18:34 Jens Axboe
  2020-08-08 18:34 ` [PATCH 1/2] kernel: split task_work_add() into two separate helpers Jens Axboe
  2020-08-08 18:34 ` [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running Jens Axboe
  0 siblings, 2 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-08 18:34 UTC (permalink / raw)
  To: io-uring; +Cc: peterz

Adding io_uring support to Netty, Josef noticed that there's another
case where we can fail to process task_work in time. We previously
added a "work-around" for eventfd, see:

b7db41c9e03b ("io_uring: fix regression with always ignoring signals in io_cqring_wait()")

but we can run into this dependency issue even without that. See the
test case added to liburing:

https://git.kernel.dk/cgit/liburing/tree/test/wakeup-hang.c

for an example of that. This series adds a split way to call
task_work_add(), so we can use TWA_SIGNAL if the task is currently
not running. This over-reaches a bit since there are definitely cases
where we do not need to use it, but better safe than sorry for now.

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 1/2] kernel: split task_work_add() into two separate helpers
  2020-08-08 18:34 [PATCHSET 0/2] io_uring: use TWA_SIGNAL more carefully Jens Axboe
@ 2020-08-08 18:34 ` Jens Axboe
  2020-08-10 11:37   ` peterz
  2020-08-08 18:34 ` [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running Jens Axboe
  1 sibling, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-08 18:34 UTC (permalink / raw)
  To: io-uring; +Cc: peterz, Jens Axboe, stable

Some callers may need to make signaling decisions based on the state
of the targeted task, and that can only safely be done post adding
the task_work to the task. Split task_work_add() into:

__task_work_add()	- adds the work item
__task_work_notify()	- sends the notification

No functional changes in this patch.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/task_work.h | 19 ++++++++++++++++
 kernel/task_work.c        | 48 +++++++++++++++++++++------------------
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 0fb93aafa478..7abbd8df5e13 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -5,6 +5,8 @@
 #include <linux/list.h>
 #include <linux/sched.h>
 
+extern struct callback_head work_exited;
+
 typedef void (*task_work_func_t)(struct callback_head *);
 
 static inline void
@@ -13,6 +15,21 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 	twork->func = func;
 }
 
+static inline int __task_work_add(struct task_struct *task,
+				  struct callback_head *work)
+{
+	struct callback_head *head;
+
+	do {
+		head = READ_ONCE(task->task_works);
+		if (unlikely(head == &work_exited))
+			return -ESRCH;
+		work->next = head;
+	} while (cmpxchg(&task->task_works, head, work) != head);
+
+	return 0;
+}
+
 #define TWA_RESUME	1
 #define TWA_SIGNAL	2
 int task_work_add(struct task_struct *task, struct callback_head *twork, int);
@@ -20,6 +37,8 @@ int task_work_add(struct task_struct *task, struct callback_head *twork, int);
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
 
+void __task_work_notify(struct task_struct *task, int notify);
+
 static inline void exit_task_work(struct task_struct *task)
 {
 	task_work_run();
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5c0848ca1287..9bde81481984 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -3,7 +3,27 @@
 #include <linux/task_work.h>
 #include <linux/tracehook.h>
 
-static struct callback_head work_exited; /* all we need is ->next == NULL */
+struct callback_head work_exited = {
+	.next = NULL	/* all we need is ->next == NULL */
+};
+
+void __task_work_notify(struct task_struct *task, int notify)
+{
+	unsigned long flags;
+
+	switch (notify) {
+	case TWA_RESUME:
+		set_notify_resume(task);
+		break;
+	case TWA_SIGNAL:
+		if (lock_task_sighand(task, &flags)) {
+			task->jobctl |= JOBCTL_TASK_WORK;
+			signal_wake_up(task, 0);
+			unlock_task_sighand(task, &flags);
+		}
+		break;
+	}
+}
 
 /**
  * task_work_add - ask the @task to execute @work->func()
@@ -27,29 +47,13 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
 int
 task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 {
-	struct callback_head *head;
-	unsigned long flags;
+	int ret;
 
-	do {
-		head = READ_ONCE(task->task_works);
-		if (unlikely(head == &work_exited))
-			return -ESRCH;
-		work->next = head;
-	} while (cmpxchg(&task->task_works, head, work) != head);
-
-	switch (notify) {
-	case TWA_RESUME:
-		set_notify_resume(task);
-		break;
-	case TWA_SIGNAL:
-		if (lock_task_sighand(task, &flags)) {
-			task->jobctl |= JOBCTL_TASK_WORK;
-			signal_wake_up(task, 0);
-			unlock_task_sighand(task, &flags);
-		}
-		break;
-	}
+	ret = __task_work_add(task, work);
+	if (unlikely(ret))
+		return ret;
 
+	__task_work_notify(task, notify);
 	return 0;
 }
 
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-08 18:34 [PATCHSET 0/2] io_uring: use TWA_SIGNAL more carefully Jens Axboe
  2020-08-08 18:34 ` [PATCH 1/2] kernel: split task_work_add() into two separate helpers Jens Axboe
@ 2020-08-08 18:34 ` Jens Axboe
  2020-08-10 11:42   ` peterz
                     ` (2 more replies)
  1 sibling, 3 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-08 18:34 UTC (permalink / raw)
  To: io-uring; +Cc: peterz, Jens Axboe, stable, Josef

An earlier commit:

b7db41c9e03b ("io_uring: fix regression with always ignoring signals in io_cqring_wait()")

ensured that we didn't get stuck waiting for eventfd reads when it's
registered with the io_uring ring for event notification, but we still
have a gap where the task can be waiting on other events in the kernel
and need a bigger nudge to make forward progress.

Ensure that we use signaled notifications for a task that isn't currently
running, to be certain the work is seen and processed immediately.

Cc: stable@vger.kernel.org # v5.7+
Reported-by: Josef <josef.grieb@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e9b27cdaa735..443eecdfeda9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1712,21 +1712,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret, notify = TWA_RESUME;
 
+	ret = __task_work_add(tsk, cb);
+	if (unlikely(ret))
+		return ret;
+
 	/*
 	 * SQPOLL kernel thread doesn't need notification, just a wakeup.
-	 * If we're not using an eventfd, then TWA_RESUME is always fine,
-	 * as we won't have dependencies between request completions for
-	 * other kernel wait conditions.
+	 * For any other work, use signaled wakeups if the task isn't
+	 * running to avoid dependencies between tasks or threads. If
+	 * the issuing task is currently waiting in the kernel on a thread,
+	 * and same thread is waiting for a completion event, then we need
+	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
+	 * is needed for that.
 	 */
 	if (ctx->flags & IORING_SETUP_SQPOLL)
 		notify = 0;
-	else if (ctx->cq_ev_fd)
+	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
 		notify = TWA_SIGNAL;
 
-	ret = task_work_add(tsk, cb, notify);
-	if (!ret)
-		wake_up_process(tsk);
-	return ret;
+	__task_work_notify(tsk, notify);
+	wake_up_process(tsk);
+	return 0;
 }
 
 static void __io_req_task_cancel(struct io_kiocb *req, int error)
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2] kernel: split task_work_add() into two separate helpers
  2020-08-08 18:34 ` [PATCH 1/2] kernel: split task_work_add() into two separate helpers Jens Axboe
@ 2020-08-10 11:37   ` peterz
  2020-08-10 15:01     ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: peterz @ 2020-08-10 11:37 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, stable

On Sat, Aug 08, 2020 at 12:34:38PM -0600, Jens Axboe wrote:
> Some callers may need to make signaling decisions based on the state
> of the targeted task, and that can only safely be done post adding
> the task_work to the task. Split task_work_add() into:
> 
> __task_work_add()	- adds the work item
> __task_work_notify()	- sends the notification
> 
> No functional changes in this patch.

Might be nice to mention __task_work_add() is now inline.

> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: stable@vger.kernel.org # v5.7+
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  include/linux/task_work.h | 19 ++++++++++++++++
>  kernel/task_work.c        | 48 +++++++++++++++++++++------------------
>  2 files changed, 45 insertions(+), 22 deletions(-)



> +struct callback_head work_exited = {
> +	.next = NULL	/* all we need is ->next == NULL */
> +};

Would it make sense to make this const ? Esp. with the thing exposed,
sticking it in R/O memory might avoid a mistake somewhere.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-08 18:34 ` [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running Jens Axboe
@ 2020-08-10 11:42   ` peterz
  2020-08-10 15:02     ` Jens Axboe
  2020-08-13 16:25   ` Sasha Levin
  2020-08-19 23:57   ` Sasha Levin
  2 siblings, 1 reply; 41+ messages in thread
From: peterz @ 2020-08-10 11:42 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, stable, Josef

On Sat, Aug 08, 2020 at 12:34:39PM -0600, Jens Axboe wrote:
> An earlier commit:
> 
> b7db41c9e03b ("io_uring: fix regression with always ignoring signals in io_cqring_wait()")
> 
> ensured that we didn't get stuck waiting for eventfd reads when it's
> registered with the io_uring ring for event notification, but we still
> have a gap where the task can be waiting on other events in the kernel
> and need a bigger nudge to make forward progress.
> 
> Ensure that we use signaled notifications for a task that isn't currently
> running, to be certain the work is seen and processed immediately.
> 
> Cc: stable@vger.kernel.org # v5.7+
> Reported-by: Josef <josef.grieb@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/io_uring.c | 22 ++++++++++++++--------
>  1 file changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index e9b27cdaa735..443eecdfeda9 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1712,21 +1712,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
>  	struct io_ring_ctx *ctx = req->ctx;
>  	int ret, notify = TWA_RESUME;
>  
> +	ret = __task_work_add(tsk, cb);
> +	if (unlikely(ret))
> +		return ret;
> +
>  	/*
>  	 * SQPOLL kernel thread doesn't need notification, just a wakeup.
> -	 * If we're not using an eventfd, then TWA_RESUME is always fine,
> -	 * as we won't have dependencies between request completions for
> -	 * other kernel wait conditions.
> +	 * For any other work, use signaled wakeups if the task isn't
> +	 * running to avoid dependencies between tasks or threads. If
> +	 * the issuing task is currently waiting in the kernel on a thread,
> +	 * and same thread is waiting for a completion event, then we need
> +	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
> +	 * is needed for that.
>  	 */
>  	if (ctx->flags & IORING_SETUP_SQPOLL)
>  		notify = 0;
> -	else if (ctx->cq_ev_fd)
> +	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
>  		notify = TWA_SIGNAL;
>  
> -	ret = task_work_add(tsk, cb, notify);
> -	if (!ret)
> -		wake_up_process(tsk);
> -	return ret;
> +	__task_work_notify(tsk, notify);
> +	wake_up_process(tsk);
> +	return 0;
>  }

Wait.. so the only change here is that you look at tsk->state, _after_
doing __task_work_add(), but nothing, not the Changelog nor the comment
explains this.

So you're relying on __task_work_add() being an smp_mb() vs the add, and
you order this against the smp_mb() in set_current_state() ?

This really needs spelling out.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2] kernel: split task_work_add() into two separate helpers
  2020-08-10 11:37   ` peterz
@ 2020-08-10 15:01     ` Jens Axboe
  2020-08-10 15:28       ` peterz
  2020-08-10 17:51       ` Jens Axboe
  0 siblings, 2 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 15:01 UTC (permalink / raw)
  To: peterz; +Cc: io-uring, stable

On 8/10/20 5:37 AM, peterz@infradead.org wrote:
> On Sat, Aug 08, 2020 at 12:34:38PM -0600, Jens Axboe wrote:
>> Some callers may need to make signaling decisions based on the state
>> of the targeted task, and that can only safely be done post adding
>> the task_work to the task. Split task_work_add() into:
>>
>> __task_work_add()	- adds the work item
>> __task_work_notify()	- sends the notification
>>
>> No functional changes in this patch.
> 
> Might be nice to mention __task_work_add() is now inline.

OK, will mention that.

>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: stable@vger.kernel.org # v5.7+
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  include/linux/task_work.h | 19 ++++++++++++++++
>>  kernel/task_work.c        | 48 +++++++++++++++++++++------------------
>>  2 files changed, 45 insertions(+), 22 deletions(-)
> 
> 
> 
>> +struct callback_head work_exited = {
>> +	.next = NULL	/* all we need is ->next == NULL */
>> +};
> 
> Would it make sense to make this const ? Esp. with the thing exposed,
> sticking it in R/O memory might avoid a mistake somewhere.

That was my original intent, but that makes 'head' in task_work_run()
const as well, and cmpxchg() doesn't like that:

kernel/task_work.c: In function ‘task_work_run’:
./arch/x86/include/asm/cmpxchg.h:89:29: warning: initialization discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
   89 |  __typeof__(*(ptr)) __new = (new);    \
      |                             ^
./arch/x86/include/asm/cmpxchg.h:134:2: note: in expansion of macro ‘__raw_cmpxchg’
  134 |  __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
      |  ^~~~~~~~~~~~~
./arch/x86/include/asm/cmpxchg.h:149:2: note: in expansion of macro ‘__cmpxchg’
  149 |  __cmpxchg(ptr, old, new, sizeof(*(ptr)))
      |  ^~~~~~~~~
./include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro ‘arch_cmpxchg’
 1685 |  arch_cmpxchg(__ai_ptr, __VA_ARGS__);    \
      |  ^~~~~~~~~~~~
kernel/task_work.c:126:12: note: in expansion of macro ‘cmpxchg’
  126 |   } while (cmpxchg(&task->task_works, work, head) != work);
      |            ^~~~~~~

which is somewhat annoying. Because there's really no good reason why it
can't be const, it'll just require the changes to dig a bit deeper.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 11:42   ` peterz
@ 2020-08-10 15:02     ` Jens Axboe
  2020-08-10 19:21       ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 15:02 UTC (permalink / raw)
  To: peterz; +Cc: io-uring, stable, Josef

On 8/10/20 5:42 AM, peterz@infradead.org wrote:
> On Sat, Aug 08, 2020 at 12:34:39PM -0600, Jens Axboe wrote:
>> An earlier commit:
>>
>> b7db41c9e03b ("io_uring: fix regression with always ignoring signals in io_cqring_wait()")
>>
>> ensured that we didn't get stuck waiting for eventfd reads when it's
>> registered with the io_uring ring for event notification, but we still
>> have a gap where the task can be waiting on other events in the kernel
>> and need a bigger nudge to make forward progress.
>>
>> Ensure that we use signaled notifications for a task that isn't currently
>> running, to be certain the work is seen and processed immediately.
>>
>> Cc: stable@vger.kernel.org # v5.7+
>> Reported-by: Josef <josef.grieb@gmail.com>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  fs/io_uring.c | 22 ++++++++++++++--------
>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index e9b27cdaa735..443eecdfeda9 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1712,21 +1712,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
>>  	struct io_ring_ctx *ctx = req->ctx;
>>  	int ret, notify = TWA_RESUME;
>>  
>> +	ret = __task_work_add(tsk, cb);
>> +	if (unlikely(ret))
>> +		return ret;
>> +
>>  	/*
>>  	 * SQPOLL kernel thread doesn't need notification, just a wakeup.
>> -	 * If we're not using an eventfd, then TWA_RESUME is always fine,
>> -	 * as we won't have dependencies between request completions for
>> -	 * other kernel wait conditions.
>> +	 * For any other work, use signaled wakeups if the task isn't
>> +	 * running to avoid dependencies between tasks or threads. If
>> +	 * the issuing task is currently waiting in the kernel on a thread,
>> +	 * and same thread is waiting for a completion event, then we need
>> +	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
>> +	 * is needed for that.
>>  	 */
>>  	if (ctx->flags & IORING_SETUP_SQPOLL)
>>  		notify = 0;
>> -	else if (ctx->cq_ev_fd)
>> +	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
>>  		notify = TWA_SIGNAL;
>>  
>> -	ret = task_work_add(tsk, cb, notify);
>> -	if (!ret)
>> -		wake_up_process(tsk);
>> -	return ret;
>> +	__task_work_notify(tsk, notify);
>> +	wake_up_process(tsk);
>> +	return 0;
>>  }
> 
> Wait.. so the only change here is that you look at tsk->state, _after_
> doing __task_work_add(), but nothing, not the Changelog nor the comment
> explains this.
> 
> So you're relying on __task_work_add() being an smp_mb() vs the add, and
> you order this against the smp_mb() in set_current_state() ?
> 
> This really needs spelling out.

I'll update the changelog, it suffers a bit from having been reused from
the earlier versions. Thanks for checking!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2] kernel: split task_work_add() into two separate helpers
  2020-08-10 15:01     ` Jens Axboe
@ 2020-08-10 15:28       ` peterz
  2020-08-10 17:51       ` Jens Axboe
  1 sibling, 0 replies; 41+ messages in thread
From: peterz @ 2020-08-10 15:28 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, stable

On Mon, Aug 10, 2020 at 09:01:02AM -0600, Jens Axboe wrote:

> >> +struct callback_head work_exited = {
> >> +	.next = NULL	/* all we need is ->next == NULL */
> >> +};
> > 
> > Would it make sense to make this const ? Esp. with the thing exposed,
> > sticking it in R/O memory might avoid a mistake somewhere.
> 
> That was my original intent, but that makes 'head' in task_work_run()
> const as well, and cmpxchg() doesn't like that:
> 
> kernel/task_work.c: In function ‘task_work_run’:
> ./arch/x86/include/asm/cmpxchg.h:89:29: warning: initialization discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
>    89 |  __typeof__(*(ptr)) __new = (new);    \
>       |                             ^
> ./arch/x86/include/asm/cmpxchg.h:134:2: note: in expansion of macro ‘__raw_cmpxchg’
>   134 |  __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
>       |  ^~~~~~~~~~~~~
> ./arch/x86/include/asm/cmpxchg.h:149:2: note: in expansion of macro ‘__cmpxchg’
>   149 |  __cmpxchg(ptr, old, new, sizeof(*(ptr)))
>       |  ^~~~~~~~~
> ./include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro ‘arch_cmpxchg’
>  1685 |  arch_cmpxchg(__ai_ptr, __VA_ARGS__);    \
>       |  ^~~~~~~~~~~~
> kernel/task_work.c:126:12: note: in expansion of macro ‘cmpxchg’
>   126 |   } while (cmpxchg(&task->task_works, work, head) != work);
>       |            ^~~~~~~
> 
> which is somewhat annoying. Because there's really no good reason why it
> can't be const, it'll just require the changes to dig a bit deeper.

Bah! Best I can come up with is casting the const away there, what a
mess :-(

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2] kernel: split task_work_add() into two separate helpers
  2020-08-10 15:01     ` Jens Axboe
  2020-08-10 15:28       ` peterz
@ 2020-08-10 17:51       ` Jens Axboe
  2020-08-10 19:53         ` Peter Zijlstra
  1 sibling, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 17:51 UTC (permalink / raw)
  To: peterz; +Cc: io-uring, stable

On 8/10/20 9:01 AM, Jens Axboe wrote:
> On 8/10/20 5:37 AM, peterz@infradead.org wrote:
>> On Sat, Aug 08, 2020 at 12:34:38PM -0600, Jens Axboe wrote:
>>> Some callers may need to make signaling decisions based on the state
>>> of the targeted task, and that can only safely be done post adding
>>> the task_work to the task. Split task_work_add() into:
>>>
>>> __task_work_add()	- adds the work item
>>> __task_work_notify()	- sends the notification
>>>
>>> No functional changes in this patch.
>>
>> Might be nice to mention __task_work_add() is now inline.
> 
> OK, will mention that.

Added a note of that in the commit message, otherwise the patch is
unchanged:

https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=67e5aca3cb1bd40de0392fea5a661eae2372d6cc

Are you happy with this one now, given that we cannot easily make
the exit_work const?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 15:02     ` Jens Axboe
@ 2020-08-10 19:21       ` Jens Axboe
  2020-08-10 20:12         ` Peter Zijlstra
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 19:21 UTC (permalink / raw)
  To: peterz; +Cc: io-uring, stable, Josef

On 8/10/20 9:02 AM, Jens Axboe wrote:
> On 8/10/20 5:42 AM, peterz@infradead.org wrote:
>> On Sat, Aug 08, 2020 at 12:34:39PM -0600, Jens Axboe wrote:
>>> An earlier commit:
>>>
>>> b7db41c9e03b ("io_uring: fix regression with always ignoring signals in io_cqring_wait()")
>>>
>>> ensured that we didn't get stuck waiting for eventfd reads when it's
>>> registered with the io_uring ring for event notification, but we still
>>> have a gap where the task can be waiting on other events in the kernel
>>> and need a bigger nudge to make forward progress.
>>>
>>> Ensure that we use signaled notifications for a task that isn't currently
>>> running, to be certain the work is seen and processed immediately.
>>>
>>> Cc: stable@vger.kernel.org # v5.7+
>>> Reported-by: Josef <josef.grieb@gmail.com>
>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>> ---
>>>  fs/io_uring.c | 22 ++++++++++++++--------
>>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index e9b27cdaa735..443eecdfeda9 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -1712,21 +1712,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
>>>  	struct io_ring_ctx *ctx = req->ctx;
>>>  	int ret, notify = TWA_RESUME;
>>>  
>>> +	ret = __task_work_add(tsk, cb);
>>> +	if (unlikely(ret))
>>> +		return ret;
>>> +
>>>  	/*
>>>  	 * SQPOLL kernel thread doesn't need notification, just a wakeup.
>>> -	 * If we're not using an eventfd, then TWA_RESUME is always fine,
>>> -	 * as we won't have dependencies between request completions for
>>> -	 * other kernel wait conditions.
>>> +	 * For any other work, use signaled wakeups if the task isn't
>>> +	 * running to avoid dependencies between tasks or threads. If
>>> +	 * the issuing task is currently waiting in the kernel on a thread,
>>> +	 * and same thread is waiting for a completion event, then we need
>>> +	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
>>> +	 * is needed for that.
>>>  	 */
>>>  	if (ctx->flags & IORING_SETUP_SQPOLL)
>>>  		notify = 0;
>>> -	else if (ctx->cq_ev_fd)
>>> +	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
>>>  		notify = TWA_SIGNAL;
>>>  
>>> -	ret = task_work_add(tsk, cb, notify);
>>> -	if (!ret)
>>> -		wake_up_process(tsk);
>>> -	return ret;
>>> +	__task_work_notify(tsk, notify);
>>> +	wake_up_process(tsk);
>>> +	return 0;
>>>  }
>>
>> Wait.. so the only change here is that you look at tsk->state, _after_
>> doing __task_work_add(), but nothing, not the Changelog nor the comment
>> explains this.
>>
>> So you're relying on __task_work_add() being an smp_mb() vs the add, and
>> you order this against the smp_mb() in set_current_state() ?
>>
>> This really needs spelling out.
> 
> I'll update the changelog, it suffers a bit from having been reused from
> the earlier versions. Thanks for checking!

I failed to convince myself that the existing construct was safe, so
here's an incremental on top of that. Basically we re-check the task
state _after_ the initial notification, to protect ourselves from the
case where we initially find the task running, but between that check
and when we do the notification, it's now gone to sleep. Should be
pretty slim, but I think it's there.

Hence do a loop around it, if we're using TWA_RESUME.

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 44ac103483b6..a4ecb6c7e2b0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1780,12 +1780,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
 	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
 	 * is needed for that.
 	 */
-	if (ctx->flags & IORING_SETUP_SQPOLL)
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		notify = 0;
-	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
-		notify = TWA_SIGNAL;
+	} else {
+		bool notified = false;
 
-	__task_work_notify(tsk, notify);
+		/*
+		 * If the task is running, TWA_RESUME notify is enough. Make
+		 * sure to re-check after we've sent the notification, as not
+		 * to have a race between the check and the notification. This
+		 * only applies for TWA_RESUME, as TWA_SIGNAL is safe with a
+		 * sleeping task
+		 */
+		do {
+			if (READ_ONCE(tsk->state) != TASK_RUNNING)
+				notify = TWA_SIGNAL;
+			else if (notified)
+				break;
+			__task_work_notify(tsk, notify);
+			notified = true;
+		} while (notify != TWA_SIGNAL);
+	}
 	wake_up_process(tsk);
 	return 0;
 }

and I've folded it in here:

https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=8d685b56f80b16516be9ce2eb1aee5adcfba13ff

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2] kernel: split task_work_add() into two separate helpers
  2020-08-10 17:51       ` Jens Axboe
@ 2020-08-10 19:53         ` Peter Zijlstra
  0 siblings, 0 replies; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-10 19:53 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, stable

On Mon, Aug 10, 2020 at 11:51:33AM -0600, Jens Axboe wrote:

> Added a note of that in the commit message, otherwise the patch is
> unchanged:
> 
> https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=67e5aca3cb1bd40de0392fea5a661eae2372d6cc

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 19:21       ` Jens Axboe
@ 2020-08-10 20:12         ` Peter Zijlstra
  2020-08-10 20:13           ` Jens Axboe
  2020-08-10 20:16           ` Jens Axboe
  0 siblings, 2 replies; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-10 20:12 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, stable, Josef, Oleg Nesterov

On Mon, Aug 10, 2020 at 01:21:48PM -0600, Jens Axboe wrote:

> >> Wait.. so the only change here is that you look at tsk->state, _after_
> >> doing __task_work_add(), but nothing, not the Changelog nor the comment
> >> explains this.
> >>
> >> So you're relying on __task_work_add() being an smp_mb() vs the add, and
> >> you order this against the smp_mb() in set_current_state() ?
> >>
> >> This really needs spelling out.
> > 
> > I'll update the changelog, it suffers a bit from having been reused from
> > the earlier versions. Thanks for checking!
> 
> I failed to convince myself that the existing construct was safe, so
> here's an incremental on top of that. Basically we re-check the task
> state _after_ the initial notification, to protect ourselves from the
> case where we initially find the task running, but between that check
> and when we do the notification, it's now gone to sleep. Should be
> pretty slim, but I think it's there.
> 
> Hence do a loop around it, if we're using TWA_RESUME.
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 44ac103483b6..a4ecb6c7e2b0 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1780,12 +1780,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
>  	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
>  	 * is needed for that.
>  	 */
> -	if (ctx->flags & IORING_SETUP_SQPOLL)
> +	if (ctx->flags & IORING_SETUP_SQPOLL) {
>  		notify = 0;
> -	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
> -		notify = TWA_SIGNAL;
> +	} else {
> +		bool notified = false;
>  
> -	__task_work_notify(tsk, notify);
> +		/*
> +		 * If the task is running, TWA_RESUME notify is enough. Make
> +		 * sure to re-check after we've sent the notification, as not

Could we get a clue as to why TWA_RESUME is enough when it's running? I
presume it is because we'll do task_work_run() somewhere before we
block, but having an explicit reference here might help someone new to
this make sense of it all.

> +		 * to have a race between the check and the notification. This
> +		 * only applies for TWA_RESUME, as TWA_SIGNAL is safe with a
> +		 * sleeping task
> +		 */
> +		do {
> +			if (READ_ONCE(tsk->state) != TASK_RUNNING)
> +				notify = TWA_SIGNAL;
> +			else if (notified)
> +				break;
> +			__task_work_notify(tsk, notify);
> +			notified = true;
> +		} while (notify != TWA_SIGNAL);
> +	}
>  	wake_up_process(tsk);
>  	return 0;
>  }

Would it be clearer to write it like so perhaps?

	/*
	 * Optimization; when the task is RUNNING we can do with a
	 * cheaper TWA_RESUME notification because,... <reason goes
	 * here>. Otherwise do the more expensive, but always correct
	 * TWA_SIGNAL.
	 */
	if (READ_ONCE(tsk->state) == TASK_RUNNING) {
		__task_work_notify(tsk, TWA_RESUME);
		if (READ_ONCE(tsk->state) == TASK_RUNNING)
			return;
	}
	__task_work_notify(tsk, TWA_SIGNAL);
	wake_up_process(tsk);




^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:12         ` Peter Zijlstra
@ 2020-08-10 20:13           ` Jens Axboe
  2020-08-10 20:25             ` Jens Axboe
  2020-08-10 20:16           ` Jens Axboe
  1 sibling, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 20:13 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 2:12 PM, Peter Zijlstra wrote:
> On Mon, Aug 10, 2020 at 01:21:48PM -0600, Jens Axboe wrote:
> 
>>>> Wait.. so the only change here is that you look at tsk->state, _after_
>>>> doing __task_work_add(), but nothing, not the Changelog nor the comment
>>>> explains this.
>>>>
>>>> So you're relying on __task_work_add() being an smp_mb() vs the add, and
>>>> you order this against the smp_mb() in set_current_state() ?
>>>>
>>>> This really needs spelling out.
>>>
>>> I'll update the changelog, it suffers a bit from having been reused from
>>> the earlier versions. Thanks for checking!
>>
>> I failed to convince myself that the existing construct was safe, so
>> here's an incremental on top of that. Basically we re-check the task
>> state _after_ the initial notification, to protect ourselves from the
>> case where we initially find the task running, but between that check
>> and when we do the notification, it's now gone to sleep. Should be
>> pretty slim, but I think it's there.
>>
>> Hence do a loop around it, if we're using TWA_RESUME.
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 44ac103483b6..a4ecb6c7e2b0 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1780,12 +1780,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
>>  	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
>>  	 * is needed for that.
>>  	 */
>> -	if (ctx->flags & IORING_SETUP_SQPOLL)
>> +	if (ctx->flags & IORING_SETUP_SQPOLL) {
>>  		notify = 0;
>> -	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
>> -		notify = TWA_SIGNAL;
>> +	} else {
>> +		bool notified = false;
>>  
>> -	__task_work_notify(tsk, notify);
>> +		/*
>> +		 * If the task is running, TWA_RESUME notify is enough. Make
>> +		 * sure to re-check after we've sent the notification, as not
> 
> Could we get a clue as to why TWA_RESUME is enough when it's running? I
> presume it is because we'll do task_work_run() somewhere before we
> block, but having an explicit reference here might help someone new to
> this make sense of it all.
> 
>> +		 * to have a race between the check and the notification. This
>> +		 * only applies for TWA_RESUME, as TWA_SIGNAL is safe with a
>> +		 * sleeping task
>> +		 */
>> +		do {
>> +			if (READ_ONCE(tsk->state) != TASK_RUNNING)
>> +				notify = TWA_SIGNAL;
>> +			else if (notified)
>> +				break;
>> +			__task_work_notify(tsk, notify);
>> +			notified = true;
>> +		} while (notify != TWA_SIGNAL);
>> +	}
>>  	wake_up_process(tsk);
>>  	return 0;
>>  }
> 
> Would it be clearer to write it like so perhaps?
> 
> 	/*
> 	 * Optimization; when the task is RUNNING we can do with a
> 	 * cheaper TWA_RESUME notification because,... <reason goes
> 	 * here>. Otherwise do the more expensive, but always correct
> 	 * TWA_SIGNAL.
> 	 */
> 	if (READ_ONCE(tsk->state) == TASK_RUNNING) {
> 		__task_work_notify(tsk, TWA_RESUME);
> 		if (READ_ONCE(tsk->state) == TASK_RUNNING)
> 			return;
> 	}
> 	__task_work_notify(tsk, TWA_SIGNAL);
> 	wake_up_process(tsk);

Yeah that is easier to read, wasn't a huge fan of the loop since it's
only a single retry kind of condition. I'll adopt this suggestion,
thanks!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:12         ` Peter Zijlstra
  2020-08-10 20:13           ` Jens Axboe
@ 2020-08-10 20:16           ` Jens Axboe
  1 sibling, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 20:16 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 2:12 PM, Peter Zijlstra wrote:
> On Mon, Aug 10, 2020 at 01:21:48PM -0600, Jens Axboe wrote:
> 
>>>> Wait.. so the only change here is that you look at tsk->state, _after_
>>>> doing __task_work_add(), but nothing, not the Changelog nor the comment
>>>> explains this.
>>>>
>>>> So you're relying on __task_work_add() being an smp_mb() vs the add, and
>>>> you order this against the smp_mb() in set_current_state() ?
>>>>
>>>> This really needs spelling out.
>>>
>>> I'll update the changelog, it suffers a bit from having been reused from
>>> the earlier versions. Thanks for checking!
>>
>> I failed to convince myself that the existing construct was safe, so
>> here's an incremental on top of that. Basically we re-check the task
>> state _after_ the initial notification, to protect ourselves from the
>> case where we initially find the task running, but between that check
>> and when we do the notification, it's now gone to sleep. Should be
>> pretty slim, but I think it's there.
>>
>> Hence do a loop around it, if we're using TWA_RESUME.
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 44ac103483b6..a4ecb6c7e2b0 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1780,12 +1780,27 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
>>  	 * to ensure that the issuing task processes task_work. TWA_SIGNAL
>>  	 * is needed for that.
>>  	 */
>> -	if (ctx->flags & IORING_SETUP_SQPOLL)
>> +	if (ctx->flags & IORING_SETUP_SQPOLL) {
>>  		notify = 0;
>> -	else if (READ_ONCE(tsk->state) != TASK_RUNNING)
>> -		notify = TWA_SIGNAL;
>> +	} else {
>> +		bool notified = false;
>>  
>> -	__task_work_notify(tsk, notify);
>> +		/*
>> +		 * If the task is running, TWA_RESUME notify is enough. Make
>> +		 * sure to re-check after we've sent the notification, as not
> 
> Could we get a clue as to why TWA_RESUME is enough when it's running? I
> presume it is because we'll do task_work_run() somewhere before we
> block, but having an explicit reference here might help someone new to
> this make sense of it all.

Right, it's because we're sure to run task_work in that case. I'll
update the comment.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:13           ` Jens Axboe
@ 2020-08-10 20:25             ` Jens Axboe
  2020-08-10 20:32               ` Peter Zijlstra
  2020-08-10 20:35               ` Jann Horn
  0 siblings, 2 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 20:25 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 2:13 PM, Jens Axboe wrote:
>> Would it be clearer to write it like so perhaps?
>>
>> 	/*
>> 	 * Optimization; when the task is RUNNING we can do with a
>> 	 * cheaper TWA_RESUME notification because,... <reason goes
>> 	 * here>. Otherwise do the more expensive, but always correct
>> 	 * TWA_SIGNAL.
>> 	 */
>> 	if (READ_ONCE(tsk->state) == TASK_RUNNING) {
>> 		__task_work_notify(tsk, TWA_RESUME);
>> 		if (READ_ONCE(tsk->state) == TASK_RUNNING)
>> 			return;
>> 	}
>> 	__task_work_notify(tsk, TWA_SIGNAL);
>> 	wake_up_process(tsk);
> 
> Yeah that is easier to read, wasn't a huge fan of the loop since it's
> only a single retry kind of condition. I'll adopt this suggestion,
> thanks!

Re-write it a bit on top of that, just turning it into two separate
READ_ONCE, and added appropriate comments. For the SQPOLL case, the
wake_up_process() is enough, so we can clean up that if/else.

https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=49bc5c16483945982cf81b0109d7da7cd9ee55ed

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:25             ` Jens Axboe
@ 2020-08-10 20:32               ` Peter Zijlstra
  2020-08-10 20:35                 ` Jens Axboe
  2020-08-10 20:35               ` Jann Horn
  1 sibling, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-10 20:32 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, stable, Josef, Oleg Nesterov

On Mon, Aug 10, 2020 at 02:25:48PM -0600, Jens Axboe wrote:
> On 8/10/20 2:13 PM, Jens Axboe wrote:
> >> Would it be clearer to write it like so perhaps?
> >>
> >> 	/*
> >> 	 * Optimization; when the task is RUNNING we can do with a
> >> 	 * cheaper TWA_RESUME notification because,... <reason goes
> >> 	 * here>. Otherwise do the more expensive, but always correct
> >> 	 * TWA_SIGNAL.
> >> 	 */
> >> 	if (READ_ONCE(tsk->state) == TASK_RUNNING) {
> >> 		__task_work_notify(tsk, TWA_RESUME);
> >> 		if (READ_ONCE(tsk->state) == TASK_RUNNING)
> >> 			return;
> >> 	}
> >> 	__task_work_notify(tsk, TWA_SIGNAL);
> >> 	wake_up_process(tsk);
> > 
> > Yeah that is easier to read, wasn't a huge fan of the loop since it's
> > only a single retry kind of condition. I'll adopt this suggestion,
> > thanks!
> 
> Re-write it a bit on top of that, just turning it into two separate
> READ_ONCE, and added appropriate comments. For the SQPOLL case, the
> wake_up_process() is enough, so we can clean up that if/else.
> 
> https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=49bc5c16483945982cf81b0109d7da7cd9ee55ed

OK, that works for me, thanks!

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:32               ` Peter Zijlstra
@ 2020-08-10 20:35                 ` Jens Axboe
  0 siblings, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 20:35 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 2:32 PM, Peter Zijlstra wrote:
> On Mon, Aug 10, 2020 at 02:25:48PM -0600, Jens Axboe wrote:
>> On 8/10/20 2:13 PM, Jens Axboe wrote:
>>>> Would it be clearer to write it like so perhaps?
>>>>
>>>> 	/*
>>>> 	 * Optimization; when the task is RUNNING we can do with a
>>>> 	 * cheaper TWA_RESUME notification because,... <reason goes
>>>> 	 * here>. Otherwise do the more expensive, but always correct
>>>> 	 * TWA_SIGNAL.
>>>> 	 */
>>>> 	if (READ_ONCE(tsk->state) == TASK_RUNNING) {
>>>> 		__task_work_notify(tsk, TWA_RESUME);
>>>> 		if (READ_ONCE(tsk->state) == TASK_RUNNING)
>>>> 			return;
>>>> 	}
>>>> 	__task_work_notify(tsk, TWA_SIGNAL);
>>>> 	wake_up_process(tsk);
>>>
>>> Yeah that is easier to read, wasn't a huge fan of the loop since it's
>>> only a single retry kind of condition. I'll adopt this suggestion,
>>> thanks!
>>
>> Re-write it a bit on top of that, just turning it into two separate
>> READ_ONCE, and added appropriate comments. For the SQPOLL case, the
>> wake_up_process() is enough, so we can clean up that if/else.
>>
>> https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=49bc5c16483945982cf81b0109d7da7cd9ee55ed
> 
> OK, that works for me, thanks!
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks for checking (again) - I've added your acked-by.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:25             ` Jens Axboe
  2020-08-10 20:32               ` Peter Zijlstra
@ 2020-08-10 20:35               ` Jann Horn
  2020-08-10 21:06                 ` Jens Axboe
  1 sibling, 1 reply; 41+ messages in thread
From: Jann Horn @ 2020-08-10 20:35 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On Mon, Aug 10, 2020 at 10:25 PM Jens Axboe <axboe@kernel.dk> wrote:
> On 8/10/20 2:13 PM, Jens Axboe wrote:
> >> Would it be clearer to write it like so perhaps?
> >>
> >>      /*
> >>       * Optimization; when the task is RUNNING we can do with a
> >>       * cheaper TWA_RESUME notification because,... <reason goes
> >>       * here>. Otherwise do the more expensive, but always correct
> >>       * TWA_SIGNAL.
> >>       */
> >>      if (READ_ONCE(tsk->state) == TASK_RUNNING) {
> >>              __task_work_notify(tsk, TWA_RESUME);
> >>              if (READ_ONCE(tsk->state) == TASK_RUNNING)
> >>                      return;
> >>      }
> >>      __task_work_notify(tsk, TWA_SIGNAL);
> >>      wake_up_process(tsk);
> >
> > Yeah that is easier to read, wasn't a huge fan of the loop since it's
> > only a single retry kind of condition. I'll adopt this suggestion,
> > thanks!
>
> Re-write it a bit on top of that, just turning it into two separate
> READ_ONCE, and added appropriate comments. For the SQPOLL case, the
> wake_up_process() is enough, so we can clean up that if/else.
>
> https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=49bc5c16483945982cf81b0109d7da7cd9ee55ed

I think I'm starting to understand the overall picture here, and I
think if my understanding is correct, your solution isn't going to
work properly.

My understanding of the scenario you're trying to address is:

 - task A starts up io_uring
 - task A tells io_uring to bump the counter of an eventfd E when work
has been completed
 - task A submits some work ("read a byte from file descriptor X", or
something like that)
 - io_uring internally starts an asynchronous I/O operation, with a callback C
 - task A calls read(E, &counter, sizeof(counter)) to wait for events
to be processed
 - the async I/O operation finishes, C is invoked, and C schedules
task_work for task A

And here you run into a deadlock, because the task_work will only run
when task A returns from the syscall, but the syscall will only return
once the task_work is executing and has finished the I/O operation.


If that is the scenario you're trying to solve here (where you're
trying to force a task that's in the middle of some syscall that's
completely unrelated to io_uring to return back to syscall context), I
don't think this will work: It might well be that the task has e.g.
just started entering the read() syscall, and is *about to* block, but
is currently still running.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 20:35               ` Jann Horn
@ 2020-08-10 21:06                 ` Jens Axboe
  2020-08-10 21:10                   ` Peter Zijlstra
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 21:06 UTC (permalink / raw)
  To: Jann Horn; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 2:35 PM, Jann Horn wrote:
> On Mon, Aug 10, 2020 at 10:25 PM Jens Axboe <axboe@kernel.dk> wrote:
>> On 8/10/20 2:13 PM, Jens Axboe wrote:
>>>> Would it be clearer to write it like so perhaps?
>>>>
>>>>      /*
>>>>       * Optimization; when the task is RUNNING we can do with a
>>>>       * cheaper TWA_RESUME notification because,... <reason goes
>>>>       * here>. Otherwise do the more expensive, but always correct
>>>>       * TWA_SIGNAL.
>>>>       */
>>>>      if (READ_ONCE(tsk->state) == TASK_RUNNING) {
>>>>              __task_work_notify(tsk, TWA_RESUME);
>>>>              if (READ_ONCE(tsk->state) == TASK_RUNNING)
>>>>                      return;
>>>>      }
>>>>      __task_work_notify(tsk, TWA_SIGNAL);
>>>>      wake_up_process(tsk);
>>>
>>> Yeah that is easier to read, wasn't a huge fan of the loop since it's
>>> only a single retry kind of condition. I'll adopt this suggestion,
>>> thanks!
>>
>> Re-write it a bit on top of that, just turning it into two separate
>> READ_ONCE, and added appropriate comments. For the SQPOLL case, the
>> wake_up_process() is enough, so we can clean up that if/else.
>>
>> https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-5.9&id=49bc5c16483945982cf81b0109d7da7cd9ee55ed
> 
> I think I'm starting to understand the overall picture here, and I
> think if my understanding is correct, your solution isn't going to
> work properly.
> 
> My understanding of the scenario you're trying to address is:
> 
>  - task A starts up io_uring
>  - task A tells io_uring to bump the counter of an eventfd E when work
> has been completed
>  - task A submits some work ("read a byte from file descriptor X", or
> something like that)
>  - io_uring internally starts an asynchronous I/O operation, with a callback C
>  - task A calls read(E, &counter, sizeof(counter)) to wait for events
> to be processed
>  - the async I/O operation finishes, C is invoked, and C schedules
> task_work for task A
> 
> And here you run into a deadlock, because the task_work will only run
> when task A returns from the syscall, but the syscall will only return
> once the task_work is executing and has finished the I/O operation.
> 
> 
> If that is the scenario you're trying to solve here (where you're
> trying to force a task that's in the middle of some syscall that's
> completely unrelated to io_uring to return back to syscall context), I
> don't think this will work: It might well be that the task has e.g.
> just started entering the read() syscall, and is *about to* block, but
> is currently still running.

Your understanding of the scenario appears to be correct, and as far as
I can tell, also your analysis of why the existing approach doesn't
fully close it. You're right in that the task could currently be on its
way to blocking, but still running. And for that case, TWA_RESUME isn't
going to cut it.

Ugh. This basically means I have to use TWA_SIGNAL regardless of state,
unless SQPOLL is used. That's not optimal.

Alternatively:

if (tsk->state != TASK_RUNNING || task_in_kernel(tsk))
	notify = TWA_SIGNAL;
else
	notify = TWA_RESUME;

should work as far as I can tell, but I don't even know if there's a
reliable way to do task_in_kernel(). But I suspect this kind of check
would still save the day, as we're not really expecting the common case
to be that the task is in the kernel on the way to blocking. And it'd be
kind of annoying to have to cater to that scenario by slowing down the
fast path.

Suggestions?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 21:06                 ` Jens Axboe
@ 2020-08-10 21:10                   ` Peter Zijlstra
  2020-08-10 21:12                     ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-10 21:10 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jann Horn, io-uring, stable, Josef, Oleg Nesterov

On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:

> should work as far as I can tell, but I don't even know if there's a
> reliable way to do task_in_kernel().

Only on NOHZ_FULL, and tracking that is one of the things that makes it
so horribly expensive.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 21:10                   ` Peter Zijlstra
@ 2020-08-10 21:12                     ` Jens Axboe
  2020-08-10 21:26                       ` Jann Horn
  2020-08-10 21:27                       ` Jens Axboe
  0 siblings, 2 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 21:12 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jann Horn, io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 3:10 PM, Peter Zijlstra wrote:
> On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
> 
>> should work as far as I can tell, but I don't even know if there's a
>> reliable way to do task_in_kernel().
> 
> Only on NOHZ_FULL, and tracking that is one of the things that makes it
> so horribly expensive.

Probably no other way than to bite the bullet and just use TWA_SIGNAL
unconditionally...

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 21:12                     ` Jens Axboe
@ 2020-08-10 21:26                       ` Jann Horn
  2020-08-10 21:28                         ` Jens Axboe
  2020-08-10 21:27                       ` Jens Axboe
  1 sibling, 1 reply; 41+ messages in thread
From: Jann Horn @ 2020-08-10 21:26 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On Mon, Aug 10, 2020 at 11:12 PM Jens Axboe <axboe@kernel.dk> wrote:
> On 8/10/20 3:10 PM, Peter Zijlstra wrote:
> > On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
> >
> >> should work as far as I can tell, but I don't even know if there's a
> >> reliable way to do task_in_kernel().
> >
> > Only on NOHZ_FULL, and tracking that is one of the things that makes it
> > so horribly expensive.
>
> Probably no other way than to bite the bullet and just use TWA_SIGNAL
> unconditionally...

Why are you trying to avoid using TWA_SIGNAL? Is there a specific part
of handling it that's particularly slow?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 21:12                     ` Jens Axboe
  2020-08-10 21:26                       ` Jann Horn
@ 2020-08-10 21:27                       ` Jens Axboe
  1 sibling, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 21:27 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jann Horn, io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 3:12 PM, Jens Axboe wrote:
> On 8/10/20 3:10 PM, Peter Zijlstra wrote:
>> On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
>>
>>> should work as far as I can tell, but I don't even know if there's a
>>> reliable way to do task_in_kernel().
>>
>> Only on NOHZ_FULL, and tracking that is one of the things that makes it
>> so horribly expensive.
> 
> Probably no other way than to bite the bullet and just use TWA_SIGNAL
> unconditionally...

Is there a safe way to make TWA_SIGNAL notification cheaper? I won't
pretend to fully understand the ordering, Oleg probably has a much
better idea then me...

diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5c0848ca1287..ea2c683c8563 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -42,7 +42,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 		set_notify_resume(task);
 		break;
 	case TWA_SIGNAL:
-		if (lock_task_sighand(task, &flags)) {
+		if (!(task->jobctl & JOBCTL_TASK_WORK) &&
+		    lock_task_sighand(task, &flags)) {
 			task->jobctl |= JOBCTL_TASK_WORK;
 			signal_wake_up(task, 0);
 			unlock_task_sighand(task, &flags);

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 21:26                       ` Jann Horn
@ 2020-08-10 21:28                         ` Jens Axboe
  2020-08-10 22:01                           ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 21:28 UTC (permalink / raw)
  To: Jann Horn; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 3:26 PM, Jann Horn wrote:
> On Mon, Aug 10, 2020 at 11:12 PM Jens Axboe <axboe@kernel.dk> wrote:
>> On 8/10/20 3:10 PM, Peter Zijlstra wrote:
>>> On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
>>>
>>>> should work as far as I can tell, but I don't even know if there's a
>>>> reliable way to do task_in_kernel().
>>>
>>> Only on NOHZ_FULL, and tracking that is one of the things that makes it
>>> so horribly expensive.
>>
>> Probably no other way than to bite the bullet and just use TWA_SIGNAL
>> unconditionally...
> 
> Why are you trying to avoid using TWA_SIGNAL? Is there a specific part
> of handling it that's particularly slow?

Not particularly slow, but it's definitely heavier than TWA_RESUME. And
as we're driving any pollable async IO through this, just trying to
ensure it's as light as possible.

It's not a functional thing, just efficiency.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 21:28                         ` Jens Axboe
@ 2020-08-10 22:01                           ` Jens Axboe
  2020-08-10 22:41                             ` Jann Horn
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-10 22:01 UTC (permalink / raw)
  To: Jann Horn; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 3:28 PM, Jens Axboe wrote:
> On 8/10/20 3:26 PM, Jann Horn wrote:
>> On Mon, Aug 10, 2020 at 11:12 PM Jens Axboe <axboe@kernel.dk> wrote:
>>> On 8/10/20 3:10 PM, Peter Zijlstra wrote:
>>>> On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
>>>>
>>>>> should work as far as I can tell, but I don't even know if there's a
>>>>> reliable way to do task_in_kernel().
>>>>
>>>> Only on NOHZ_FULL, and tracking that is one of the things that makes it
>>>> so horribly expensive.
>>>
>>> Probably no other way than to bite the bullet and just use TWA_SIGNAL
>>> unconditionally...
>>
>> Why are you trying to avoid using TWA_SIGNAL? Is there a specific part
>> of handling it that's particularly slow?
> 
> Not particularly slow, but it's definitely heavier than TWA_RESUME. And
> as we're driving any pollable async IO through this, just trying to
> ensure it's as light as possible.
> 
> It's not a functional thing, just efficiency.

Ran some quick testing in a vm, which is worst case for this kind of
thing as any kind of mucking with interrupts is really slow. And the hit
is substantial. Though with the below, we're basically at parity again.
Just for discussion...


diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5c0848ca1287..ea2c683c8563 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -42,7 +42,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 		set_notify_resume(task);
 		break;
 	case TWA_SIGNAL:
-		if (lock_task_sighand(task, &flags)) {
+		if (!(task->jobctl & JOBCTL_TASK_WORK) &&
+		    lock_task_sighand(task, &flags)) {
 			task->jobctl |= JOBCTL_TASK_WORK;
 			signal_wake_up(task, 0);
 			unlock_task_sighand(task, &flags);

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 22:01                           ` Jens Axboe
@ 2020-08-10 22:41                             ` Jann Horn
  2020-08-11  1:25                               ` Jens Axboe
  2020-08-11  6:45                               ` Oleg Nesterov
  0 siblings, 2 replies; 41+ messages in thread
From: Jann Horn @ 2020-08-10 22:41 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On Tue, Aug 11, 2020 at 12:01 AM Jens Axboe <axboe@kernel.dk> wrote:
> On 8/10/20 3:28 PM, Jens Axboe wrote:
> > On 8/10/20 3:26 PM, Jann Horn wrote:
> >> On Mon, Aug 10, 2020 at 11:12 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>> On 8/10/20 3:10 PM, Peter Zijlstra wrote:
> >>>> On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
> >>>>
> >>>>> should work as far as I can tell, but I don't even know if there's a
> >>>>> reliable way to do task_in_kernel().
> >>>>
> >>>> Only on NOHZ_FULL, and tracking that is one of the things that makes it
> >>>> so horribly expensive.
> >>>
> >>> Probably no other way than to bite the bullet and just use TWA_SIGNAL
> >>> unconditionally...
> >>
> >> Why are you trying to avoid using TWA_SIGNAL? Is there a specific part
> >> of handling it that's particularly slow?
> >
> > Not particularly slow, but it's definitely heavier than TWA_RESUME. And
> > as we're driving any pollable async IO through this, just trying to
> > ensure it's as light as possible.
> >
> > It's not a functional thing, just efficiency.
>
> Ran some quick testing in a vm, which is worst case for this kind of
> thing as any kind of mucking with interrupts is really slow. And the hit
> is substantial. Though with the below, we're basically at parity again.
> Just for discussion...
>
>
> diff --git a/kernel/task_work.c b/kernel/task_work.c
> index 5c0848ca1287..ea2c683c8563 100644
> --- a/kernel/task_work.c
> +++ b/kernel/task_work.c
> @@ -42,7 +42,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
>                 set_notify_resume(task);
>                 break;
>         case TWA_SIGNAL:
> -               if (lock_task_sighand(task, &flags)) {
> +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
> +                   lock_task_sighand(task, &flags)) {
>                         task->jobctl |= JOBCTL_TASK_WORK;
>                         signal_wake_up(task, 0);
>                         unlock_task_sighand(task, &flags);

I think that should work in theory, but if you want to be able to do a
proper unlocked read of task->jobctl here, then I think you'd have to
use READ_ONCE() here and make all existing writes to ->jobctl use
WRITE_ONCE().

Also, I think that to make this work, stuff like get_signal() will
need to use memory barriers to ensure that reads from ->task_works are
ordered after ->jobctl has been cleared - ideally written such that on
the fastpath, the memory barrier doesn't execute.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 22:41                             ` Jann Horn
@ 2020-08-11  1:25                               ` Jens Axboe
  2020-08-11  6:45                               ` Oleg Nesterov
  1 sibling, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-11  1:25 UTC (permalink / raw)
  To: Jann Horn; +Cc: Peter Zijlstra, io-uring, stable, Josef, Oleg Nesterov

On 8/10/20 4:41 PM, Jann Horn wrote:
> On Tue, Aug 11, 2020 at 12:01 AM Jens Axboe <axboe@kernel.dk> wrote:
>> On 8/10/20 3:28 PM, Jens Axboe wrote:
>>> On 8/10/20 3:26 PM, Jann Horn wrote:
>>>> On Mon, Aug 10, 2020 at 11:12 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>> On 8/10/20 3:10 PM, Peter Zijlstra wrote:
>>>>>> On Mon, Aug 10, 2020 at 03:06:49PM -0600, Jens Axboe wrote:
>>>>>>
>>>>>>> should work as far as I can tell, but I don't even know if there's a
>>>>>>> reliable way to do task_in_kernel().
>>>>>>
>>>>>> Only on NOHZ_FULL, and tracking that is one of the things that makes it
>>>>>> so horribly expensive.
>>>>>
>>>>> Probably no other way than to bite the bullet and just use TWA_SIGNAL
>>>>> unconditionally...
>>>>
>>>> Why are you trying to avoid using TWA_SIGNAL? Is there a specific part
>>>> of handling it that's particularly slow?
>>>
>>> Not particularly slow, but it's definitely heavier than TWA_RESUME. And
>>> as we're driving any pollable async IO through this, just trying to
>>> ensure it's as light as possible.
>>>
>>> It's not a functional thing, just efficiency.
>>
>> Ran some quick testing in a vm, which is worst case for this kind of
>> thing as any kind of mucking with interrupts is really slow. And the hit
>> is substantial. Though with the below, we're basically at parity again.
>> Just for discussion...
>>
>>
>> diff --git a/kernel/task_work.c b/kernel/task_work.c
>> index 5c0848ca1287..ea2c683c8563 100644
>> --- a/kernel/task_work.c
>> +++ b/kernel/task_work.c
>> @@ -42,7 +42,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
>>                 set_notify_resume(task);
>>                 break;
>>         case TWA_SIGNAL:
>> -               if (lock_task_sighand(task, &flags)) {
>> +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
>> +                   lock_task_sighand(task, &flags)) {
>>                         task->jobctl |= JOBCTL_TASK_WORK;
>>                         signal_wake_up(task, 0);
>>                         unlock_task_sighand(task, &flags);
> 
> I think that should work in theory, but if you want to be able to do a
> proper unlocked read of task->jobctl here, then I think you'd have to
> use READ_ONCE() here and make all existing writes to ->jobctl use
> WRITE_ONCE().
> 
> Also, I think that to make this work, stuff like get_signal() will
> need to use memory barriers to ensure that reads from ->task_works are
> ordered after ->jobctl has been cleared - ideally written such that on
> the fastpath, the memory barrier doesn't execute.

I wonder if it's possible to just make it safe for the io_uring case,
since a bigger change would make this performance regression persistent
in this release... Would still require the split add/notification patch,
but that one is trivial.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-10 22:41                             ` Jann Horn
  2020-08-11  1:25                               ` Jens Axboe
@ 2020-08-11  6:45                               ` Oleg Nesterov
  2020-08-11  6:56                                 ` Peter Zijlstra
  1 sibling, 1 reply; 41+ messages in thread
From: Oleg Nesterov @ 2020-08-11  6:45 UTC (permalink / raw)
  To: Jann Horn; +Cc: Jens Axboe, Peter Zijlstra, io-uring, stable, Josef

On 08/11, Jann Horn wrote:
>
> > --- a/kernel/task_work.c
> > +++ b/kernel/task_work.c
> > @@ -42,7 +42,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
> >                 set_notify_resume(task);
> >                 break;
> >         case TWA_SIGNAL:
> > -               if (lock_task_sighand(task, &flags)) {
> > +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
> > +                   lock_task_sighand(task, &flags)) {
> >                         task->jobctl |= JOBCTL_TASK_WORK;
> >                         signal_wake_up(task, 0);
> >                         unlock_task_sighand(task, &flags);
> 
> I think that should work in theory, but if you want to be able to do a
> proper unlocked read of task->jobctl here, then I think you'd have to
> use READ_ONCE() here

Agreed,

> and make all existing writes to ->jobctl use
> WRITE_ONCE().

->jobctl is always modified with ->siglock held, do we really need
WRITE_ONCE() ?

> Also, I think that to make this work, stuff like get_signal() will
> need to use memory barriers to ensure that reads from ->task_works are
> ordered after ->jobctl has been cleared

Why? I don't follow.

Afaics, we only need to ensure that task_work_add() checks JOBCTL_TASK_WORK
after it adds the new work to the ->task_works list, and we can rely on
cmpxchg() ?

Oleg.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  6:45                               ` Oleg Nesterov
@ 2020-08-11  6:56                                 ` Peter Zijlstra
  2020-08-11  7:14                                   ` Oleg Nesterov
  0 siblings, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-11  6:56 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Jann Horn, Jens Axboe, io-uring, stable, Josef

On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
> On 08/11, Jann Horn wrote:
> >
> > > --- a/kernel/task_work.c
> > > +++ b/kernel/task_work.c
> > > @@ -42,7 +42,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
> > >                 set_notify_resume(task);
> > >                 break;
> > >         case TWA_SIGNAL:
> > > -               if (lock_task_sighand(task, &flags)) {
> > > +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
> > > +                   lock_task_sighand(task, &flags)) {
> > >                         task->jobctl |= JOBCTL_TASK_WORK;
> > >                         signal_wake_up(task, 0);
> > >                         unlock_task_sighand(task, &flags);
> > 
> > I think that should work in theory, but if you want to be able to do a
> > proper unlocked read of task->jobctl here, then I think you'd have to
> > use READ_ONCE() here
> 
> Agreed,
> 
> > and make all existing writes to ->jobctl use
> > WRITE_ONCE().
> 
> ->jobctl is always modified with ->siglock held, do we really need
> WRITE_ONCE() ?

In theory, yes. The compiler doesn't know about locks, it can tear
writes whenever it feels like it. In practise it doesn't happen much,
but...

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  6:56                                 ` Peter Zijlstra
@ 2020-08-11  7:14                                   ` Oleg Nesterov
  2020-08-11  7:26                                     ` Oleg Nesterov
  2020-08-11  7:45                                     ` Peter Zijlstra
  0 siblings, 2 replies; 41+ messages in thread
From: Oleg Nesterov @ 2020-08-11  7:14 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jann Horn, Jens Axboe, io-uring, stable, Josef

On 08/11, Peter Zijlstra wrote:
>
> On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
> >
> > ->jobctl is always modified with ->siglock held, do we really need
> > WRITE_ONCE() ?
>
> In theory, yes. The compiler doesn't know about locks, it can tear
> writes whenever it feels like it.

Yes, but why does this matter? Could you spell please?

Oleg.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  7:14                                   ` Oleg Nesterov
@ 2020-08-11  7:26                                     ` Oleg Nesterov
  2020-08-11  7:49                                       ` Peter Zijlstra
  2020-08-11  7:45                                     ` Peter Zijlstra
  1 sibling, 1 reply; 41+ messages in thread
From: Oleg Nesterov @ 2020-08-11  7:26 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jann Horn, Jens Axboe, io-uring, stable, Josef

On 08/11, Oleg Nesterov wrote:
>
> On 08/11, Peter Zijlstra wrote:
> >
> > On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
> > >
> > > ->jobctl is always modified with ->siglock held, do we really need
> > > WRITE_ONCE() ?
> >
> > In theory, yes. The compiler doesn't know about locks, it can tear
> > writes whenever it feels like it.
>
> Yes, but why does this matter? Could you spell please?

Do you mean that compiler can temporary set/clear JOBCTL_TASK_WORK
when it sets/clears another bit?

Oleg.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  7:14                                   ` Oleg Nesterov
  2020-08-11  7:26                                     ` Oleg Nesterov
@ 2020-08-11  7:45                                     ` Peter Zijlstra
  2020-08-11  8:10                                       ` Oleg Nesterov
  1 sibling, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-11  7:45 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Jann Horn, Jens Axboe, io-uring, stable, Josef

On Tue, Aug 11, 2020 at 09:14:02AM +0200, Oleg Nesterov wrote:
> On 08/11, Peter Zijlstra wrote:
> >
> > On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
> > >
> > > ->jobctl is always modified with ->siglock held, do we really need
> > > WRITE_ONCE() ?
> >
> > In theory, yes. The compiler doesn't know about locks, it can tear
> > writes whenever it feels like it.
> 
> Yes, but why does this matter? Could you spell please?

Ah, well, that I don't konw. Why do we need the READ_ONCE() ?

It does:

> +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
> +                   lock_task_sighand(task, &flags)) {

and the lock_task_sighand() implies barrier(), so I thought the reason
for the READ_ONCE() was load-tearing, and then we need WRITE_ONCE() to
avoid store-tearing.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  7:26                                     ` Oleg Nesterov
@ 2020-08-11  7:49                                       ` Peter Zijlstra
  0 siblings, 0 replies; 41+ messages in thread
From: Peter Zijlstra @ 2020-08-11  7:49 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Jann Horn, Jens Axboe, io-uring, stable, Josef

On Tue, Aug 11, 2020 at 09:26:37AM +0200, Oleg Nesterov wrote:
> On 08/11, Oleg Nesterov wrote:
> >
> > On 08/11, Peter Zijlstra wrote:
> > >
> > > On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
> > > >
> > > > ->jobctl is always modified with ->siglock held, do we really need
> > > > WRITE_ONCE() ?
> > >
> > > In theory, yes. The compiler doesn't know about locks, it can tear
> > > writes whenever it feels like it.
> >
> > Yes, but why does this matter? Could you spell please?
> 
> Do you mean that compiler can temporary set/clear JOBCTL_TASK_WORK
> when it sets/clears another bit?

Possibly, afaict the compiler is allowed to 'spill' intermediate state
into the variable. If any intermediate state has the bit clear,...

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  7:45                                     ` Peter Zijlstra
@ 2020-08-11  8:10                                       ` Oleg Nesterov
  2020-08-11 13:06                                         ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Oleg Nesterov @ 2020-08-11  8:10 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jann Horn, Jens Axboe, io-uring, stable, Josef

On 08/11, Peter Zijlstra wrote:
>
> On Tue, Aug 11, 2020 at 09:14:02AM +0200, Oleg Nesterov wrote:
> > On 08/11, Peter Zijlstra wrote:
> > >
> > > On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
> > > >
> > > > ->jobctl is always modified with ->siglock held, do we really need
> > > > WRITE_ONCE() ?
> > >
> > > In theory, yes. The compiler doesn't know about locks, it can tear
> > > writes whenever it feels like it.
> >
> > Yes, but why does this matter? Could you spell please?
>
> Ah, well, that I don't konw. Why do we need the READ_ONCE() ?
>
> It does:
>
> > +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
> > +                   lock_task_sighand(task, &flags)) {
>
> and the lock_task_sighand() implies barrier(), so I thought the reason
> for the READ_ONCE() was load-tearing, and then we need WRITE_ONCE() to
> avoid store-tearing.

I don't think we really need READ_ONCE() for correctness, compiler can't
reorder this LOAD with cmpxchg() above, and I think we don't care about
load-tearing.

But I guess we need READ_ONCE() or data_race() to shut kcsan up.

Oleg.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11  8:10                                       ` Oleg Nesterov
@ 2020-08-11 13:06                                         ` Jens Axboe
  2020-08-11 14:05                                           ` Oleg Nesterov
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-11 13:06 UTC (permalink / raw)
  To: Oleg Nesterov, Peter Zijlstra; +Cc: Jann Horn, io-uring, stable, Josef

On 8/11/20 2:10 AM, Oleg Nesterov wrote:
> On 08/11, Peter Zijlstra wrote:
>>
>> On Tue, Aug 11, 2020 at 09:14:02AM +0200, Oleg Nesterov wrote:
>>> On 08/11, Peter Zijlstra wrote:
>>>>
>>>> On Tue, Aug 11, 2020 at 08:45:16AM +0200, Oleg Nesterov wrote:
>>>>>
>>>>> ->jobctl is always modified with ->siglock held, do we really need
>>>>> WRITE_ONCE() ?
>>>>
>>>> In theory, yes. The compiler doesn't know about locks, it can tear
>>>> writes whenever it feels like it.
>>>
>>> Yes, but why does this matter? Could you spell please?
>>
>> Ah, well, that I don't konw. Why do we need the READ_ONCE() ?
>>
>> It does:
>>
>>> +               if (!(task->jobctl & JOBCTL_TASK_WORK) &&
>>> +                   lock_task_sighand(task, &flags)) {
>>
>> and the lock_task_sighand() implies barrier(), so I thought the reason
>> for the READ_ONCE() was load-tearing, and then we need WRITE_ONCE() to
>> avoid store-tearing.
> 
> I don't think we really need READ_ONCE() for correctness, compiler can't
> reorder this LOAD with cmpxchg() above, and I think we don't care about
> load-tearing.
> 
> But I guess we need READ_ONCE() or data_race() to shut kcsan up.

Thanks, reading through this thread makes me feel better. I agree that
we'll need READ_ONCE() just to shut up analyzers.

I'd really like to get this done at the same time as the io_uring
change. Are you open to doing the READ_ONCE() based JOBCTL_TASK_WORK
addition for 5.9? Alternatively we can retain the 1/2 patch from this
series and I'll open-code it in io_uring, but seems pointless as
io_uring is the only user of TWA_SIGNAL in the kernel anyway.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11 13:06                                         ` Jens Axboe
@ 2020-08-11 14:05                                           ` Oleg Nesterov
  2020-08-11 14:12                                             ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Oleg Nesterov @ 2020-08-11 14:05 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, Jann Horn, io-uring, stable, Josef

On 08/11, Jens Axboe wrote:
>
> I'd really like to get this done at the same time as the io_uring
> change. Are you open to doing the READ_ONCE() based JOBCTL_TASK_WORK
> addition for 5.9?

Yes, the patch looks fine to me. In fact I was going to add this
optimization from the very beginning, but then decided to make that
patch as simple as possible.

And in any case I personally like this change much more than 1/2 +
2/2 which I honestly don't understand ;)

Oleg.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-11 14:05                                           ` Oleg Nesterov
@ 2020-08-11 14:12                                             ` Jens Axboe
  0 siblings, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-11 14:12 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Peter Zijlstra, Jann Horn, io-uring, stable, Josef

On 8/11/20 8:05 AM, Oleg Nesterov wrote:
> On 08/11, Jens Axboe wrote:
>>
>> I'd really like to get this done at the same time as the io_uring
>> change. Are you open to doing the READ_ONCE() based JOBCTL_TASK_WORK
>> addition for 5.9?
> 
> Yes, the patch looks fine to me. In fact I was going to add this
> optimization from the very beginning, but then decided to make that
> patch as simple as possible.

OK great, I'll send that out separately.

> And in any case I personally like this change much more than 1/2 +
> 2/2 which I honestly don't understand ;)

Yes, in retrospect, me too :-)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-08 18:34 ` [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running Jens Axboe
  2020-08-10 11:42   ` peterz
@ 2020-08-13 16:25   ` Sasha Levin
  2020-08-19 23:57   ` Sasha Levin
  2 siblings, 0 replies; 41+ messages in thread
From: Sasha Levin @ 2020-08-13 16:25 UTC (permalink / raw)
  To: Sasha Levin, Jens Axboe, io-uring; +Cc: peterz, Jens Axboe, stable

Hi

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: 5.7+

The bot has tested the following trees: v5.8, v5.7.14.

v5.8: Failed to apply! Possible dependencies:
    3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
    4503b7676a2e ("io_uring: catch -EIO from buffered issue request failure")
    7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
    9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
    9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
    a1d7c393c471 ("io_uring: enable READ/WRITE to use deferred completions")
    b63534c41e20 ("io_uring: re-issue block requests that failed because of resources")
    bcf5a06304d6 ("io_uring: support true async buffered reads, if file provides it")
    c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
    c40f63790ec9 ("io_uring: use task_work for links if possible")
    e1e16097e265 ("io_uring: provide generic io_req_complete() helper")

v5.7.14: Failed to apply! Possible dependencies:
    0cdaf760f42e ("io_uring: remove req->needs_fixed_files")
    310672552f4a ("io_uring: async task poll trigger cleanup")
    3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
    405a5d2b2762 ("io_uring: avoid unnecessary io_wq_work copy for fast poll feature")
    4a38aed2a0a7 ("io_uring: batch reap of dead file registrations")
    4dd2824d6d59 ("io_uring: lazy get task")
    7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
    7cdaf587de7c ("io_uring: avoid whole io_wq_work copy for requests completed inline")
    7d01bd745a8f ("io_uring: remove obsolete 'state' parameter")
    9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
    9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
    c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
    c40f63790ec9 ("io_uring: use task_work for links if possible")
    d4c81f38522f ("io_uring: don't arm a timeout through work.func")
    f5fa38c59cb0 ("io_wq: add per-wq work handler instead of per work")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks
Sasha

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-08 18:34 ` [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running Jens Axboe
  2020-08-10 11:42   ` peterz
  2020-08-13 16:25   ` Sasha Levin
@ 2020-08-19 23:57   ` Sasha Levin
  2020-08-19 23:59     ` Jens Axboe
  2 siblings, 1 reply; 41+ messages in thread
From: Sasha Levin @ 2020-08-19 23:57 UTC (permalink / raw)
  To: Sasha Levin, Jens Axboe, io-uring; +Cc: peterz, Jens Axboe, stable

Hi

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: 5.7+

The bot has tested the following trees: v5.8.1, v5.7.15.

v5.8.1: Failed to apply! Possible dependencies:
    3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
    4503b7676a2e ("io_uring: catch -EIO from buffered issue request failure")
    7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
    9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
    9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
    a1d7c393c471 ("io_uring: enable READ/WRITE to use deferred completions")
    b63534c41e20 ("io_uring: re-issue block requests that failed because of resources")
    bcf5a06304d6 ("io_uring: support true async buffered reads, if file provides it")
    c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
    c40f63790ec9 ("io_uring: use task_work for links if possible")
    e1e16097e265 ("io_uring: provide generic io_req_complete() helper")

v5.7.15: Failed to apply! Possible dependencies:
    0cdaf760f42e ("io_uring: remove req->needs_fixed_files")
    310672552f4a ("io_uring: async task poll trigger cleanup")
    3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
    405a5d2b2762 ("io_uring: avoid unnecessary io_wq_work copy for fast poll feature")
    4a38aed2a0a7 ("io_uring: batch reap of dead file registrations")
    4dd2824d6d59 ("io_uring: lazy get task")
    7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
    7cdaf587de7c ("io_uring: avoid whole io_wq_work copy for requests completed inline")
    7d01bd745a8f ("io_uring: remove obsolete 'state' parameter")
    9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
    9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
    c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
    c40f63790ec9 ("io_uring: use task_work for links if possible")
    d4c81f38522f ("io_uring: don't arm a timeout through work.func")
    f5fa38c59cb0 ("io_wq: add per-wq work handler instead of per work")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks
Sasha

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-19 23:57   ` Sasha Levin
@ 2020-08-19 23:59     ` Jens Axboe
  2020-08-20  0:02       ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2020-08-19 23:59 UTC (permalink / raw)
  To: Sasha Levin, io-uring; +Cc: peterz, stable

On 8/19/20 4:57 PM, Sasha Levin wrote:
> Hi
> 
> [This is an automated email]
> 
> This commit has been processed because it contains a -stable tag.
> The stable tag indicates that it's relevant for the following trees: 5.7+
> 
> The bot has tested the following trees: v5.8.1, v5.7.15.
> 
> v5.8.1: Failed to apply! Possible dependencies:
>     3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
>     4503b7676a2e ("io_uring: catch -EIO from buffered issue request failure")
>     7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
>     9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
>     9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
>     a1d7c393c471 ("io_uring: enable READ/WRITE to use deferred completions")
>     b63534c41e20 ("io_uring: re-issue block requests that failed because of resources")
>     bcf5a06304d6 ("io_uring: support true async buffered reads, if file provides it")
>     c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
>     c40f63790ec9 ("io_uring: use task_work for links if possible")
>     e1e16097e265 ("io_uring: provide generic io_req_complete() helper")
> 
> v5.7.15: Failed to apply! Possible dependencies:
>     0cdaf760f42e ("io_uring: remove req->needs_fixed_files")
>     310672552f4a ("io_uring: async task poll trigger cleanup")
>     3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
>     405a5d2b2762 ("io_uring: avoid unnecessary io_wq_work copy for fast poll feature")
>     4a38aed2a0a7 ("io_uring: batch reap of dead file registrations")
>     4dd2824d6d59 ("io_uring: lazy get task")
>     7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
>     7cdaf587de7c ("io_uring: avoid whole io_wq_work copy for requests completed inline")
>     7d01bd745a8f ("io_uring: remove obsolete 'state' parameter")
>     9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
>     9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
>     c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
>     c40f63790ec9 ("io_uring: use task_work for links if possible")
>     d4c81f38522f ("io_uring: don't arm a timeout through work.func")
>     f5fa38c59cb0 ("io_wq: add per-wq work handler instead of per work")
> 
> 
> NOTE: The patch will not be queued to stable trees until it is upstream.
> 
> How should we proceed with this patch?

It's already queued for 5.7 and 5.8 stable. At least it should be, I'll double
check!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running
  2020-08-19 23:59     ` Jens Axboe
@ 2020-08-20  0:02       ` Jens Axboe
  0 siblings, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2020-08-20  0:02 UTC (permalink / raw)
  To: Sasha Levin, io-uring; +Cc: peterz, stable

On 8/19/20 4:59 PM, Jens Axboe wrote:
> On 8/19/20 4:57 PM, Sasha Levin wrote:
>> Hi
>>
>> [This is an automated email]
>>
>> This commit has been processed because it contains a -stable tag.
>> The stable tag indicates that it's relevant for the following trees: 5.7+
>>
>> The bot has tested the following trees: v5.8.1, v5.7.15.
>>
>> v5.8.1: Failed to apply! Possible dependencies:
>>     3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
>>     4503b7676a2e ("io_uring: catch -EIO from buffered issue request failure")
>>     7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
>>     9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
>>     9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
>>     a1d7c393c471 ("io_uring: enable READ/WRITE to use deferred completions")
>>     b63534c41e20 ("io_uring: re-issue block requests that failed because of resources")
>>     bcf5a06304d6 ("io_uring: support true async buffered reads, if file provides it")
>>     c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
>>     c40f63790ec9 ("io_uring: use task_work for links if possible")
>>     e1e16097e265 ("io_uring: provide generic io_req_complete() helper")
>>
>> v5.7.15: Failed to apply! Possible dependencies:
>>     0cdaf760f42e ("io_uring: remove req->needs_fixed_files")
>>     310672552f4a ("io_uring: async task poll trigger cleanup")
>>     3fa5e0f33128 ("io_uring: optimise io_req_find_next() fast check")
>>     405a5d2b2762 ("io_uring: avoid unnecessary io_wq_work copy for fast poll feature")
>>     4a38aed2a0a7 ("io_uring: batch reap of dead file registrations")
>>     4dd2824d6d59 ("io_uring: lazy get task")
>>     7c86ffeeed30 ("io_uring: deduplicate freeing linked timeouts")
>>     7cdaf587de7c ("io_uring: avoid whole io_wq_work copy for requests completed inline")
>>     7d01bd745a8f ("io_uring: remove obsolete 'state' parameter")
>>     9b0d911acce0 ("io_uring: kill REQ_F_LINK_NEXT")
>>     9b5f7bd93272 ("io_uring: replace find_next() out param with ret")
>>     c2c4c83c58cb ("io_uring: use new io_req_task_work_add() helper throughout")
>>     c40f63790ec9 ("io_uring: use task_work for links if possible")
>>     d4c81f38522f ("io_uring: don't arm a timeout through work.func")
>>     f5fa38c59cb0 ("io_wq: add per-wq work handler instead of per work")
>>
>>
>> NOTE: The patch will not be queued to stable trees until it is upstream.
>>
>> How should we proceed with this patch?
> 
> It's already queued for 5.7 and 5.8 stable. At least it should be, I'll double
> check!

The replacement is:

commit 228bb0e69491f55e502c883c583d7c0d67659e83
Author: Jens Axboe <axboe@kernel.dk>
Date:   Thu Aug 6 19:41:50 2020 -0600

    io_uring: use TWA_SIGNAL for task_work uncondtionally

So you can ignore this one.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2020-08-20  0:10 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-08 18:34 [PATCHSET 0/2] io_uring: use TWA_SIGNAL more carefully Jens Axboe
2020-08-08 18:34 ` [PATCH 1/2] kernel: split task_work_add() into two separate helpers Jens Axboe
2020-08-10 11:37   ` peterz
2020-08-10 15:01     ` Jens Axboe
2020-08-10 15:28       ` peterz
2020-08-10 17:51       ` Jens Axboe
2020-08-10 19:53         ` Peter Zijlstra
2020-08-08 18:34 ` [PATCH 2/2] io_uring: use TWA_SIGNAL for task_work if the task isn't running Jens Axboe
2020-08-10 11:42   ` peterz
2020-08-10 15:02     ` Jens Axboe
2020-08-10 19:21       ` Jens Axboe
2020-08-10 20:12         ` Peter Zijlstra
2020-08-10 20:13           ` Jens Axboe
2020-08-10 20:25             ` Jens Axboe
2020-08-10 20:32               ` Peter Zijlstra
2020-08-10 20:35                 ` Jens Axboe
2020-08-10 20:35               ` Jann Horn
2020-08-10 21:06                 ` Jens Axboe
2020-08-10 21:10                   ` Peter Zijlstra
2020-08-10 21:12                     ` Jens Axboe
2020-08-10 21:26                       ` Jann Horn
2020-08-10 21:28                         ` Jens Axboe
2020-08-10 22:01                           ` Jens Axboe
2020-08-10 22:41                             ` Jann Horn
2020-08-11  1:25                               ` Jens Axboe
2020-08-11  6:45                               ` Oleg Nesterov
2020-08-11  6:56                                 ` Peter Zijlstra
2020-08-11  7:14                                   ` Oleg Nesterov
2020-08-11  7:26                                     ` Oleg Nesterov
2020-08-11  7:49                                       ` Peter Zijlstra
2020-08-11  7:45                                     ` Peter Zijlstra
2020-08-11  8:10                                       ` Oleg Nesterov
2020-08-11 13:06                                         ` Jens Axboe
2020-08-11 14:05                                           ` Oleg Nesterov
2020-08-11 14:12                                             ` Jens Axboe
2020-08-10 21:27                       ` Jens Axboe
2020-08-10 20:16           ` Jens Axboe
2020-08-13 16:25   ` Sasha Levin
2020-08-19 23:57   ` Sasha Levin
2020-08-19 23:59     ` Jens Axboe
2020-08-20  0:02       ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).