LKML Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] task_work: cleanup notification modes
@ 2020-10-16 15:16 Jens Axboe
  2020-10-16 21:44 ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-16 15:16 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

A previous commit changed the notification mode from 0/1 to allowing
0/1/TWA_SIGNAL. Clean this up properly, and define a proper enum for
the notification mode. Now we have:

- TWA_NONE. This is 0, same as before the original change, meaning no
  notification requested.
- TWA_RESUME. This is 1, same as before the original change, meaning
  that we use TIF_NOTIFY_RESUME.
- TWA_SIGNAL. This uses TIF_SIGPENDING/JOBCTL_TASK_WORK for the
  notification.

Clean up all the callers, switching their 0/1/false/true to using the
appropriate TWA_* mode for notifications.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

As promised, here's the task_work notification cleanup patch.

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 1c08cb9eb9f6..4102b866e7c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1277,7 +1277,7 @@ static void queue_task_work(struct mce *m, int kill_it)
 	else
 		current->mce_kill_me.func = kill_me_maybe;
 
-	task_work_add(current, &current->mce_kill_me, true);
+	task_work_add(current, &current->mce_kill_me, TWA_RESUME);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index b494187632b2..af323e2e3100 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -561,7 +561,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 	 * callback has been invoked.
 	 */
 	atomic_inc(&rdtgrp->waitcount);
-	ret = task_work_add(tsk, &callback->work, true);
+	ret = task_work_add(tsk, &callback->work, TWA_RESUME);
 	if (ret) {
 		/*
 		 * Task is exiting. Drop the refcount and free the callback.
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81bf71b10d44..8360f8d6be65 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -879,7 +879,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 			estatus_node->task_work.func = ghes_kick_task_work;
 			estatus_node->task_work_cpu = smp_processor_id();
 			ret = task_work_add(current, &estatus_node->task_work,
-					    true);
+					    TWA_RESUME);
 			if (ret)
 				estatus_node->task_work.func = NULL;
 		}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4b9476521da6..b5117576792b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2229,7 +2229,7 @@ static void binder_deferred_fd_close(int fd)
 	__close_fd_get_file(fd, &twcb->file);
 	if (twcb->file) {
 		filp_close(twcb->file, current->files);
-		task_work_add(current, &twcb->twork, true);
+		task_work_add(current, &twcb->twork, TWA_RESUME);
 	} else {
 		kfree(twcb);
 	}
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..709ada3151da 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,7 +339,7 @@ void fput_many(struct file *file, unsigned int refs)
 
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
 			init_task_work(&file->f_u.fu_rcuhead, ____fput);
-			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
+			if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME))
 				return;
 			/*
 			 * After this task has run exit_task_work(),
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2e1dc354cd08..16e1d0e0d9b9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1756,7 +1756,7 @@ static void __io_free_req(struct io_kiocb *req)
 			struct task_struct *tsk;
 
 			tsk = io_wq_get_task(req->ctx->io_wq);
-			task_work_add(tsk, &req->task_work, 0);
+			task_work_add(tsk, &req->task_work, TWA_NONE);
 		}
 	}
 }
@@ -1905,7 +1905,8 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
 {
 	struct task_struct *tsk = req->task;
 	struct io_ring_ctx *ctx = req->ctx;
-	int ret, notify;
+	enum task_work_notify_mode notify;
+	int ret;
 
 	if (tsk->flags & PF_EXITING)
 		return -ESRCH;
@@ -1916,7 +1917,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
 	 * processing task_work. There's no reliable way to tell if TWA_RESUME
 	 * will do the job.
 	 */
-	notify = 0;
+	notify = TWA_NONE;
 	if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
 		notify = TWA_SIGNAL;
 
@@ -1985,7 +1986,7 @@ static void io_req_task_queue(struct io_kiocb *req)
 
 		init_task_work(&req->task_work, io_req_task_cancel);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 }
@@ -3199,7 +3200,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 		/* queue just for cancelation */
 		init_task_work(&req->task_work, io_req_task_cancel);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 	return 1;
@@ -4765,7 +4766,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 
 		WRITE_ONCE(poll->canceled, true);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 	return 1;
diff --git a/fs/namespace.c b/fs/namespace.c
index 294e05a13d17..1a75336668a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1191,7 +1191,7 @@ static void mntput_no_expire(struct mount *mnt)
 		struct task_struct *task = current;
 		if (likely(!(task->flags & PF_KTHREAD))) {
 			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
-			if (!task_work_add(task, &mnt->mnt_rcu, true))
+			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
 				return;
 		}
 		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 0fb93aafa478..0d848a1e9e62 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -13,9 +13,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 	twork->func = func;
 }
 
-#define TWA_RESUME	1
-#define TWA_SIGNAL	2
-int task_work_add(struct task_struct *task, struct callback_head *twork, int);
+enum task_work_notify_mode {
+	TWA_NONE,
+	TWA_RESUME,
+	TWA_SIGNAL,
+};
+
+int task_work_add(struct task_struct *task, struct callback_head *twork,
+			enum task_work_notify_mode mode);
 
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0e18aaf23a7b..00b0358739ab 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1823,7 +1823,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 
 	t->utask->dup_xol_addr = area->vaddr;
 	init_task_work(&t->utask->dup_xol_work, dup_xol_work);
-	task_work_add(t, &t->utask->dup_xol_work, true);
+	task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5df903fccb60..c460e0496006 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1162,7 +1162,7 @@ static int irq_thread(void *data)
 		handler_fn = irq_thread_fn;
 
 	init_task_work(&on_exit_work, irq_thread_dtor);
-	task_work_add(current, &on_exit_work, false);
+	task_work_add(current, &on_exit_work, TWA_NONE);
 
 	irq_thread_check_affinity(desc, action);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa4c6227cd6d..e17012be4d14 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2928,7 +2928,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 		curr->node_stamp += period;
 
 		if (!time_before(jiffies, curr->mm->numa_next_scan))
-			task_work_add(curr, work, true);
+			task_work_add(curr, work, TWA_RESUME);
 	}
 }
 
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 613b2d634af8..6aad749485de 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -9,7 +9,7 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * task_work_add - ask the @task to execute @work->func()
  * @task: the task which should run the callback
  * @work: the callback to run
- * @notify: send the notification if true
+ * @notify: send chosen notification, if any
  *
  * Queue @work for task_work_run() below and notify the @task if @notify.
  * Fails if the @task is exiting/exited and thus it can't process this @work.
@@ -24,8 +24,8 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * RETURNS:
  * 0 if succeeds or -ESRCH.
  */
-int
-task_work_add(struct task_struct *task, struct callback_head *work, int notify)
+int task_work_add(struct task_struct *task, struct callback_head *work,
+		  enum task_work_notify_mode notify)
 {
 	struct callback_head *head;
 	unsigned long flags;
@@ -38,6 +38,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 	} while (cmpxchg(&task->task_works, head, work) != head);
 
 	switch (notify) {
+	case TWA_NONE:
+		break;
 	case TWA_RESUME:
 		set_notify_resume(task);
 		break;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index e26bbccda7cc..61a614c21b9b 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1693,7 +1693,7 @@ long keyctl_session_to_parent(void)
 
 	/* the replacement session keyring is applied just prior to userspace
 	 * restarting */
-	ret = task_work_add(parent, newwork, true);
+	ret = task_work_add(parent, newwork, TWA_RESUME);
 	if (!ret)
 		newwork = NULL;
 unlock:
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 536c99646f6a..06e226166aab 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -99,7 +99,7 @@ static void report_access(const char *access, struct task_struct *target,
 	info->access = access;
 	info->target = target;
 	info->agent = agent;
-	if (task_work_add(current, &info->work, true) == 0)
+	if (task_work_add(current, &info->work, TWA_RESUME) == 0)
 		return; /* success */
 
 	WARN(1, "report_access called from exiting task");

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 15:16 [PATCH] task_work: cleanup notification modes Jens Axboe
@ 2020-10-16 21:44 ` Thomas Gleixner
  2020-10-16 22:39   ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-16 21:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

On Fri, Oct 16 2020 at 09:16, Jens Axboe wrote:
> A previous commit changed the notification mode from 0/1 to allowing

No. It changed it from boolean to an int.

There is a fundamental difference between 0/1 and false/true simply
because it's a compiler implementation detail how to represent a boolean
value.

Assume the following:

#define BAZ	0x08

       task_work_add(tsk, &work, foo & BAZ);

So with a function signature of

       task_work_add(*tsk, *work, bool signal);

the above 'foo & BAZ' becomes either true or false.

With the changed function signature of

       task_work_add(*tsk, *work, int signal);

the above becomes the result of 'foo & BAZ' which means that this
construct will no longer do the right thing.

It's pure luck that none of the usage sites relied on the boolean
property of that argument.

Please spell it out correctly that converting a boolean argument to an
integer argument is not equivalent.

>  	switch (notify) {
> +	case TWA_NONE:
> +		break;
>  	case TWA_RESUME:
>  		set_notify_resume(task);
>  		break;

The enum will not prevent that either and what you really want to do is
to have some form of debug warning if 'notify' is out of range, which
would have been the right thing to do in the first place.

> - * @notify: send the notification if true
> + * @notify: send chosen notification, if any

Is that really all you found to be wrong in that comment?

There is more and please take your time which will in turn save time and
nerves on both ends.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 21:44 ` Thomas Gleixner
@ 2020-10-16 22:39   ` Jens Axboe
  2020-10-16 23:09     ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-16 22:39 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

On 10/16/20 3:44 PM, Thomas Gleixner wrote:
> On Fri, Oct 16 2020 at 09:16, Jens Axboe wrote:
>> A previous commit changed the notification mode from 0/1 to allowing
> 
> No. It changed it from boolean to an int.
> 
> There is a fundamental difference between 0/1 and false/true simply
> because it's a compiler implementation detail how to represent a boolean
> value.
> 
> Assume the following:
> 
> #define BAZ	0x08
> 
>        task_work_add(tsk, &work, foo & BAZ);
> 
> So with a function signature of
> 
>        task_work_add(*tsk, *work, bool signal);
> 
> the above 'foo & BAZ' becomes either true or false.
> 
> With the changed function signature of
> 
>        task_work_add(*tsk, *work, int signal);
> 
> the above becomes the result of 'foo & BAZ' which means that this
> construct will no longer do the right thing.
> 
> It's pure luck that none of the usage sites relied on the boolean
> property of that argument.

It wasn't pure luck, that was checked before that change was made. No
users did anything funky, it was all false/true or 0/1.

> Please spell it out correctly that converting a boolean argument to an
> integer argument is not equivalent.

Fixed up the commit message to be more descriptive.

>>  	switch (notify) {
>> +	case TWA_NONE:
>> +		break;
>>  	case TWA_RESUME:
>>  		set_notify_resume(task);
>>  		break;
> 
> The enum will not prevent that either and what you really want to do is
> to have some form of debug warning if 'notify' is out of range, which
> would have been the right thing to do in the first place.

I added a WARN_ON_ONCE() in the default case for that one.

>> - * @notify: send the notification if true
>> + * @notify: send chosen notification, if any
> 
> Is that really all you found to be wrong in that comment?

There really is nothing wrong, but it's not very descriptive (wasn't
before either). I've added a fuller description of the various TWA_*
notification types now.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 22:39   ` Jens Axboe
@ 2020-10-16 23:09     ` Thomas Gleixner
  2020-10-16 23:13       ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-16 23:09 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

On Fri, Oct 16 2020 at 16:39, Jens Axboe wrote:
> On 10/16/20 3:44 PM, Thomas Gleixner wrote:
>>> - * @notify: send the notification if true
>>> + * @notify: send chosen notification, if any
>> 
>> Is that really all you found to be wrong in that comment?
>
> There really is nothing wrong, but it's not very descriptive (wasn't
> before either).

 * This is like the signal handler which runs in kernel mode, but it doesn't                                                                                                                                                                                                   
 * try to wake up the @task.   

I find a lot of wrongs in that sentence in the context of TWA_SIGNAL.

Agreed, it was hard to understand before that, but with TWA_SIGNAL it
does not make sense at all.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 23:09     ` Thomas Gleixner
@ 2020-10-16 23:13       ` Jens Axboe
  2020-10-16 23:38         ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-16 23:13 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

On 10/16/20 5:09 PM, Thomas Gleixner wrote:
> On Fri, Oct 16 2020 at 16:39, Jens Axboe wrote:
>> On 10/16/20 3:44 PM, Thomas Gleixner wrote:
>>>> - * @notify: send the notification if true
>>>> + * @notify: send chosen notification, if any
>>>
>>> Is that really all you found to be wrong in that comment?
>>
>> There really is nothing wrong, but it's not very descriptive (wasn't
>> before either).
> 
>  * This is like the signal handler which runs in kernel mode, but it doesn't                                                                                                                                                                                                   
>  * try to wake up the @task.   
> 
> I find a lot of wrongs in that sentence in the context of TWA_SIGNAL.
> 
> Agreed, it was hard to understand before that, but with TWA_SIGNAL it
> does not make sense at all.

This is what I currently have:

/**
 * task_work_add - ask the @task to execute @work->func()
 * @task: the task which should run the callback
 * @work: the callback to run
 * @notify: how to notify the targeted task
 *
 * Queue @work for task_work_run() below and notify the @task if @notify
 * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the
 * it will interrupt the targeted task and run the task_work. @TWA_RESUME
 * work is run only when the task exits the kernel and returns to user mode.
 * Fails if the @task is exiting/exited and thus it can't process this @work.
 * Otherwise @work->func() will be called when the @task returns from kernel
 * mode or exits.
 *
 * Note: there is no ordering guarantee on works queued here.
 *
 * RETURNS:
 * 0 if succeeds or -ESRCH.
 */

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 23:13       ` Jens Axboe
@ 2020-10-16 23:38         ` Thomas Gleixner
  2020-10-16 23:43           ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-16 23:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

On Fri, Oct 16 2020 at 17:13, Jens Axboe wrote:
> /**
>  * task_work_add - ask the @task to execute @work->func()
>  * @task: the task which should run the callback
>  * @work: the callback to run
>  * @notify: how to notify the targeted task
>  *
>  * Queue @work for task_work_run() below and notify the @task if @notify
>  * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the

s/the//

>  * it will interrupt the targeted task and run the task_work. @TWA_RESUME
>  * work is run only when the task exits the kernel and returns to user mode.
>  * Fails if the @task is exiting/exited and thus it can't process this @work.
>  * Otherwise @work->func() will be called when the @task returns from kernel
>  * mode or exits.

Yes, that makes a lot more sense.

What's still lacking is a description of the return value and how to act
upon it.

Most of the call sites ignore it, some are acting upon it but I can't
make any sense of these actions:

fs/io_uring.c-	notify = 0;
fs/io_uring.c-	if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
fs/io_uring.c-		notify = TWA_SIGNAL;
fs/io_uring.c-
fs/io_uring.c:	ret = task_work_add(tsk, &req->task_work, notify);
fs/io_uring.c-	if (!ret)
fs/io_uring.c-		wake_up_process(tsk);

???

fs/io_uring.c-	if (unlikely(ret)) {
fs/io_uring.c-		struct task_struct *tsk;
fs/io_uring.c-
fs/io_uring.c-		init_task_work(&req->task_work, io_req_task_cancel);
fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
fs/io_uring.c-		wake_up_process(tsk);

yet more magic wakeup.

fs/io_uring.c-
fs/io_uring.c-	init_task_work(&req->task_work, io_req_task_submit);
fs/io_uring.c-	percpu_ref_get(&req->ctx->refs);
fs/io_uring.c-
fs/io_uring.c-	/* submit ref gets dropped, acquire a new one */
fs/io_uring.c-	refcount_inc(&req->refs);
fs/io_uring.c:	ret = io_req_task_work_add(req, true);
fs/io_uring.c-	if (unlikely(ret)) {
fs/io_uring.c-		struct task_struct *tsk;
fs/io_uring.c-
fs/io_uring.c-		/* queue just for cancelation */
fs/io_uring.c-		init_task_work(&req->task_work, io_req_task_cancel);
fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
fs/io_uring.c-		wake_up_process(tsk);

Ditto. Why the heck is this wakeup making any sense? The initial
task_work_add() within io_req_task_work_add() failed already ...

fs/io_uring.c:	ret = io_req_task_work_add(req, twa_signal_ok);
fs/io_uring.c-	if (unlikely(ret)) {
fs/io_uring.c-		struct task_struct *tsk;
fs/io_uring.c-
fs/io_uring.c-		WRITE_ONCE(poll->canceled, true);
fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
fs/io_uring.c-		wake_up_process(tsk);

...

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 23:38         ` Thomas Gleixner
@ 2020-10-16 23:43           ` Jens Axboe
  2020-10-17 15:31             ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-16 23:43 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

On 10/16/20 5:38 PM, Thomas Gleixner wrote:
> On Fri, Oct 16 2020 at 17:13, Jens Axboe wrote:
>> /**
>>  * task_work_add - ask the @task to execute @work->func()
>>  * @task: the task which should run the callback
>>  * @work: the callback to run
>>  * @notify: how to notify the targeted task
>>  *
>>  * Queue @work for task_work_run() below and notify the @task if @notify
>>  * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the
> 
> s/the//

Thanks, good catch.

>>  * it will interrupt the targeted task and run the task_work. @TWA_RESUME
>>  * work is run only when the task exits the kernel and returns to user mode.
>>  * Fails if the @task is exiting/exited and thus it can't process this @work.
>>  * Otherwise @work->func() will be called when the @task returns from kernel
>>  * mode or exits.
> 
> Yes, that makes a lot more sense.
> 
> What's still lacking is a description of the return value and how to act
> upon it.

That's really up to the caller. But we should add some explanation of
that. Most callers use some alternative if the task is exiting, like
using a work queue for example.

> Most of the call sites ignore it, some are acting upon it but I can't

If you know the task isn't exiting, then yeah you can ignore it. But
seems a bit dicey...

> make any sense of these actions:
> 
> fs/io_uring.c-	notify = 0;
> fs/io_uring.c-	if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
> fs/io_uring.c-		notify = TWA_SIGNAL;
> fs/io_uring.c-
> fs/io_uring.c:	ret = task_work_add(tsk, &req->task_work, notify);
> fs/io_uring.c-	if (!ret)
> fs/io_uring.c-		wake_up_process(tsk);
> 
> ???
> 
> fs/io_uring.c-	if (unlikely(ret)) {
> fs/io_uring.c-		struct task_struct *tsk;
> fs/io_uring.c-
> fs/io_uring.c-		init_task_work(&req->task_work, io_req_task_cancel);
> fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
> fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
> fs/io_uring.c-		wake_up_process(tsk);
> 
> yet more magic wakeup.

It's not magic, but probably needs a comment... If we fail, that task is
exiting. But we know we have our io-wq threads, so we use that as a
fallback. Not really expected in the fast path.

> fs/io_uring.c-
> fs/io_uring.c-	init_task_work(&req->task_work, io_req_task_submit);
> fs/io_uring.c-	percpu_ref_get(&req->ctx->refs);
> fs/io_uring.c-
> fs/io_uring.c-	/* submit ref gets dropped, acquire a new one */
> fs/io_uring.c-	refcount_inc(&req->refs);
> fs/io_uring.c:	ret = io_req_task_work_add(req, true);
> fs/io_uring.c-	if (unlikely(ret)) {
> fs/io_uring.c-		struct task_struct *tsk;
> fs/io_uring.c-
> fs/io_uring.c-		/* queue just for cancelation */
> fs/io_uring.c-		init_task_work(&req->task_work, io_req_task_cancel);
> fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
> fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
> fs/io_uring.c-		wake_up_process(tsk);
> 
> Ditto. Why the heck is this wakeup making any sense? The initial
> task_work_add() within io_req_task_work_add() failed already ...

Right, but we're using a new task for this. And that task is a kthread
that we manage, hence no notification is needed outside of just waking
it up.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-16 23:43           ` Jens Axboe
@ 2020-10-17 15:31             ` Thomas Gleixner
  2020-10-17 15:36               ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-17 15:31 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

On Fri, Oct 16 2020 at 17:43, Jens Axboe wrote:
> On 10/16/20 5:38 PM, Thomas Gleixner wrote:
> If you know the task isn't exiting, then yeah you can ignore it. But
> seems a bit dicey...

Indeed.

>> fs/io_uring.c-	if (unlikely(ret)) {
>> fs/io_uring.c-		struct task_struct *tsk;
>> fs/io_uring.c-
>> fs/io_uring.c-		init_task_work(&req->task_work, io_req_task_cancel);
>> fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
>> fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
>> fs/io_uring.c-		wake_up_process(tsk);
>> 
>> yet more magic wakeup.
>
> It's not magic, but probably needs a comment... If we fail, that task is
> exiting. But we know we have our io-wq threads, so we use that as a
> fallback. Not really expected in the fast path.

I somehow misread it. So ignore me.

Thanks,

        tglx



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-17 15:31             ` Thomas Gleixner
@ 2020-10-17 15:36               ` Jens Axboe
  2020-10-17 20:18                 ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-17 15:36 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

On 10/17/20 9:31 AM, Thomas Gleixner wrote:
> On Fri, Oct 16 2020 at 17:43, Jens Axboe wrote:
>> On 10/16/20 5:38 PM, Thomas Gleixner wrote:
>> If you know the task isn't exiting, then yeah you can ignore it. But
>> seems a bit dicey...
> 
> Indeed.
> 
>>> fs/io_uring.c-	if (unlikely(ret)) {
>>> fs/io_uring.c-		struct task_struct *tsk;
>>> fs/io_uring.c-
>>> fs/io_uring.c-		init_task_work(&req->task_work, io_req_task_cancel);
>>> fs/io_uring.c-		tsk = io_wq_get_task(req->ctx->io_wq);
>>> fs/io_uring.c:		task_work_add(tsk, &req->task_work, 0);
>>> fs/io_uring.c-		wake_up_process(tsk);
>>>
>>> yet more magic wakeup.
>>
>> It's not magic, but probably needs a comment... If we fail, that task is
>> exiting. But we know we have our io-wq threads, so we use that as a
>> fallback. Not really expected in the fast path.
> 
> I somehow misread it. So ignore me.

No worries, it probably needs a comment...

In any case, here's the current version. I added a comment on the failure
case as well for task_work_add(), would be great if you could let me know
if you're happy with it.


commit dc43ec64ca1374451853b53a81d5371c91ca221f
Author: Jens Axboe <axboe@kernel.dk>
Date:   Fri Oct 16 09:02:26 2020 -0600

    task_work: cleanup notification modes
    
    A previous commit changed the notification mode from true/false to an
    int, allowing notify-no, notify-yes, or signal-notify. This was
    backwards compatible in the sense that any existing true/false user
    would translate to either 0 (on notification sent) or 1, the latter
    which mapped to TWA_RESUME. TWA_SIGNAL was assigned a value of 2.
    
    Clean this up properly, and define a proper enum for the notification
    mode. Now we have:
    
    - TWA_NONE. This is 0, same as before the original change, meaning no
      notification requested.
    - TWA_RESUME. This is 1, same as before the original change, meaning
      that we use TIF_NOTIFY_RESUME.
    - TWA_SIGNAL. This uses TIF_SIGPENDING/JOBCTL_TASK_WORK for the
      notification.
    
    Clean up all the callers, switching their 0/1/false/true to using the
    appropriate TWA_* mode for notifications.
    
    Fixes: e91b48162332 ("task_work: teach task_work_add() to do signal_wake_up()")
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 1c08cb9eb9f6..4102b866e7c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1277,7 +1277,7 @@ static void queue_task_work(struct mce *m, int kill_it)
 	else
 		current->mce_kill_me.func = kill_me_maybe;
 
-	task_work_add(current, &current->mce_kill_me, true);
+	task_work_add(current, &current->mce_kill_me, TWA_RESUME);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index b494187632b2..af323e2e3100 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -561,7 +561,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 	 * callback has been invoked.
 	 */
 	atomic_inc(&rdtgrp->waitcount);
-	ret = task_work_add(tsk, &callback->work, true);
+	ret = task_work_add(tsk, &callback->work, TWA_RESUME);
 	if (ret) {
 		/*
 		 * Task is exiting. Drop the refcount and free the callback.
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81bf71b10d44..8360f8d6be65 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -879,7 +879,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 			estatus_node->task_work.func = ghes_kick_task_work;
 			estatus_node->task_work_cpu = smp_processor_id();
 			ret = task_work_add(current, &estatus_node->task_work,
-					    true);
+					    TWA_RESUME);
 			if (ret)
 				estatus_node->task_work.func = NULL;
 		}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4b9476521da6..b5117576792b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2229,7 +2229,7 @@ static void binder_deferred_fd_close(int fd)
 	__close_fd_get_file(fd, &twcb->file);
 	if (twcb->file) {
 		filp_close(twcb->file, current->files);
-		task_work_add(current, &twcb->twork, true);
+		task_work_add(current, &twcb->twork, TWA_RESUME);
 	} else {
 		kfree(twcb);
 	}
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..709ada3151da 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,7 +339,7 @@ void fput_many(struct file *file, unsigned int refs)
 
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
 			init_task_work(&file->f_u.fu_rcuhead, ____fput);
-			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
+			if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME))
 				return;
 			/*
 			 * After this task has run exit_task_work(),
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2e1dc354cd08..16e1d0e0d9b9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1756,7 +1756,7 @@ static void __io_free_req(struct io_kiocb *req)
 			struct task_struct *tsk;
 
 			tsk = io_wq_get_task(req->ctx->io_wq);
-			task_work_add(tsk, &req->task_work, 0);
+			task_work_add(tsk, &req->task_work, TWA_NONE);
 		}
 	}
 }
@@ -1905,7 +1905,8 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
 {
 	struct task_struct *tsk = req->task;
 	struct io_ring_ctx *ctx = req->ctx;
-	int ret, notify;
+	enum task_work_notify_mode notify;
+	int ret;
 
 	if (tsk->flags & PF_EXITING)
 		return -ESRCH;
@@ -1916,7 +1917,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
 	 * processing task_work. There's no reliable way to tell if TWA_RESUME
 	 * will do the job.
 	 */
-	notify = 0;
+	notify = TWA_NONE;
 	if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
 		notify = TWA_SIGNAL;
 
@@ -1985,7 +1986,7 @@ static void io_req_task_queue(struct io_kiocb *req)
 
 		init_task_work(&req->task_work, io_req_task_cancel);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 }
@@ -3199,7 +3200,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 		/* queue just for cancelation */
 		init_task_work(&req->task_work, io_req_task_cancel);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 	return 1;
@@ -4765,7 +4766,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 
 		WRITE_ONCE(poll->canceled, true);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 	return 1;
diff --git a/fs/namespace.c b/fs/namespace.c
index 294e05a13d17..1a75336668a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1191,7 +1191,7 @@ static void mntput_no_expire(struct mount *mnt)
 		struct task_struct *task = current;
 		if (likely(!(task->flags & PF_KTHREAD))) {
 			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
-			if (!task_work_add(task, &mnt->mnt_rcu, true))
+			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
 				return;
 		}
 		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 0fb93aafa478..0d848a1e9e62 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -13,9 +13,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 	twork->func = func;
 }
 
-#define TWA_RESUME	1
-#define TWA_SIGNAL	2
-int task_work_add(struct task_struct *task, struct callback_head *twork, int);
+enum task_work_notify_mode {
+	TWA_NONE,
+	TWA_RESUME,
+	TWA_SIGNAL,
+};
+
+int task_work_add(struct task_struct *task, struct callback_head *twork,
+			enum task_work_notify_mode mode);
 
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0e18aaf23a7b..00b0358739ab 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1823,7 +1823,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 
 	t->utask->dup_xol_addr = area->vaddr;
 	init_task_work(&t->utask->dup_xol_work, dup_xol_work);
-	task_work_add(t, &t->utask->dup_xol_work, true);
+	task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5df903fccb60..c460e0496006 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1162,7 +1162,7 @@ static int irq_thread(void *data)
 		handler_fn = irq_thread_fn;
 
 	init_task_work(&on_exit_work, irq_thread_dtor);
-	task_work_add(current, &on_exit_work, false);
+	task_work_add(current, &on_exit_work, TWA_NONE);
 
 	irq_thread_check_affinity(desc, action);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa4c6227cd6d..e17012be4d14 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2928,7 +2928,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 		curr->node_stamp += period;
 
 		if (!time_before(jiffies, curr->mm->numa_next_scan))
-			task_work_add(curr, work, true);
+			task_work_add(curr, work, TWA_RESUME);
 	}
 }
 
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 613b2d634af8..d82c224ab5d5 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -9,23 +9,28 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * task_work_add - ask the @task to execute @work->func()
  * @task: the task which should run the callback
  * @work: the callback to run
- * @notify: send the notification if true
+ * @notify: how to notify the targeted task
  *
- * Queue @work for task_work_run() below and notify the @task if @notify.
+ * Queue @work for task_work_run() below and notify the @task if @notify
+ * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the
+ * it will interrupt the targeted task and run the task_work. @TWA_RESUME
+ * work is run only when the task exits the kernel and returns to user mode.
  * Fails if the @task is exiting/exited and thus it can't process this @work.
  * Otherwise @work->func() will be called when the @task returns from kernel
  * mode or exits.
  *
- * This is like the signal handler which runs in kernel mode, but it doesn't
- * try to wake up the @task.
+ * If the targeted task is exiting, then an error is returned and the work item
+ * is not queued. It's up to the caller to arrange for an alternative mechanism
+ * in tht case.
  *
- * Note: there is no ordering guarantee on works queued here.
+ * Note: there is no ordering guarantee on works queued here. The task_work
+ * list is LIFO.
  *
  * RETURNS:
  * 0 if succeeds or -ESRCH.
  */
-int
-task_work_add(struct task_struct *task, struct callback_head *work, int notify)
+int task_work_add(struct task_struct *task, struct callback_head *work,
+		  enum task_work_notify_mode notify)
 {
 	struct callback_head *head;
 	unsigned long flags;
@@ -38,6 +43,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 	} while (cmpxchg(&task->task_works, head, work) != head);
 
 	switch (notify) {
+	case TWA_NONE:
+		break;
 	case TWA_RESUME:
 		set_notify_resume(task);
 		break;
@@ -54,6 +61,9 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 			unlock_task_sighand(task, &flags);
 		}
 		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 
 	return 0;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index e26bbccda7cc..61a614c21b9b 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1693,7 +1693,7 @@ long keyctl_session_to_parent(void)
 
 	/* the replacement session keyring is applied just prior to userspace
 	 * restarting */
-	ret = task_work_add(parent, newwork, true);
+	ret = task_work_add(parent, newwork, TWA_RESUME);
 	if (!ret)
 		newwork = NULL;
 unlock:
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 536c99646f6a..06e226166aab 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -99,7 +99,7 @@ static void report_access(const char *access, struct task_struct *target,
 	info->access = access;
 	info->target = target;
 	info->agent = agent;
-	if (task_work_add(current, &info->work, true) == 0)
+	if (task_work_add(current, &info->work, TWA_RESUME) == 0)
 		return; /* success */
 
 	WARN(1, "report_access called from exiting task");

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-17 15:36               ` Jens Axboe
@ 2020-10-17 20:18                 ` Thomas Gleixner
  2020-10-17 20:32                   ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-17 20:18 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

On Sat, Oct 17 2020 at 09:36, Jens Axboe wrote:
> diff --git a/kernel/task_work.c b/kernel/task_work.c
> index 613b2d634af8..d82c224ab5d5 100644
> --- a/kernel/task_work.c
> +++ b/kernel/task_work.c
> @@ -9,23 +9,28 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
>   * task_work_add - ask the @task to execute @work->func()
>   * @task: the task which should run the callback
>   * @work: the callback to run
> - * @notify: send the notification if true
> + * @notify: how to notify the targeted task
>   *
> - * Queue @work for task_work_run() below and notify the @task if @notify.
> + * Queue @work for task_work_run() below and notify the @task if @notify
> + * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the

s/work/works/

> + * it will interrupt the targeted task and run the task_work. @TWA_RESUME
> + * work is run only when the task exits the kernel and returns to user mode.

It's also run before entering a guest. At least on x86, but all other
architectures should do the same.

>   * Fails if the @task is exiting/exited and thus it can't process this @work.
>   * Otherwise @work->func() will be called when the @task returns from kernel
>   * mode or exits.
>   *
> - * This is like the signal handler which runs in kernel mode, but it doesn't
> - * try to wake up the @task.
> + * If the targeted task is exiting, then an error is returned and the work item
> + * is not queued. It's up to the caller to arrange for an alternative mechanism
> + * in tht case.

s/tht/that/

Looks good otherwise.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-17 20:18                 ` Thomas Gleixner
@ 2020-10-17 20:32                   ` Jens Axboe
  2020-10-17 21:01                     ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-17 20:32 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

On 10/17/20 2:18 PM, Thomas Gleixner wrote:
> On Sat, Oct 17 2020 at 09:36, Jens Axboe wrote:
>> diff --git a/kernel/task_work.c b/kernel/task_work.c
>> index 613b2d634af8..d82c224ab5d5 100644
>> --- a/kernel/task_work.c
>> +++ b/kernel/task_work.c
>> @@ -9,23 +9,28 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
>>   * task_work_add - ask the @task to execute @work->func()
>>   * @task: the task which should run the callback
>>   * @work: the callback to run
>> - * @notify: send the notification if true
>> + * @notify: how to notify the targeted task
>>   *
>> - * Queue @work for task_work_run() below and notify the @task if @notify.
>> + * Queue @work for task_work_run() below and notify the @task if @notify
>> + * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL work like signals, in that the
> 
> s/work/works/
> 
>> + * it will interrupt the targeted task and run the task_work. @TWA_RESUME
>> + * work is run only when the task exits the kernel and returns to user mode.
> 
> It's also run before entering a guest. At least on x86, but all other
> architectures should do the same.
> 
>>   * Fails if the @task is exiting/exited and thus it can't process this @work.
>>   * Otherwise @work->func() will be called when the @task returns from kernel
>>   * mode or exits.
>>   *
>> - * This is like the signal handler which runs in kernel mode, but it doesn't
>> - * try to wake up the @task.
>> + * If the targeted task is exiting, then an error is returned and the work item
>> + * is not queued. It's up to the caller to arrange for an alternative mechanism
>> + * in tht case.
> 
> s/tht/that/
> 
> Looks good otherwise.

Thanks! Fixed the two typos, and also included the guest mode. If you're fine
with it now, would be great to have your reviewed-by or similar.


commit 4799fc5b3bc3520a61d3afd714d11672e45c81cb
Author: Jens Axboe <axboe@kernel.dk>
Date:   Fri Oct 16 09:02:26 2020 -0600

    task_work: cleanup notification modes
    
    A previous commit changed the notification mode from true/false to an
    int, allowing notify-no, notify-yes, or signal-notify. This was
    backwards compatible in the sense that any existing true/false user
    would translate to either 0 (on notification sent) or 1, the latter
    which mapped to TWA_RESUME. TWA_SIGNAL was assigned a value of 2.
    
    Clean this up properly, and define a proper enum for the notification
    mode. Now we have:
    
    - TWA_NONE. This is 0, same as before the original change, meaning no
      notification requested.
    - TWA_RESUME. This is 1, same as before the original change, meaning
      that we use TIF_NOTIFY_RESUME.
    - TWA_SIGNAL. This uses TIF_SIGPENDING/JOBCTL_TASK_WORK for the
      notification.
    
    Clean up all the callers, switching their 0/1/false/true to using the
    appropriate TWA_* mode for notifications.
    
    Fixes: e91b48162332 ("task_work: teach task_work_add() to do signal_wake_up()")
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 1c08cb9eb9f6..4102b866e7c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1277,7 +1277,7 @@ static void queue_task_work(struct mce *m, int kill_it)
 	else
 		current->mce_kill_me.func = kill_me_maybe;
 
-	task_work_add(current, &current->mce_kill_me, true);
+	task_work_add(current, &current->mce_kill_me, TWA_RESUME);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index b494187632b2..af323e2e3100 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -561,7 +561,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 	 * callback has been invoked.
 	 */
 	atomic_inc(&rdtgrp->waitcount);
-	ret = task_work_add(tsk, &callback->work, true);
+	ret = task_work_add(tsk, &callback->work, TWA_RESUME);
 	if (ret) {
 		/*
 		 * Task is exiting. Drop the refcount and free the callback.
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81bf71b10d44..8360f8d6be65 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -879,7 +879,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 			estatus_node->task_work.func = ghes_kick_task_work;
 			estatus_node->task_work_cpu = smp_processor_id();
 			ret = task_work_add(current, &estatus_node->task_work,
-					    true);
+					    TWA_RESUME);
 			if (ret)
 				estatus_node->task_work.func = NULL;
 		}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4b9476521da6..b5117576792b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2229,7 +2229,7 @@ static void binder_deferred_fd_close(int fd)
 	__close_fd_get_file(fd, &twcb->file);
 	if (twcb->file) {
 		filp_close(twcb->file, current->files);
-		task_work_add(current, &twcb->twork, true);
+		task_work_add(current, &twcb->twork, TWA_RESUME);
 	} else {
 		kfree(twcb);
 	}
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..709ada3151da 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,7 +339,7 @@ void fput_many(struct file *file, unsigned int refs)
 
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
 			init_task_work(&file->f_u.fu_rcuhead, ____fput);
-			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
+			if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME))
 				return;
 			/*
 			 * After this task has run exit_task_work(),
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2e1dc354cd08..16e1d0e0d9b9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1756,7 +1756,7 @@ static void __io_free_req(struct io_kiocb *req)
 			struct task_struct *tsk;
 
 			tsk = io_wq_get_task(req->ctx->io_wq);
-			task_work_add(tsk, &req->task_work, 0);
+			task_work_add(tsk, &req->task_work, TWA_NONE);
 		}
 	}
 }
@@ -1905,7 +1905,8 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
 {
 	struct task_struct *tsk = req->task;
 	struct io_ring_ctx *ctx = req->ctx;
-	int ret, notify;
+	enum task_work_notify_mode notify;
+	int ret;
 
 	if (tsk->flags & PF_EXITING)
 		return -ESRCH;
@@ -1916,7 +1917,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
 	 * processing task_work. There's no reliable way to tell if TWA_RESUME
 	 * will do the job.
 	 */
-	notify = 0;
+	notify = TWA_NONE;
 	if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
 		notify = TWA_SIGNAL;
 
@@ -1985,7 +1986,7 @@ static void io_req_task_queue(struct io_kiocb *req)
 
 		init_task_work(&req->task_work, io_req_task_cancel);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 }
@@ -3199,7 +3200,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 		/* queue just for cancelation */
 		init_task_work(&req->task_work, io_req_task_cancel);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 	return 1;
@@ -4765,7 +4766,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 
 		WRITE_ONCE(poll->canceled, true);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, 0);
+		task_work_add(tsk, &req->task_work, TWA_NONE);
 		wake_up_process(tsk);
 	}
 	return 1;
diff --git a/fs/namespace.c b/fs/namespace.c
index 294e05a13d17..1a75336668a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1191,7 +1191,7 @@ static void mntput_no_expire(struct mount *mnt)
 		struct task_struct *task = current;
 		if (likely(!(task->flags & PF_KTHREAD))) {
 			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
-			if (!task_work_add(task, &mnt->mnt_rcu, true))
+			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
 				return;
 		}
 		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 0fb93aafa478..0d848a1e9e62 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -13,9 +13,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 	twork->func = func;
 }
 
-#define TWA_RESUME	1
-#define TWA_SIGNAL	2
-int task_work_add(struct task_struct *task, struct callback_head *twork, int);
+enum task_work_notify_mode {
+	TWA_NONE,
+	TWA_RESUME,
+	TWA_SIGNAL,
+};
+
+int task_work_add(struct task_struct *task, struct callback_head *twork,
+			enum task_work_notify_mode mode);
 
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0e18aaf23a7b..00b0358739ab 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1823,7 +1823,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 
 	t->utask->dup_xol_addr = area->vaddr;
 	init_task_work(&t->utask->dup_xol_work, dup_xol_work);
-	task_work_add(t, &t->utask->dup_xol_work, true);
+	task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5df903fccb60..c460e0496006 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1162,7 +1162,7 @@ static int irq_thread(void *data)
 		handler_fn = irq_thread_fn;
 
 	init_task_work(&on_exit_work, irq_thread_dtor);
-	task_work_add(current, &on_exit_work, false);
+	task_work_add(current, &on_exit_work, TWA_NONE);
 
 	irq_thread_check_affinity(desc, action);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa4c6227cd6d..e17012be4d14 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2928,7 +2928,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 		curr->node_stamp += period;
 
 		if (!time_before(jiffies, curr->mm->numa_next_scan))
-			task_work_add(curr, work, true);
+			task_work_add(curr, work, TWA_RESUME);
 	}
 }
 
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 613b2d634af8..8d6e1217c451 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -9,23 +9,28 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * task_work_add - ask the @task to execute @work->func()
  * @task: the task which should run the callback
  * @work: the callback to run
- * @notify: send the notification if true
+ * @notify: how to notify the targeted task
  *
- * Queue @work for task_work_run() below and notify the @task if @notify.
- * Fails if the @task is exiting/exited and thus it can't process this @work.
- * Otherwise @work->func() will be called when the @task returns from kernel
- * mode or exits.
+ * Queue @work for task_work_run() below and notify the @task if @notify
+ * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that
+ * it will interrupt the targeted task and run the task_work. @TWA_RESUME
+ * work is run only when the task exits the kernel and returns to user mode,
+ * or before entering guest mode. Fails if the @task is exiting/exited and thus
+ * it can't process this @work. Otherwise @work->func() will be called when the
+ * @task goes through one of the aforementioned transitions, or exits.
  *
- * This is like the signal handler which runs in kernel mode, but it doesn't
- * try to wake up the @task.
+ * If the targeted task is exiting, then an error is returned and the work item
+ * is not queued. It's up to the caller to arrange for an alternative mechanism
+ * in that case.
  *
- * Note: there is no ordering guarantee on works queued here.
+ * Note: there is no ordering guarantee on works queued here. The task_work
+ * list is LIFO.
  *
  * RETURNS:
  * 0 if succeeds or -ESRCH.
  */
-int
-task_work_add(struct task_struct *task, struct callback_head *work, int notify)
+int task_work_add(struct task_struct *task, struct callback_head *work,
+		  enum task_work_notify_mode notify)
 {
 	struct callback_head *head;
 	unsigned long flags;
@@ -38,6 +43,8 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 	} while (cmpxchg(&task->task_works, head, work) != head);
 
 	switch (notify) {
+	case TWA_NONE:
+		break;
 	case TWA_RESUME:
 		set_notify_resume(task);
 		break;
@@ -54,6 +61,9 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 			unlock_task_sighand(task, &flags);
 		}
 		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 
 	return 0;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index e26bbccda7cc..61a614c21b9b 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1693,7 +1693,7 @@ long keyctl_session_to_parent(void)
 
 	/* the replacement session keyring is applied just prior to userspace
 	 * restarting */
-	ret = task_work_add(parent, newwork, true);
+	ret = task_work_add(parent, newwork, TWA_RESUME);
 	if (!ret)
 		newwork = NULL;
 unlock:
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 536c99646f6a..06e226166aab 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -99,7 +99,7 @@ static void report_access(const char *access, struct task_struct *target,
 	info->access = access;
 	info->target = target;
 	info->agent = agent;
-	if (task_work_add(current, &info->work, true) == 0)
+	if (task_work_add(current, &info->work, TWA_RESUME) == 0)
 		return; /* success */
 
 	WARN(1, "report_access called from exiting task");

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-17 20:32                   ` Jens Axboe
@ 2020-10-17 21:01                     ` Thomas Gleixner
  2020-10-17 21:03                       ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-17 21:01 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

Jens,

On Sat, Oct 17 2020 at 14:32, Jens Axboe wrote:
> On 10/17/20 2:18 PM, Thomas Gleixner wrote:
>
> Thanks! Fixed the two typos, and also included the guest mode. If you're fine
> with it now, would be great to have your reviewed-by or similar.

Sure. I assume you ship it to Linus, otherwise let me know and I'll pick
it up.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-17 21:01                     ` Thomas Gleixner
@ 2020-10-17 21:03                       ` Jens Axboe
  2020-10-17 21:17                         ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2020-10-17 21:03 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel

On 10/17/20 3:01 PM, Thomas Gleixner wrote:
> Jens,
> 
> On Sat, Oct 17 2020 at 14:32, Jens Axboe wrote:
>> On 10/17/20 2:18 PM, Thomas Gleixner wrote:
>>
>> Thanks! Fixed the two typos, and also included the guest mode. If you're fine
>> with it now, would be great to have your reviewed-by or similar.
> 
> Sure. I assume you ship it to Linus, otherwise let me know and I'll pick
> it up.

I can, I have it bundled up with the TIF_NOTIFY_RESUME cleanup. Either
way is fine with me, so if you're good with it, I'll ship it before -rc1.

> Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

Thanks!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] task_work: cleanup notification modes
  2020-10-17 21:03                       ` Jens Axboe
@ 2020-10-17 21:17                         ` Thomas Gleixner
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Gleixner @ 2020-10-17 21:17 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

On Sat, Oct 17 2020 at 15:03, Jens Axboe wrote:
> On 10/17/20 3:01 PM, Thomas Gleixner wrote:
>> Sure. I assume you ship it to Linus, otherwise let me know and I'll pick
>> it up.
>
> I can, I have it bundled up with the TIF_NOTIFY_RESUME cleanup. Either
> way is fine with me, so if you're good with it, I'll ship it before -rc1.

Works for me.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-16 15:16 [PATCH] task_work: cleanup notification modes Jens Axboe
2020-10-16 21:44 ` Thomas Gleixner
2020-10-16 22:39   ` Jens Axboe
2020-10-16 23:09     ` Thomas Gleixner
2020-10-16 23:13       ` Jens Axboe
2020-10-16 23:38         ` Thomas Gleixner
2020-10-16 23:43           ` Jens Axboe
2020-10-17 15:31             ` Thomas Gleixner
2020-10-17 15:36               ` Jens Axboe
2020-10-17 20:18                 ` Thomas Gleixner
2020-10-17 20:32                   ` Jens Axboe
2020-10-17 21:01                     ` Thomas Gleixner
2020-10-17 21:03                       ` Jens Axboe
2020-10-17 21:17                         ` Thomas Gleixner

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git