From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@kernel.org>, Darren Hart <darren@dvhart.com>,
Yi Wang <wang.yi59@zte.com.cn>, Yang Tao <yang.tao172@zte.com.cn>,
Oleg Nesterov <oleg@redhat.com>,
Florian Weimer <fweimer@redhat.com>,
Carlos O'Donell <carlos@redhat.com>,
Alexander Viro <viro@zeniv.linux.org.uk>
Subject: [patch 03/12] futex: Replace PF_EXITPIDONE with a state
Date: Wed, 06 Nov 2019 22:55:37 +0100 [thread overview]
Message-ID: <20191106224556.149449274@linutronix.de> (raw)
In-Reply-To: 20191106215534.241796846@linutronix.de
The futex exit handling relies on PF_ flags. That's suboptimal as it
requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
the middle of do_exit() to enforce the observability of PF_EXITING in the
futex code.
Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
over to the new state. The PF_EXITING dependency will be cleaned up in a
later step.
This prepares for handling various futex exit issues later.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/futex.h | 33 +++++++++++++++++++++++++++++++++
include/linux/sched.h | 2 +-
kernel/exit.c | 18 ++----------------
kernel/futex.c | 25 +++++++++++++------------
4 files changed, 49 insertions(+), 29 deletions(-)
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -48,6 +48,10 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
#ifdef CONFIG_FUTEX
+enum {
+ FUTEX_STATE_OK,
+ FUTEX_STATE_DEAD,
+};
static inline void futex_init_task(struct task_struct *tsk)
{
@@ -57,6 +61,34 @@ static inline void futex_init_task(struc
#endif
INIT_LIST_HEAD(&tsk->pi_state_list);
tsk->pi_state_cache = NULL;
+ tsk->futex_state = FUTEX_STATE_OK;
+}
+
+/**
+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk: task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This has two callers:
+ *
+ * - futex_mm_release() after the futex exit cleanup has been done
+ *
+ * - do_exit() from the recursive fault handling path.
+ *
+ * In case of a recursive fault this is best effort. Either the futex exit
+ * code has run already or not. If the OWNER_DIED bit has been set on the
+ * futex then the waiter can take it over. If not, the problem is pushed
+ * back to user space. If the futex exit code did not run yet, then an
+ * already queued waiter might block forever, but there is nothing which
+ * can be done about that.
+ */
+static inline void futex_exit_done(struct task_struct *tsk)
+{
+ tsk->futex_state = FUTEX_STATE_DEAD;
}
void futex_mm_release(struct task_struct *tsk);
@@ -66,6 +98,7 @@ long do_futex(u32 __user *uaddr, int op,
#else
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_mm_release(struct task_struct *tsk) { }
+static inline void futex_exit_done(struct task_struct *tsk) { }
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
ktime_t *timeout, u32 __user *uaddr2,
u32 val2, u32 val3)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1054,6 +1054,7 @@ struct task_struct {
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
+ unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
@@ -1442,7 +1443,6 @@ extern struct pid *cad_pid;
*/
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* Getting shut down */
-#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -746,16 +746,7 @@ void __noreturn do_exit(long code)
*/
if (unlikely(tsk->flags & PF_EXITING)) {
pr_alert("Fixing recursive fault but reboot is needed!\n");
- /*
- * We can do this unlocked here. The futex code uses
- * this flag just to verify whether the pi state
- * cleanup has been done or not. In the worst case it
- * loops once more. We pretend that the cleanup was
- * done as there is no way to return. Either the
- * OWNER_DIED bit is set by now or we push the blocked
- * task into the wait for ever nirwana as well.
- */
- tsk->flags |= PF_EXITPIDONE;
+ futex_exit_done(tsk);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
@@ -846,12 +837,7 @@ void __noreturn do_exit(long code)
* Make sure we are holding no locks:
*/
debug_check_no_locks_held();
- /*
- * We can do this unlocked here. The futex code uses this flag
- * just to verify whether the pi state cleanup has been done
- * or not. In the worst case it loops once more.
- */
- tsk->flags |= PF_EXITPIDONE;
+ futex_exit_done(tsk);
if (tsk->io_context)
exit_io_context(tsk);
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *
u32 uval2;
/*
- * If PF_EXITPIDONE is not yet set, then try again.
+ * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
+ * for it to finish.
*/
- if (tsk && !(tsk->flags & PF_EXITPIDONE))
+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
return -EAGAIN;
/*
@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *
* *uaddr = 0xC0000000; tsk = get_task(PID);
* } if (!tsk->flags & PF_EXITING) {
* ... attach();
- * tsk->flags |= PF_EXITPIDONE; } else {
- * if (!(tsk->flags & PF_EXITPIDONE))
+ * tsk->futex_state = } else {
+ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
+ * FUTEX_STATE_DEAD)
* return -EAGAIN;
* return -ESRCH; <--- FAIL
* }
@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user
}
/*
- * We need to look at the task state flags to figure out,
- * whether the task is exiting. To protect against the do_exit
- * change of the task flags, we do this protected by
- * p->pi_lock:
+ * We need to look at the task state to figure out, whether the
+ * task is exiting. To protect against the change of the task state
+ * in futex_exit_release(), we do this protected by p->pi_lock:
*/
raw_spin_lock_irq(&p->pi_lock);
- if (unlikely(p->flags & PF_EXITING)) {
+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
/*
- * The task is on the way out. When PF_EXITPIDONE is
- * set, we know that the task has finished the
- * cleanup:
+ * The task is on the way out. When the futex state is
+ * FUTEX_STATE_DEAD, we know that the task has finished
+ * the cleanup:
*/
int ret = handle_exit_race(uaddr, uval, p);
next prev parent reply other threads:[~2019-11-06 23:08 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-06 21:55 [patch 00/12] futex: Cure robust/PI futex exit races Thomas Gleixner
2019-11-06 21:55 ` [patch 01/12] futex: Prevent robust futex exit race Thomas Gleixner
2019-11-06 21:55 ` [patch 02/12] futex: Move futex exit handling into futex code Thomas Gleixner
2019-11-07 9:24 ` Peter Zijlstra
2019-11-07 9:38 ` Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-16 20:53 ` Borislav Petkov
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` Thomas Gleixner [this message]
2019-11-15 18:19 ` [tip: locking/core] futex: Replace PF_EXITPIDONE with a state tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 04/12] exit/exec: Seperate mm_release() Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 05/12] futex: Split futex_mm_release() for exit/exec Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 06/12] futex: Set task::futex state to DEAD right after handling futex exit Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] futex: Set task::futex_state " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 07/12] futex: Mark the begin of futex exit explicitly Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 08/12] futex: Sanitize exit state handling Thomas Gleixner
2019-11-07 9:38 ` Peter Zijlstra
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 09/12] futex: Provide state handling for exec() as well Thomas Gleixner
2019-11-07 9:49 ` Peter Zijlstra
2019-11-07 9:54 ` Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 10/12] futex: Add mutex around futex exit Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 11/12] futex: Provide distinct return value when owner is exiting Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-06 21:55 ` [patch 12/12] futex: Prevent exit livelock Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2019-11-20 9:38 ` tip-bot2 for Thomas Gleixner
2019-11-07 8:29 ` [patch 00/12] futex: Cure robust/PI futex exit races Ingo Molnar
2019-11-07 8:41 ` Ingo Molnar
2019-11-07 9:40 ` Peter Zijlstra
2019-11-07 15:03 ` Oleg Nesterov
2019-11-07 22:29 ` Florian Weimer
2019-11-07 22:40 ` Florian Weimer
2019-11-08 7:38 ` Florian Weimer
2019-11-08 9:18 ` Thomas Gleixner
2019-11-08 10:17 ` Florian Weimer
2019-11-08 10:37 ` Thomas Gleixner
2019-11-08 11:51 ` Thomas Gleixner
2019-11-11 9:48 ` Florian Weimer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191106224556.149449274@linutronix.de \
--to=tglx@linutronix.de \
--cc=carlos@redhat.com \
--cc=darren@dvhart.com \
--cc=fweimer@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=oleg@redhat.com \
--cc=peterz@infradead.org \
--cc=viro@zeniv.linux.org.uk \
--cc=wang.yi59@zte.com.cn \
--cc=yang.tao172@zte.com.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).