All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Lee Jones <lee.jones@linaro.org>
Subject: [PATCH 4.9 06/32] futex: Replace PF_EXITPIDONE with a state
Date: Tue,  2 Feb 2021 14:38:29 +0100	[thread overview]
Message-ID: <20210202132942.291252804@linuxfoundation.org> (raw)
In-Reply-To: <20210202132942.035179752@linuxfoundation.org>

From: Thomas Gleixner <tglx@linutronix.de>

commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

The futex exit handling relies on PF_ flags. That's suboptimal as it
requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
the middle of do_exit() to enforce the observability of PF_EXITING in the
futex code.

Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
over to the new state. The PF_EXITING dependency will be cleaned up in a
later step.

This prepares for handling various futex exit issues later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/futex.h |   34 ++++++++++++++++++++++++++++++++++
 include/linux/sched.h |    2 +-
 kernel/exit.c         |   18 ++----------------
 kernel/futex.c        |   17 ++++++++---------
 4 files changed, 45 insertions(+), 26 deletions(-)

--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -55,6 +55,11 @@ union futex_key {
 #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
 
 #ifdef CONFIG_FUTEX
+enum {
+	FUTEX_STATE_OK,
+	FUTEX_STATE_DEAD,
+};
+
 static inline void futex_init_task(struct task_struct *tsk)
 {
 	tsk->robust_list = NULL;
@@ -63,6 +68,34 @@ static inline void futex_init_task(struc
 #endif
 	INIT_LIST_HEAD(&tsk->pi_state_list);
 	tsk->pi_state_cache = NULL;
+	tsk->futex_state = FUTEX_STATE_OK;
+}
+
+/**
+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk:	task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This has two callers:
+ *
+ * - futex_mm_release() after the futex exit cleanup has been done
+ *
+ * - do_exit() from the recursive fault handling path.
+ *
+ * In case of a recursive fault this is best effort. Either the futex exit
+ * code has run already or not. If the OWNER_DIED bit has been set on the
+ * futex then the waiter can take it over. If not, the problem is pushed
+ * back to user space. If the futex exit code did not run yet, then an
+ * already queued waiter might block forever, but there is nothing which
+ * can be done about that.
+ */
+static inline void futex_exit_done(struct task_struct *tsk)
+{
+	tsk->futex_state = FUTEX_STATE_DEAD;
 }
 
 void futex_mm_release(struct task_struct *tsk);
@@ -72,5 +105,6 @@ long do_futex(u32 __user *uaddr, int op,
 #else
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_mm_release(struct task_struct *tsk) { }
+static inline void futex_exit_done(struct task_struct *tsk) { }
 #endif
 #endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1815,6 +1815,7 @@ struct task_struct {
 #endif
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
+	unsigned int futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
@@ -2276,7 +2277,6 @@ extern void thread_group_cputime_adjuste
  * Per process flags
  */
 #define PF_EXITING	0x00000004	/* getting shut down */
-#define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
 #define PF_WQ_WORKER	0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -785,16 +785,7 @@ void __noreturn do_exit(long code)
 	 */
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		pr_alert("Fixing recursive fault but reboot is needed!\n");
-		/*
-		 * We can do this unlocked here. The futex code uses
-		 * this flag just to verify whether the pi state
-		 * cleanup has been done or not. In the worst case it
-		 * loops once more. We pretend that the cleanup was
-		 * done as there is no way to return. Either the
-		 * OWNER_DIED bit is set by now or we push the blocked
-		 * task into the wait for ever nirwana as well.
-		 */
-		tsk->flags |= PF_EXITPIDONE;
+		futex_exit_done(tsk);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
@@ -876,12 +867,7 @@ void __noreturn do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held();
-	/*
-	 * We can do this unlocked here. The futex code uses this flag
-	 * just to verify whether the pi state cleanup has been done
-	 * or not. In the worst case it loops once more.
-	 */
-	tsk->flags |= PF_EXITPIDONE;
+	futex_exit_done(tsk);
 
 	if (tsk->io_context)
 		exit_io_context(tsk);
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1099,19 +1099,18 @@ static int attach_to_pi_owner(u32 uval,
 	}
 
 	/*
-	 * We need to look at the task state flags to figure out,
-	 * whether the task is exiting. To protect against the do_exit
-	 * change of the task flags, we do this protected by
-	 * p->pi_lock:
+	 * We need to look at the task state to figure out, whether the
+	 * task is exiting. To protect against the change of the task state
+	 * in futex_exit_release(), we do this protected by p->pi_lock:
 	 */
 	raw_spin_lock_irq(&p->pi_lock);
-	if (unlikely(p->flags & PF_EXITING)) {
+	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
 		/*
-		 * The task is on the way out. When PF_EXITPIDONE is
-		 * set, we know that the task has finished the
-		 * cleanup:
+		 * The task is on the way out. When the futex state is
+		 * FUTEX_STATE_DEAD, we know that the task has finished
+		 * the cleanup:
 		 */
-		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+		int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
 
 		raw_spin_unlock_irq(&p->pi_lock);
 		put_task_struct(p);



  parent reply	other threads:[~2021-02-02 14:19 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-02 13:38 [PATCH 4.9 00/32] 4.9.255-rc1 review Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 01/32] ACPI: sysfs: Prefer "compatible" modalias Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 02/32] wext: fix NULL-ptr-dereference with cfg80211s lack of commit() Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 03/32] net: usb: qmi_wwan: added support for Thales Cinterion PLSx3 modem family Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 04/32] y2038: futex: Move compat implementation into futex.c Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 05/32] futex: Move futex exit handling into futex code Greg Kroah-Hartman
2021-02-02 13:38 ` Greg Kroah-Hartman [this message]
2021-02-02 13:38 ` [PATCH 4.9 07/32] exit/exec: Seperate mm_release() Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 08/32] futex: Split futex_mm_release() for exit/exec Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 09/32] futex: Set task::futex_state to DEAD right after handling futex exit Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 10/32] futex: Mark the begin of futex exit explicitly Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 11/32] futex: Sanitize exit state handling Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 12/32] futex: Provide state handling for exec() as well Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 13/32] futex: Add mutex around futex exit Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 14/32] futex: Provide distinct return value when owner is exiting Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 15/32] futex: Prevent exit livelock Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 16/32] KVM: x86/pmu: Fix HW_REF_CPU_CYCLES event pseudo-encoding in intel_arch_events[] Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 17/32] KVM: x86: get smi pending status correctly Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 18/32] leds: trigger: fix potential deadlock with libata Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 19/32] mt7601u: fix kernel crash unplugging the device Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 20/32] mt7601u: fix rx buffer refcounting Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 21/32] ARM: imx: build suspend-imx6.S with arm instruction set Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 22/32] netfilter: nft_dynset: add timeout extension to template Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 23/32] xfrm: Fix oops in xfrm_replay_advance_bmp Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 24/32] RDMA/cxgb4: Fix the reported max_recv_sge value Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 25/32] iwlwifi: pcie: use jiffies for memory read spin time limit Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 26/32] iwlwifi: pcie: reschedule in long-running memory reads Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 27/32] mac80211: pause TX while changing interface type Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 28/32] can: dev: prevent potential information leak in can_fill_info() Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 29/32] iommu/vt-d: Gracefully handle DMAR units with no supported address widths Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 30/32] iommu/vt-d: Dont dereference iommu_device if IOMMU_API is not built Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 31/32] NFC: fix resource leak when target index is invalid Greg Kroah-Hartman
2021-02-02 13:38 ` [PATCH 4.9 32/32] NFC: fix possible resource leak Greg Kroah-Hartman
2021-02-02 20:20 ` [PATCH 4.9 00/32] 4.9.255-rc1 review Jon Hunter
2021-02-03  9:53 ` Naresh Kamboju
2021-02-03 15:42 ` Shuah Khan
2021-02-03 20:41 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210202132942.291252804@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=lee.jones@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.