stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [PATCH 5.4 37/66] futex: Replace PF_EXITPIDONE with a state
Date: Wed, 27 Nov 2019 21:32:32 +0100	[thread overview]
Message-ID: <20191127202717.281355143@linuxfoundation.org> (raw)
In-Reply-To: <20191127202632.536277063@linuxfoundation.org>

From: Thomas Gleixner <tglx@linutronix.de>

commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

The futex exit handling relies on PF_ flags. That's suboptimal as it
requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
the middle of do_exit() to enforce the observability of PF_EXITING in the
futex code.

Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
over to the new state. The PF_EXITING dependency will be cleaned up in a
later step.

This prepares for handling various futex exit issues later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/futex.h |   33 +++++++++++++++++++++++++++++++++
 include/linux/sched.h |    2 +-
 kernel/exit.c         |   18 ++----------------
 kernel/futex.c        |   25 +++++++++++++------------
 4 files changed, 49 insertions(+), 29 deletions(-)

--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -50,6 +50,10 @@ union futex_key {
 #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
 
 #ifdef CONFIG_FUTEX
+enum {
+	FUTEX_STATE_OK,
+	FUTEX_STATE_DEAD,
+};
 
 static inline void futex_init_task(struct task_struct *tsk)
 {
@@ -59,6 +63,34 @@ static inline void futex_init_task(struc
 #endif
 	INIT_LIST_HEAD(&tsk->pi_state_list);
 	tsk->pi_state_cache = NULL;
+	tsk->futex_state = FUTEX_STATE_OK;
+}
+
+/**
+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk:	task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This has two callers:
+ *
+ * - futex_mm_release() after the futex exit cleanup has been done
+ *
+ * - do_exit() from the recursive fault handling path.
+ *
+ * In case of a recursive fault this is best effort. Either the futex exit
+ * code has run already or not. If the OWNER_DIED bit has been set on the
+ * futex then the waiter can take it over. If not, the problem is pushed
+ * back to user space. If the futex exit code did not run yet, then an
+ * already queued waiter might block forever, but there is nothing which
+ * can be done about that.
+ */
+static inline void futex_exit_done(struct task_struct *tsk)
+{
+	tsk->futex_state = FUTEX_STATE_DEAD;
 }
 
 void futex_mm_release(struct task_struct *tsk);
@@ -68,6 +100,7 @@ long do_futex(u32 __user *uaddr, int op,
 #else
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_mm_release(struct task_struct *tsk) { }
+static inline void futex_exit_done(struct task_struct *tsk) { }
 static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 			    ktime_t *timeout, u32 __user *uaddr2,
 			    u32 val2, u32 val3)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1054,6 +1054,7 @@ struct task_struct {
 #endif
 	struct list_head		pi_state_list;
 	struct futex_pi_state		*pi_state_cache;
+	unsigned int			futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
@@ -1442,7 +1443,6 @@ extern struct pid *cad_pid;
  */
 #define PF_IDLE			0x00000002	/* I am an IDLE thread */
 #define PF_EXITING		0x00000004	/* Getting shut down */
-#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
 #define PF_VCPU			0x00000010	/* I'm a virtual CPU */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -746,16 +746,7 @@ void __noreturn do_exit(long code)
 	 */
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		pr_alert("Fixing recursive fault but reboot is needed!\n");
-		/*
-		 * We can do this unlocked here. The futex code uses
-		 * this flag just to verify whether the pi state
-		 * cleanup has been done or not. In the worst case it
-		 * loops once more. We pretend that the cleanup was
-		 * done as there is no way to return. Either the
-		 * OWNER_DIED bit is set by now or we push the blocked
-		 * task into the wait for ever nirwana as well.
-		 */
-		tsk->flags |= PF_EXITPIDONE;
+		futex_exit_done(tsk);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
@@ -846,12 +837,7 @@ void __noreturn do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held();
-	/*
-	 * We can do this unlocked here. The futex code uses this flag
-	 * just to verify whether the pi state cleanup has been done
-	 * or not. In the worst case it loops once more.
-	 */
-	tsk->flags |= PF_EXITPIDONE;
+	futex_exit_done(tsk);
 
 	if (tsk->io_context)
 		exit_io_context(tsk);
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *
 	u32 uval2;
 
 	/*
-	 * If PF_EXITPIDONE is not yet set, then try again.
+	 * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
+	 * for it to finish.
 	 */
-	if (tsk && !(tsk->flags & PF_EXITPIDONE))
+	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
 		return -EAGAIN;
 
 	/*
@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *
 	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
 	 *   }				     if (!tsk->flags & PF_EXITING) {
 	 *  ...				       attach();
-	 *  tsk->flags |= PF_EXITPIDONE;     } else {
-	 *				       if (!(tsk->flags & PF_EXITPIDONE))
+	 *  tsk->futex_state =               } else {
+	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
+	 *					  FUTEX_STATE_DEAD)
 	 *				         return -EAGAIN;
 	 *				       return -ESRCH; <--- FAIL
 	 *				     }
@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user
 	}
 
 	/*
-	 * We need to look at the task state flags to figure out,
-	 * whether the task is exiting. To protect against the do_exit
-	 * change of the task flags, we do this protected by
-	 * p->pi_lock:
+	 * We need to look at the task state to figure out, whether the
+	 * task is exiting. To protect against the change of the task state
+	 * in futex_exit_release(), we do this protected by p->pi_lock:
 	 */
 	raw_spin_lock_irq(&p->pi_lock);
-	if (unlikely(p->flags & PF_EXITING)) {
+	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
 		/*
-		 * The task is on the way out. When PF_EXITPIDONE is
-		 * set, we know that the task has finished the
-		 * cleanup:
+		 * The task is on the way out. When the futex state is
+		 * FUTEX_STATE_DEAD, we know that the task has finished
+		 * the cleanup:
 		 */
 		int ret = handle_exit_race(uaddr, uval, p);
 



  parent reply	other threads:[~2019-11-27 21:13 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27 20:31 [PATCH 5.4 00/66] 5.4.1-stable review Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 01/66] Bluetooth: Fix invalid-free in bcsp_close() Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 02/66] ath9k_hw: fix uninitialized variable data Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 03/66] ath10k: Fix a NULL-ptr-deref bug in ath10k_usb_alloc_urb_from_pipe Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 04/66] ath10k: Fix HOST capability QMI incompatibility Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 05/66] ath10k: restore QCA9880-AR1A (v1) detection Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 06/66] Revert "Bluetooth: hci_ll: set operational frequency earlier" Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 07/66] Revert "dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues" Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 08/66] md/raid10: prevent access of uninitialized resync_pages offset Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 09/66] x86/insn: Fix awk regexp warnings Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 10/66] x86/speculation: Fix incorrect MDS/TAA mitigation status Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 11/66] x86/speculation: Fix redundant MDS mitigation message Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 12/66] nbd: prevent memory leak Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 13/66] x86/stackframe/32: Repair 32-bit Xen PV Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 14/66] x86/xen/32: Make xen_iret_crit_fixup() independent of frame layout Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 15/66] x86/xen/32: Simplify ring check in xen_iret_crit_fixup() Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 16/66] x86/doublefault/32: Fix stack canaries in the double fault handler Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 17/66] x86/pti/32: Size initial_page_table correctly Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 18/66] x86/cpu_entry_area: Add guard page for entry stack on 32bit Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 19/66] x86/entry/32: Fix IRET exception Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 20/66] x86/entry/32: Use %ss segment where required Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 21/66] x86/entry/32: Move FIXUP_FRAME after pushing %fs in SAVE_ALL Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 22/66] x86/entry/32: Unwind the ESPFIX stack earlier on exception entry Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 23/66] x86/entry/32: Fix NMI vs ESPFIX Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 24/66] selftests/x86/mov_ss_trap: Fix the SYSENTER test Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 25/66] selftests/x86/sigreturn/32: Invalidate DS and ES when abusing the kernel Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 26/66] x86/pti/32: Calculate the various PTI cpu_entry_area sizes correctly, make the CPU_ENTRY_AREA_PAGES assert precise Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 27/66] x86/entry/32: Fix FIXUP_ESPFIX_STACK with user CR3 Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 28/66] futex: Prevent robust futex exit race Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 29/66] ALSA: usb-audio: Fix NULL dereference at parsing BADD Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 30/66] ALSA: usb-audio: Fix Scarlett 6i6 Gen 2 port data Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 31/66] media: vivid: Set vid_cap_streaming and vid_out_streaming to true Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 32/66] media: vivid: Fix wrong locking that causes race conditions on streaming stop Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 33/66] media: usbvision: Fix invalid accesses after device disconnect Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 34/66] media: usbvision: Fix races among open, close, and disconnect Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 35/66] cpufreq: Add NULL checks to show() and store() methods of cpufreq Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 36/66] futex: Move futex exit handling into futex code Greg Kroah-Hartman
2019-11-27 20:32 ` Greg Kroah-Hartman [this message]
2019-11-27 20:32 ` [PATCH 5.4 38/66] exit/exec: Seperate mm_release() Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 39/66] futex: Split futex_mm_release() for exit/exec Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 40/66] futex: Set task::futex_state to DEAD right after handling futex exit Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 41/66] futex: Mark the begin of futex exit explicitly Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 42/66] futex: Sanitize exit state handling Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 43/66] futex: Provide state handling for exec() as well Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 44/66] futex: Add mutex around futex exit Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 45/66] futex: Provide distinct return value when owner is exiting Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 46/66] futex: Prevent exit livelock Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 47/66] media: uvcvideo: Fix error path in control parsing failure Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 48/66] media: b2c2-flexcop-usb: add sanity checking Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 49/66] media: cxusb: detect cxusb_ctrl_msg error in query Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 50/66] media: imon: invalid dereference in imon_touch_event Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 51/66] media: mceusb: fix out of bounds read in MCE receiver buffer Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 52/66] ALSA: hda - Disable audio component for legacy Nvidia HDMI codecs Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 53/66] USBIP: add config dependency for SGL_ALLOC Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 54/66] usbip: tools: fix fd leakage in the function of read_attr_usbip_status Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 55/66] usbip: Fix uninitialized symbol nents in stub_recv_cmd_submit() Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 56/66] usb-serial: cp201x: support Mark-10 digital force gauge Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 57/66] USB: chaoskey: fix error case of a timeout Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 58/66] appledisplay: fix error handling in the scheduled work Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 59/66] USB: serial: mos7840: add USB ID to support Moxa UPort 2210 Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 60/66] USB: serial: mos7720: fix remote wakeup Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 61/66] USB: serial: mos7840: " Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 62/66] USB: serial: option: add support for DW5821e with eSIM support Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 63/66] USB: serial: option: add support for Foxconn T77W968 LTE modules Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 64/66] staging: comedi: usbduxfast: usbduxfast_ai_cmdtest rounding error Greg Kroah-Hartman
2019-11-27 20:33 ` [PATCH 5.4 65/66] powerpc/book3s64: Fix link stack flush on context switch Greg Kroah-Hartman
2019-11-27 20:33 ` [PATCH 5.4 66/66] KVM: PPC: Book3S HV: Flush link stack on guest exit to host kernel Greg Kroah-Hartman
2019-11-28  8:56 ` [PATCH 5.4 00/66] 5.4.1-stable review Naresh Kamboju
2019-11-29  8:52   ` Greg Kroah-Hartman
2019-11-28 10:42 ` Jon Hunter
2019-11-29  8:52   ` Greg Kroah-Hartman
2019-11-28 15:40 ` shuah
2019-11-29  8:52   ` Greg Kroah-Hartman
2019-11-28 18:47 ` Guenter Roeck
2019-11-29  8:51   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191127202717.281355143@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).