All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [PATCH 5.4 37/66] futex: Replace PF_EXITPIDONE with a state
Date: Wed, 27 Nov 2019 21:32:32 +0100	[thread overview]
Message-ID: <20191127202717.281355143@linuxfoundation.org> (raw)
In-Reply-To: <20191127202632.536277063@linuxfoundation.org>

From: Thomas Gleixner <tglx@linutronix.de>

commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

The futex exit handling relies on PF_ flags. That's suboptimal as it
requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
the middle of do_exit() to enforce the observability of PF_EXITING in the
futex code.

Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
over to the new state. The PF_EXITING dependency will be cleaned up in a
later step.

This prepares for handling various futex exit issues later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/futex.h |   33 +++++++++++++++++++++++++++++++++
 include/linux/sched.h |    2 +-
 kernel/exit.c         |   18 ++----------------
 kernel/futex.c        |   25 +++++++++++++------------
 4 files changed, 49 insertions(+), 29 deletions(-)

--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -50,6 +50,10 @@ union futex_key {
 #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
 
 #ifdef CONFIG_FUTEX
+enum {
+	FUTEX_STATE_OK,
+	FUTEX_STATE_DEAD,
+};
 
 static inline void futex_init_task(struct task_struct *tsk)
 {
@@ -59,6 +63,34 @@ static inline void futex_init_task(struc
 #endif
 	INIT_LIST_HEAD(&tsk->pi_state_list);
 	tsk->pi_state_cache = NULL;
+	tsk->futex_state = FUTEX_STATE_OK;
+}
+
+/**
+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk:	task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This has two callers:
+ *
+ * - futex_mm_release() after the futex exit cleanup has been done
+ *
+ * - do_exit() from the recursive fault handling path.
+ *
+ * In case of a recursive fault this is best effort. Either the futex exit
+ * code has run already or not. If the OWNER_DIED bit has been set on the
+ * futex then the waiter can take it over. If not, the problem is pushed
+ * back to user space. If the futex exit code did not run yet, then an
+ * already queued waiter might block forever, but there is nothing which
+ * can be done about that.
+ */
+static inline void futex_exit_done(struct task_struct *tsk)
+{
+	tsk->futex_state = FUTEX_STATE_DEAD;
 }
 
 void futex_mm_release(struct task_struct *tsk);
@@ -68,6 +100,7 @@ long do_futex(u32 __user *uaddr, int op,
 #else
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_mm_release(struct task_struct *tsk) { }
+static inline void futex_exit_done(struct task_struct *tsk) { }
 static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 			    ktime_t *timeout, u32 __user *uaddr2,
 			    u32 val2, u32 val3)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1054,6 +1054,7 @@ struct task_struct {
 #endif
 	struct list_head		pi_state_list;
 	struct futex_pi_state		*pi_state_cache;
+	unsigned int			futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
@@ -1442,7 +1443,6 @@ extern struct pid *cad_pid;
  */
 #define PF_IDLE			0x00000002	/* I am an IDLE thread */
 #define PF_EXITING		0x00000004	/* Getting shut down */
-#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
 #define PF_VCPU			0x00000010	/* I'm a virtual CPU */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -746,16 +746,7 @@ void __noreturn do_exit(long code)
 	 */
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		pr_alert("Fixing recursive fault but reboot is needed!\n");
-		/*
-		 * We can do this unlocked here. The futex code uses
-		 * this flag just to verify whether the pi state
-		 * cleanup has been done or not. In the worst case it
-		 * loops once more. We pretend that the cleanup was
-		 * done as there is no way to return. Either the
-		 * OWNER_DIED bit is set by now or we push the blocked
-		 * task into the wait for ever nirwana as well.
-		 */
-		tsk->flags |= PF_EXITPIDONE;
+		futex_exit_done(tsk);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
@@ -846,12 +837,7 @@ void __noreturn do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held();
-	/*
-	 * We can do this unlocked here. The futex code uses this flag
-	 * just to verify whether the pi state cleanup has been done
-	 * or not. In the worst case it loops once more.
-	 */
-	tsk->flags |= PF_EXITPIDONE;
+	futex_exit_done(tsk);
 
 	if (tsk->io_context)
 		exit_io_context(tsk);
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *
 	u32 uval2;
 
 	/*
-	 * If PF_EXITPIDONE is not yet set, then try again.
+	 * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
+	 * for it to finish.
 	 */
-	if (tsk && !(tsk->flags & PF_EXITPIDONE))
+	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
 		return -EAGAIN;
 
 	/*
@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *
 	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
 	 *   }				     if (!tsk->flags & PF_EXITING) {
 	 *  ...				       attach();
-	 *  tsk->flags |= PF_EXITPIDONE;     } else {
-	 *				       if (!(tsk->flags & PF_EXITPIDONE))
+	 *  tsk->futex_state =               } else {
+	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
+	 *					  FUTEX_STATE_DEAD)
 	 *				         return -EAGAIN;
 	 *				       return -ESRCH; <--- FAIL
 	 *				     }
@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user
 	}
 
 	/*
-	 * We need to look at the task state flags to figure out,
-	 * whether the task is exiting. To protect against the do_exit
-	 * change of the task flags, we do this protected by
-	 * p->pi_lock:
+	 * We need to look at the task state to figure out, whether the
+	 * task is exiting. To protect against the change of the task state
+	 * in futex_exit_release(), we do this protected by p->pi_lock:
 	 */
 	raw_spin_lock_irq(&p->pi_lock);
-	if (unlikely(p->flags & PF_EXITING)) {
+	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
 		/*
-		 * The task is on the way out. When PF_EXITPIDONE is
-		 * set, we know that the task has finished the
-		 * cleanup:
+		 * The task is on the way out. When the futex state is
+		 * FUTEX_STATE_DEAD, we know that the task has finished
+		 * the cleanup:
 		 */
 		int ret = handle_exit_race(uaddr, uval, p);
 



  parent reply	other threads:[~2019-11-27 21:13 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27 20:31 [PATCH 5.4 00/66] 5.4.1-stable review Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 01/66] Bluetooth: Fix invalid-free in bcsp_close() Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 02/66] ath9k_hw: fix uninitialized variable data Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 03/66] ath10k: Fix a NULL-ptr-deref bug in ath10k_usb_alloc_urb_from_pipe Greg Kroah-Hartman
2019-11-27 20:31 ` [PATCH 5.4 04/66] ath10k: Fix HOST capability QMI incompatibility Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 05/66] ath10k: restore QCA9880-AR1A (v1) detection Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 06/66] Revert "Bluetooth: hci_ll: set operational frequency earlier" Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 07/66] Revert "dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues" Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 08/66] md/raid10: prevent access of uninitialized resync_pages offset Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 09/66] x86/insn: Fix awk regexp warnings Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 10/66] x86/speculation: Fix incorrect MDS/TAA mitigation status Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 11/66] x86/speculation: Fix redundant MDS mitigation message Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 12/66] nbd: prevent memory leak Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 13/66] x86/stackframe/32: Repair 32-bit Xen PV Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 14/66] x86/xen/32: Make xen_iret_crit_fixup() independent of frame layout Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 15/66] x86/xen/32: Simplify ring check in xen_iret_crit_fixup() Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 16/66] x86/doublefault/32: Fix stack canaries in the double fault handler Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 17/66] x86/pti/32: Size initial_page_table correctly Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 18/66] x86/cpu_entry_area: Add guard page for entry stack on 32bit Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 19/66] x86/entry/32: Fix IRET exception Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 20/66] x86/entry/32: Use %ss segment where required Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 21/66] x86/entry/32: Move FIXUP_FRAME after pushing %fs in SAVE_ALL Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 22/66] x86/entry/32: Unwind the ESPFIX stack earlier on exception entry Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 23/66] x86/entry/32: Fix NMI vs ESPFIX Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 24/66] selftests/x86/mov_ss_trap: Fix the SYSENTER test Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 25/66] selftests/x86/sigreturn/32: Invalidate DS and ES when abusing the kernel Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 26/66] x86/pti/32: Calculate the various PTI cpu_entry_area sizes correctly, make the CPU_ENTRY_AREA_PAGES assert precise Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 27/66] x86/entry/32: Fix FIXUP_ESPFIX_STACK with user CR3 Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 28/66] futex: Prevent robust futex exit race Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 29/66] ALSA: usb-audio: Fix NULL dereference at parsing BADD Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 30/66] ALSA: usb-audio: Fix Scarlett 6i6 Gen 2 port data Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 31/66] media: vivid: Set vid_cap_streaming and vid_out_streaming to true Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 32/66] media: vivid: Fix wrong locking that causes race conditions on streaming stop Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 33/66] media: usbvision: Fix invalid accesses after device disconnect Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 34/66] media: usbvision: Fix races among open, close, and disconnect Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 35/66] cpufreq: Add NULL checks to show() and store() methods of cpufreq Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 36/66] futex: Move futex exit handling into futex code Greg Kroah-Hartman
2019-11-27 20:32 ` Greg Kroah-Hartman [this message]
2019-11-27 20:32 ` [PATCH 5.4 38/66] exit/exec: Seperate mm_release() Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 39/66] futex: Split futex_mm_release() for exit/exec Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 40/66] futex: Set task::futex_state to DEAD right after handling futex exit Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 41/66] futex: Mark the begin of futex exit explicitly Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 42/66] futex: Sanitize exit state handling Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 43/66] futex: Provide state handling for exec() as well Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 44/66] futex: Add mutex around futex exit Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 45/66] futex: Provide distinct return value when owner is exiting Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 46/66] futex: Prevent exit livelock Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 47/66] media: uvcvideo: Fix error path in control parsing failure Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 48/66] media: b2c2-flexcop-usb: add sanity checking Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 49/66] media: cxusb: detect cxusb_ctrl_msg error in query Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 50/66] media: imon: invalid dereference in imon_touch_event Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 51/66] media: mceusb: fix out of bounds read in MCE receiver buffer Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 52/66] ALSA: hda - Disable audio component for legacy Nvidia HDMI codecs Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 53/66] USBIP: add config dependency for SGL_ALLOC Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 54/66] usbip: tools: fix fd leakage in the function of read_attr_usbip_status Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 55/66] usbip: Fix uninitialized symbol nents in stub_recv_cmd_submit() Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 56/66] usb-serial: cp201x: support Mark-10 digital force gauge Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 57/66] USB: chaoskey: fix error case of a timeout Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 58/66] appledisplay: fix error handling in the scheduled work Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 59/66] USB: serial: mos7840: add USB ID to support Moxa UPort 2210 Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 60/66] USB: serial: mos7720: fix remote wakeup Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 61/66] USB: serial: mos7840: " Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 62/66] USB: serial: option: add support for DW5821e with eSIM support Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 63/66] USB: serial: option: add support for Foxconn T77W968 LTE modules Greg Kroah-Hartman
2019-11-27 20:32 ` [PATCH 5.4 64/66] staging: comedi: usbduxfast: usbduxfast_ai_cmdtest rounding error Greg Kroah-Hartman
2019-11-27 20:33 ` [PATCH 5.4 65/66] powerpc/book3s64: Fix link stack flush on context switch Greg Kroah-Hartman
2019-11-27 20:33 ` [PATCH 5.4 66/66] KVM: PPC: Book3S HV: Flush link stack on guest exit to host kernel Greg Kroah-Hartman
2019-11-28  8:56 ` [PATCH 5.4 00/66] 5.4.1-stable review Naresh Kamboju
2019-11-29  8:52   ` Greg Kroah-Hartman
2019-11-28 10:42 ` Jon Hunter
2019-11-28 10:42   ` Jon Hunter
2019-11-29  8:52   ` Greg Kroah-Hartman
2019-11-28 15:40 ` shuah
2019-11-29  8:52   ` Greg Kroah-Hartman
2019-11-28 18:47 ` Guenter Roeck
2019-11-29  8:51   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191127202717.281355143@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.