linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Robust-futex wakes up the waiters will be missed
@ 2019-11-06  3:23 Yi Wang
  2019-11-06 19:02 ` Thomas Gleixner
  2019-11-15 18:19 ` [tip: locking/core] futex: Prevent robust futex exit race tip-bot2 for Yang Tao
  0 siblings, 2 replies; 5+ messages in thread
From: Yi Wang @ 2019-11-06  3:23 UTC (permalink / raw)
  To: tglx
  Cc: mingo, peterz, dvhart, linux-kernel, xue.zhihong, wang.yi59,
	jiang.xuexin, Yang Tao

From: Yang Tao <yang.tao172@zte.com.cn>

We found two scenarios:
(1)When the owner of a mutex lock with robust attribute and no_pi
will release the lock and executes pthread_mutex_unlock(),it is
killed after setting mutex->__data.__lock = 0 and before wake
others. It will go through the robust process, but it will not wake
up watiers because mutex->__data.__lock = 0.

                OWNER

        pthread_mutex_unlock()
                |
                |
                V
        atomic_exchange_rel (&mutex->__data.__lock, 0)
                        <------------------------killed
            lll_futex_wake ()                   |
                                                |
                                                |(__lock = 0)
                                                |(enter kernel)
                                                |
                                                V
                                            do_exit()
                                            exit_mm()
                                          mm_release()
                                        exit_robust_list()
                                        handle_futex_death()
                                                |
                                                |(__lock = 0)
                                                |(uval = 0)
                                                |
                                                V
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;<------wakes up waiters will be missed

(2) When a waiter wakes up and returns to the user space, it is
killed before getting the lock (before modifying
mutex->__data.__lock), and other waiters will not wake up.

        OWNER                         WAITER

   pthread_mutex_unlock()
                |
                |(__lock = 0)
                |
                V
         futex_wake()
                                fuet_wait()   //awaked
                                        |
                                        |
                                        |(enter userspace)
                                        |(__lock = 0)
                                        |
                                        V
                        oldval = mutex->__data.__lock
                                          <-----------------killed
    atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,  |
                        id | assume_other_futex_waiters, 0)      |
                                                                 |
                                                                 |
                                                   (enter kernel)|
                                                                 |
                                                                 V
                                                         do_exit()
                                                        |
                                                        |
                                                        V
                                        handle_futex_death()
                                        |
                                        |(__lock = 0)
                                        |(uval = 0)
                                        |
                                        V
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;<------wakes up waiters will be missed
So, in these scenarios, task will not wake up waiters

We found that when the task was killed in two scenarios,
task->robust_list->list_op_pending =&mutex->__data.__list.__next,
so we can do something.

We think that task should wake up once when the following conditions
are met:
        (1) task->robust_list->list_op_pending != NULL;
        (2) mutex->__data.__lock = 0;
        (3) no_pi
In some cases, this may lead to some redundant wakeups, which will
reduce the efficiency of the program, but it will not affectthe
program operation, and it is very rare to meet these three
conditions.

At the same time, we only wake up and do not set the died bit,
because mutex->__data.__lock = 0; mutex->__data.__owner = 0;
At this time, it can be seen that there is no owner,and the wake-up
process directly take the lock.

If the died bit is set, it may cause some misoperation. Such as a
waiter being killed when the owner is releasing the lock, it will
mark the lock with the died bit, which is not good.

We don't need to set "mutex->__data.__count"(in mutex structure),
which will not affect repeated lock holding.

Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>
---
 kernel/futex.c | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index bd18f60..8511dad 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3456,7 +3456,9 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr,
+			      struct task_struct *curr, int pi,
+			      bool pending)
 {
 	u32 uval, uninitialized_var(nval), mval;
 	int err;
@@ -3469,6 +3471,35 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
 	if (get_user(uval, uaddr))
 		return -1;
 
+	/*
+	 * When uval is 0, there may be waiters blocking on the lock:
+	 *
+	 * (1)When the owner of a mutex lock with robust attribute
+	 * and no_pi will release the lock and executes
+	 * pthread_mutex_unlock(),it is killed after setting
+	 * mutex->__data.__lock = 0 (uval = 0)and before wake others.
+	 * It will enter the robust process, but it will not wake up
+	 * watiers because uval = 0.
+	 *
+	 * (2) When a waiter wakes up and returns to the user space,
+	 * it is killed before getting the lock (before modifying
+	 * mutex->__data.__lock), and other waiters will not wake up
+	 * because uval = 0.
+	 *
+	 * We found that when the task was killed in two scenarios,
+	 * task->robust_list->list_op_pending != NULL. Therefore,
+	 * it can be judged that the task is killed when releasing
+	 * or acquiring the lock.
+	 *
+	 * We should wake up once when the following conditions are
+	 * met:
+	 *	1) task->robust_list->list_op_pending != NULL
+	 *	2) mutex->__data.__lock = 0 (uval = 0)
+	 *	3) no_pi
+	 */
+	if (pending && !pi && uval == 0)
+		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+
 	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
 		return 0;
 
@@ -3590,7 +3621,7 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void __user *)entry + futex_offset,
-						curr, pi))
+						curr, pi, 0))
 				return;
 		if (rc)
 			return;
@@ -3607,7 +3638,7 @@ void exit_robust_list(struct task_struct *curr)
 
 	if (pending)
 		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip);
+				   curr, pip, 1);
 }
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
@@ -3784,7 +3815,7 @@ void compat_exit_robust_list(struct task_struct *curr)
 		if (entry != pending) {
 			void __user *uaddr = futex_uaddr(entry, futex_offset);
 
-			if (handle_futex_death(uaddr, curr, pi))
+			if (handle_futex_death(uaddr, curr, pi, 0))
 				return;
 		}
 		if (rc)
@@ -3803,7 +3834,7 @@ void compat_exit_robust_list(struct task_struct *curr)
 	if (pending) {
 		void __user *uaddr = futex_uaddr(pending, futex_offset);
 
-		handle_futex_death(uaddr, curr, pip);
+		handle_futex_death(uaddr, curr, pip, 1);
 	}
 }
 
-- 
2.15.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] Robust-futex wakes up the waiters will be missed
  2019-11-06  3:23 [PATCH] Robust-futex wakes up the waiters will be missed Yi Wang
@ 2019-11-06 19:02 ` Thomas Gleixner
  2019-11-15 18:19 ` [tip: locking/core] futex: Prevent robust futex exit race tip-bot2 for Yang Tao
  1 sibling, 0 replies; 5+ messages in thread
From: Thomas Gleixner @ 2019-11-06 19:02 UTC (permalink / raw)
  To: Yi Wang
  Cc: Ingo Molnar, Peter Zijlstra, Darren Hart, LKML, xue.zhihong,
	jiang.xuexin, Yang Tao

Yi,

On Wed, 6 Nov 2019, Yi Wang wrote:

thanks for addressing Peters comments. Just a few nitpicks.

> Subject: [PATCH] Robust-futex wakes up the waiters will be missed

When you send a new version of a patch then please say so in the subject
line, i.e.

 [PATCH v2] ....

Ideally you also add a short change notice _after_ the '---' seperator
which follows the Signed-off-by lines, i.e.

  Signed-off-by: ......

  ---
  v2: Added a proper comment in handle_futex_death() (Peter)

That's helpful for reviewers to see what you changed, so they know where
they should look for. You obviously do not have to document every tiny
change, but the ones which address review comments and those which change
the implementation. So for a follow up on a large set of comments it's
sufficient to mention classes of changes, e.g.

      - Correct the code flow in foo() (Reviewer 1)
      - Fixup coding style (Reviewer 2)
      - Use inlines instead of macros (Reviewer 1)

Not applicaple here, but you get the idea.

  v1: https://lore.kernel.org/r/1572573789-16557-1-git-send-email-wang.yi59@zte.com.cn

A link to previous versions is helpful for people who were not involved in
the V1 discussion. It makes it easy to follow the history via the archives.
The part after '...org/r/' is the message id of your v1 mail.

This section is not part of the changelog when the patch is applied in a
maintainer tree as the text between the '---' seperator and the start of
the patch is discarded along with the diffstat below.

---
  diffstat ...


Also all subjects should start with a subsystem prefix, in this case
'futex:', i.e.

 [PATCH v2] futex: ....

The easy way to figure the right prefix out is with git:

 git log --oneline kernel/futex.c

That lists the subjects of the commits which touched that file. The most
used one is 'futex:' which makes a lot of sense obviously.

There are a few other prefixes as well due to treewide changes of
infrastructure, but it's again pretty obvious that 'hrtimer:', 'treewide:',
'mm/gup:' etc. are not the right ones.

> We found two scenarios:
> (1)When the owner of a mutex lock with robust attribute and no_pi
> will release the lock and executes pthread_mutex_unlock(),it is
> killed after setting mutex->__data.__lock = 0 and before wake
> others. It will go through the robust process, but it will not wake
> up watiers because mutex->__data.__lock = 0.

...

Just for curiosity. How did you manage to trigger these races?

> We don't need to set "mutex->__data.__count"(in mutex structure),
> which will not affect repeated lock holding.

This is interesting in the context of pthread_mutexes, but irrelevant for
the kernel because the kernel does not know at all how the users space data
structure which contains the futex value looks like. The kernel really does
not care and while the problem happened with pthread_mutex it's the same
for any other implementation which uses the robust futex mechanism.

> Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>

This is missing your Signed-off-by: As you are sending the patch and not
the author you have to add your Signed-off-by after the authors to document
the handling chain. i.e.

  Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>
  Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>

Ok?

> -static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
> +static int handle_futex_death(u32 __user *uaddr,
> +			      struct task_struct *curr, int pi,
> +			      bool pending)

If you break the existing line after 'curr,' then you spare an extra one.

>  {
>  	u32 uval, uninitialized_var(nval), mval;
>  	int err;
> @@ -3469,6 +3471,35 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
>  	if (get_user(uval, uaddr))
>  		return -1;
>  
...

> +	 *	1) task->robust_list->list_op_pending != NULL
> +	 *	2) mutex->__data.__lock = 0 (uval = 0)
> +	 *	3) no_pi
> +	 */
> +	if (pending && !pi && uval == 0)
> +		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);

This really wants an explict return here. Yes, the check below returns the
correct value, but it's not obvious at all. The futex code is complex
enough already, no need to introduce code which forces people to look
twice.

Please keep that in mind for future patches. No need to resend this one as
I already picked it up and fixed up the tiny issues already.

Thanks for the great detective work and profund analysis!

       tglx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [tip: locking/core] futex: Prevent robust futex exit race
  2019-11-06  3:23 [PATCH] Robust-futex wakes up the waiters will be missed Yi Wang
  2019-11-06 19:02 ` Thomas Gleixner
@ 2019-11-15 18:19 ` tip-bot2 for Yang Tao
  1 sibling, 0 replies; 5+ messages in thread
From: tip-bot2 for Yang Tao @ 2019-11-15 18:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Yang Tao, Yi Wang, Thomas Gleixner, Ingo Molnar,
	Peter Zijlstra (Intel),
	stable, Borislav Petkov, linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     ca16d5bee59807bf04deaab0a8eccecd5061528c
Gitweb:        https://git.kernel.org/tip/ca16d5bee59807bf04deaab0a8eccecd5061528c
Author:        Yang Tao <yang.tao172@zte.com.cn>
AuthorDate:    Wed, 06 Nov 2019 22:55:35 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Fri, 15 Nov 2019 19:10:49 +01:00

futex: Prevent robust futex exit race

Robust futexes utilize the robust_list mechanism to allow the kernel to
release futexes which are held when a task exits. The exit can be voluntary
or caused by a signal or fault. This prevents that waiters block forever.

The futex operations in user space store a pointer to the futex they are
either locking or unlocking in the op_pending member of the per task robust
list.

After a lock operation has succeeded the futex is queued in the robust list
linked list and the op_pending pointer is cleared.

After an unlock operation has succeeded the futex is removed from the
robust list linked list and the op_pending pointer is cleared.

The robust list exit code checks for the pending operation and any futex
which is queued in the linked list. It carefully checks whether the futex
value is the TID of the exiting task. If so, it sets the OWNER_DIED bit and
tries to wake up a potential waiter.

This is race free for the lock operation but unlock has two race scenarios
where waiters might not be woken up. These issues can be observed with
regular robust pthread mutexes. PI aware pthread mutexes are not affected.

(1) Unlocking task is killed after unlocking the futex value in user space
    before being able to wake a waiter.

        pthread_mutex_unlock()
                |
                V
        atomic_exchange_rel (&mutex->__data.__lock, 0)
                        <------------------------killed
            lll_futex_wake ()                   |
                                                |
                                                |(__lock = 0)
                                                |(enter kernel)
                                                |
                                                V
                                            do_exit()
                                            exit_mm()
                                          mm_release()
                                        exit_robust_list()
                                        handle_futex_death()
                                                |
                                                |(__lock = 0)
                                                |(uval = 0)
                                                |
                                                V
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;

    The sanity check which ensures that the user space futex is owned by
    the exiting task prevents the wakeup of waiters which in consequence
    block infinitely.

(2) Waiting task is killed after a wakeup and before it can acquire the
    futex in user space.

        OWNER                         WAITER
				futex_wait()      		
   pthread_mutex_unlock()               |
                |                       |
                |(__lock = 0)           |
                |                       |
                V                       |
         futex_wake() ------------>  wakeup()
                                        |
                                        |(return to userspace)
                                        |(__lock = 0)
                                        |
                                        V
                        oldval = mutex->__data.__lock
                                          <-----------------killed
    atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,  |
                        id | assume_other_futex_waiters, 0)      |
                                                                 |
                                                                 |
                                                   (enter kernel)|
                                                                 |
                                                                 V
                                                         do_exit()
                                                        |
                                                        |
                                                        V
                                        handle_futex_death()
                                        |
                                        |(__lock = 0)
                                        |(uval = 0)
                                        |
                                        V
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;

    The sanity check which ensures that the user space futex is owned
    by the exiting task prevents the wakeup of waiters, which seems to
    be correct as the exiting task does not own the futex value, but
    the consequence is that other waiters wont be woken up and block
    infinitely.

In both scenarios the following conditions are true:

   - task->robust_list->list_op_pending != NULL
   - user space futex value == 0
   - Regular futex (not PI)

If these conditions are met then it is reasonably safe to wake up a
potential waiter in order to prevent the above problems.

As this might be a false positive it can cause spurious wakeups, but the
waiter side has to handle other types of unrelated wakeups, e.g. signals
gracefully anyway. So such a spurious wakeup will not affect the
correctness of these operations.

This workaround must not touch the user space futex value and cannot set
the OWNER_DIED bit because the lock value is 0, i.e. uncontended. Setting
OWNER_DIED in this case would result in inconsistent state and subsequently
in malfunction of the owner died handling in user space.

The rest of the user space state is still consistent as no other task can
observe the list_op_pending entry in the exiting tasks robust list.

The eventually woken up waiter will observe the uncontended lock value and
take it over.

[ tglx: Massaged changelog and comment. Made the return explicit and not
  	depend on the subsequent check and added constants to hand into
  	handle_futex_death() instead of plain numbers. Fixed a few coding
	style issues. ]

Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core")
Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>
Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1573010582-35297-1-git-send-email-wang.yi59@zte.com.cn
Link: https://lkml.kernel.org/r/20191106224555.943191378@linutronix.de

---
 kernel/futex.c | 58 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 43229f8..49eaf5b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3452,11 +3452,16 @@ err_unlock:
 	return ret;
 }
 
+/* Constants for the pending_op argument of handle_futex_death */
+#define HANDLE_DEATH_PENDING	true
+#define HANDLE_DEATH_LIST	false
+
 /*
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
+			      bool pi, bool pending_op)
 {
 	u32 uval, uninitialized_var(nval), mval;
 	int err;
@@ -3469,6 +3474,42 @@ retry:
 	if (get_user(uval, uaddr))
 		return -1;
 
+	/*
+	 * Special case for regular (non PI) futexes. The unlock path in
+	 * user space has two race scenarios:
+	 *
+	 * 1. The unlock path releases the user space futex value and
+	 *    before it can execute the futex() syscall to wake up
+	 *    waiters it is killed.
+	 *
+	 * 2. A woken up waiter is killed before it can acquire the
+	 *    futex in user space.
+	 *
+	 * In both cases the TID validation below prevents a wakeup of
+	 * potential waiters which can cause these waiters to block
+	 * forever.
+	 *
+	 * In both cases the following conditions are met:
+	 *
+	 *	1) task->robust_list->list_op_pending != NULL
+	 *	   @pending_op == true
+	 *	2) User space futex value == 0
+	 *	3) Regular futex: @pi == false
+	 *
+	 * If these conditions are met, it is safe to attempt waking up a
+	 * potential waiter without touching the user space futex value and
+	 * trying to set the OWNER_DIED bit. The user space futex value is
+	 * uncontended and the rest of the user space mutex state is
+	 * consistent, so a woken waiter will just take over the
+	 * uncontended futex. Setting the OWNER_DIED bit would create
+	 * inconsistent state and malfunction of the user space owner died
+	 * handling.
+	 */
+	if (pending_op && !pi && !uval) {
+		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+		return 0;
+	}
+
 	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
 		return 0;
 
@@ -3588,10 +3629,11 @@ void exit_robust_list(struct task_struct *curr)
 		 * A pending lock might already be on the list, so
 		 * don't process it twice:
 		 */
-		if (entry != pending)
+		if (entry != pending) {
 			if (handle_futex_death((void __user *)entry + futex_offset,
-						curr, pi))
+						curr, pi, HANDLE_DEATH_LIST))
 				return;
+		}
 		if (rc)
 			return;
 		entry = next_entry;
@@ -3605,9 +3647,10 @@ void exit_robust_list(struct task_struct *curr)
 		cond_resched();
 	}
 
-	if (pending)
+	if (pending) {
 		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip);
+				   curr, pip, HANDLE_DEATH_PENDING);
+	}
 }
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
@@ -3784,7 +3827,8 @@ void compat_exit_robust_list(struct task_struct *curr)
 		if (entry != pending) {
 			void __user *uaddr = futex_uaddr(entry, futex_offset);
 
-			if (handle_futex_death(uaddr, curr, pi))
+			if (handle_futex_death(uaddr, curr, pi,
+					       HANDLE_DEATH_LIST))
 				return;
 		}
 		if (rc)
@@ -3803,7 +3847,7 @@ void compat_exit_robust_list(struct task_struct *curr)
 	if (pending) {
 		void __user *uaddr = futex_uaddr(pending, futex_offset);
 
-		handle_futex_death(uaddr, curr, pip);
+		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
 	}
 }
 

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] Robust-futex wakes up the waiters will be missed
  2019-11-01  2:03 [PATCH] Robust-futex wakes up the waiters will be missed Yi Wang
@ 2019-11-04  9:16 ` Peter Zijlstra
  0 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2019-11-04  9:16 UTC (permalink / raw)
  To: Yi Wang
  Cc: tglx, mingo, dvhart, linux-kernel, xue.zhihong, jiang.xuexin, Yang Tao

On Fri, Nov 01, 2019 at 10:03:09AM +0800, Yi Wang wrote:
>  kernel/futex.c | 16 +++++++++++-----
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/futex.c b/kernel/futex.c
> index bd18f60..c2fb590 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -3456,7 +3456,9 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
>   * Process a futex-list entry, check whether it's owned by the
>   * dying task, and do notification if so:
>   */
> -static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
> +static int handle_futex_death(u32 __user *uaddr,
> +			      struct task_struct *curr, int pi,
> +			      bool pending)
>  {
>  	u32 uval, uninitialized_var(nval), mval;
>  	int err;
> @@ -3469,6 +3471,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
>  	if (get_user(uval, uaddr))
>  		return -1;
>  
> +	/* Robust-futex wakes up the waiters will be missed*/
> +	if (pending && !pi && uval == 0)
> +		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
> +

I'm thinking you're right and this should fix things, but that comment
is really terse and will not help us understand things the next time we
try and figure out what this code does.

>  	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
>  		return 0;
>  
> @@ -3590,7 +3596,7 @@ void exit_robust_list(struct task_struct *curr)
>  		 */
>  		if (entry != pending)
>  			if (handle_futex_death((void __user *)entry + futex_offset,
> -						curr, pi))
> +						curr, pi, 0))
>  				return;
>  		if (rc)
>  			return;
> @@ -3607,7 +3613,7 @@ void exit_robust_list(struct task_struct *curr)
>  
>  	if (pending)
>  		handle_futex_death((void __user *)pending + futex_offset,
> -				   curr, pip);
> +				   curr, pip, 1);
>  }
>  
>  long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
> @@ -3784,7 +3790,7 @@ void compat_exit_robust_list(struct task_struct *curr)
>  		if (entry != pending) {
>  			void __user *uaddr = futex_uaddr(entry, futex_offset);
>  
> -			if (handle_futex_death(uaddr, curr, pi))
> +			if (handle_futex_death(uaddr, curr, pi, 0))
>  				return;
>  		}
>  		if (rc)
> @@ -3803,7 +3809,7 @@ void compat_exit_robust_list(struct task_struct *curr)
>  	if (pending) {
>  		void __user *uaddr = futex_uaddr(pending, futex_offset);
>  
> -		handle_futex_death(uaddr, curr, pip);
> +		handle_futex_death(uaddr, curr, pip, 1);
>  	}
>  }
>  
> -- 
> 2.15.2
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] Robust-futex wakes up the waiters will be missed
@ 2019-11-01  2:03 Yi Wang
  2019-11-04  9:16 ` Peter Zijlstra
  0 siblings, 1 reply; 5+ messages in thread
From: Yi Wang @ 2019-11-01  2:03 UTC (permalink / raw)
  To: tglx
  Cc: mingo, peterz, dvhart, linux-kernel, xue.zhihong, wang.yi59,
	jiang.xuexin, Yang Tao

From: Yang Tao <yang.tao172@zte.com.cn>

We found two scenarios:
(1)When the owner of a mutex lock with robust attribute and no_pi
will release the lock and executes pthread_mutex_unlock(),it is
killed after setting mutex->__data.__lock = 0 and before wake
others. It will go through the robust process, but it will not wake
up watiers because mutex->__data.__lock = 0.

                OWNER

        pthread_mutex_unlock()
                |
                |
                V
        atomic_exchange_rel (&mutex->__data.__lock, 0)
                        <------------------------killed
            lll_futex_wake ()                   |
                                                |
                                                |(__lock = 0)
                                                |(enter kernel)
                                                |
                                                V
                                            do_exit()
                                            exit_mm()
                                          mm_release()
                                        exit_robust_list()
                                        handle_futex_death()
                                                |
                                                |(__lock = 0)
                                                |(uval = 0)
                                                |
                                                V
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;<------wakes up waiters will be missed

(2) When a waiter wakes up and returns to the user space, it is
killed before getting the lock (before modifying
mutex->__data.__lock), and other waiters will not wake up.

        OWNER                         WAITER

   pthread_mutex_unlock()
                |
                |(__lock = 0)
                |
                V
         futex_wake()
                                fuet_wait()   //awaked
                                        |
                                        |
                                        |(enter userspace)
                                        |(__lock = 0)
                                        |
                                        V
                        oldval = mutex->__data.__lock
                                          <-----------------killed
    atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,  |
                        id | assume_other_futex_waiters, 0)      |
                                                                 |
                                                                 |
                                                   (enter kernel)|
                                                                 |
                                                                 V
                                                         do_exit()
                                                        |
                                                        |
                                                        V
                                        handle_futex_death()
                                        |
                                        |(__lock = 0)
                                        |(uval = 0)
                                        |
                                        V
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;<------wakes up waiters will be missed
So, in these scenarios, task will not wake up waiters

We found that when the task was killed in two scenarios,
task->robust_list->list_op_pending =&mutex->__data.__list.__next,
so we can do something.

We think that task should wake up once when the following conditions
are met:
        (1) task->robust_list->list_op_pending != NULL;
        (2) mutex->__data.__lock = 0;
        (3) no_pi
In some cases, this may lead to some redundant wakeups, which will
reduce the efficiency of the program, but it will not affectthe
program operation, and it is very rare to meet these three
conditions.

At the same time, we only wake up and do not set the died bit,
because mutex->__data.__lock = 0; mutex->__data.__owner = 0;
At this time, it can be seen that there is no owner,and the wake-up
process directly take the lock.

If the died bit is set, it may cause some misoperation. Such as a
waiter being killed when the owner is releasing the lock, it will
mark the lock with the died bit, which is not good.

We don't need to set "mutex->__data.__count"(in mutex structure),
which will not affect repeated lock holding.

Signed-off-by: Yang Tao <yang.tao172@zte.com.cn>
---
 kernel/futex.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index bd18f60..c2fb590 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3456,7 +3456,9 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr,
+			      struct task_struct *curr, int pi,
+			      bool pending)
 {
 	u32 uval, uninitialized_var(nval), mval;
 	int err;
@@ -3469,6 +3471,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
 	if (get_user(uval, uaddr))
 		return -1;
 
+	/* Robust-futex wakes up the waiters will be missed*/
+	if (pending && !pi && uval == 0)
+		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+
 	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
 		return 0;
 
@@ -3590,7 +3596,7 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void __user *)entry + futex_offset,
-						curr, pi))
+						curr, pi, 0))
 				return;
 		if (rc)
 			return;
@@ -3607,7 +3613,7 @@ void exit_robust_list(struct task_struct *curr)
 
 	if (pending)
 		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip);
+				   curr, pip, 1);
 }
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
@@ -3784,7 +3790,7 @@ void compat_exit_robust_list(struct task_struct *curr)
 		if (entry != pending) {
 			void __user *uaddr = futex_uaddr(entry, futex_offset);
 
-			if (handle_futex_death(uaddr, curr, pi))
+			if (handle_futex_death(uaddr, curr, pi, 0))
 				return;
 		}
 		if (rc)
@@ -3803,7 +3809,7 @@ void compat_exit_robust_list(struct task_struct *curr)
 	if (pending) {
 		void __user *uaddr = futex_uaddr(pending, futex_offset);
 
-		handle_futex_death(uaddr, curr, pip);
+		handle_futex_death(uaddr, curr, pip, 1);
 	}
 }
 
-- 
2.15.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-11-15 18:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-06  3:23 [PATCH] Robust-futex wakes up the waiters will be missed Yi Wang
2019-11-06 19:02 ` Thomas Gleixner
2019-11-15 18:19 ` [tip: locking/core] futex: Prevent robust futex exit race tip-bot2 for Yang Tao
  -- strict thread matches above, loose matches on Subject: below --
2019-11-01  2:03 [PATCH] Robust-futex wakes up the waiters will be missed Yi Wang
2019-11-04  9:16 ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).