All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: Davidlohr Bueso <dave@stgolabs.net>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Yongji Xie <elohimes@gmail.com>,
	mingo@redhat.com, will.deacon@arm.com,
	linux-kernel@vger.kernel.org, Xie Yongji <xieyongji@baidu.com>,
	zhangyu31@baidu.com, liuqi16@baidu.com, yuanlinsi01@baidu.com,
	nixun@baidu.com, lilin24@baidu.com,
	andrea.parri@amarulasolutions.com
Subject: Re: [PATCH v4] sched/wake_q: Reduce reference counting for special users
Date: Tue, 18 Dec 2018 15:35:30 -0500	[thread overview]
Message-ID: <772085e9-35f3-1c32-1b87-f101cbc5f8f7@redhat.com> (raw)
In-Reply-To: <20181218195352.7orq3upiwfdbrdne@linux-r8p5>

On 12/18/2018 02:53 PM, Davidlohr Bueso wrote:
> Some users, specifically futexes and rwsems, required fixes
> that allowed the callers to be safe when wakeups occur before
> they are expected by wake_up_q(). Such scenarios also play
> games and rely on reference counting, and until now were
> pivoting on wake_q doing it. With the wake_q_add() call being
> moved down, this can no longer be the case. As such we end up
> with a double task refcounting overhead; and these callers
> care enough about this (being rather core-ish).
>
> This patch introduces a wake_q_add_safe() call that serves
> for callers that have already done refcounting and therefore the
> task is 'safe' from wake_q point of view (in that it requires a
> reference throughout the entire queue/wakeup cycle). In the one
> case (wake_q_add()) wake_q takes its own reference; in the other case
> (wake_q_add_safe()) it consumes the reference the caller already holds.
>
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>
> - Changes from v3: fixed wake_q_add_safe. While previous version
>  had been tested with a bootup, the failed cmpxchg path obviously
>  hadn't been exercised.  Sorry about the noise.
>
> include/linux/sched/wake_q.h |  4 +--
> kernel/futex.c               |  3 +--
> kernel/locking/rwsem-xadd.c  |  4 +--
> kernel/sched/core.c          | 60
> ++++++++++++++++++++++++++++++++------------
> 4 files changed, 48 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
> index 545f37138057..ad826d2a4557 100644
> --- a/include/linux/sched/wake_q.h
> +++ b/include/linux/sched/wake_q.h
> @@ -51,8 +51,8 @@ static inline void wake_q_init(struct wake_q_head
> *head)
>     head->lastp = &head->first;
> }
>
> -extern void wake_q_add(struct wake_q_head *head,
> -               struct task_struct *task);
> +extern void wake_q_add(struct wake_q_head *head, struct task_struct
> *task);
> +extern void wake_q_add_safe(struct wake_q_head *head, struct
> task_struct *task);
> extern void wake_up_q(struct wake_q_head *head);
>
> #endif /* _LINUX_SCHED_WAKE_Q_H */
> diff --git a/kernel/futex.c b/kernel/futex.c
> index d14971f6ed3d..6218d98f649b 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -1402,8 +1402,7 @@ static void mark_wake_futex(struct wake_q_head
> *wake_q, struct futex_q *q)
>      * Queue the task for later wakeup for after we've released
>      * the hb->lock. wake_q_add() grabs reference to p.
>      */
> -    wake_q_add(wake_q, p);
> -    put_task_struct(p);
> +    wake_q_add_safe(wake_q, p);
> }
>
> /*
> diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
> index 50d9af615dc4..fbe96341beee 100644
> --- a/kernel/locking/rwsem-xadd.c
> +++ b/kernel/locking/rwsem-xadd.c
> @@ -211,9 +211,7 @@ static void __rwsem_mark_wake(struct rw_semaphore
> *sem,
>          * Ensure issuing the wakeup (either by us or someone else)
>          * after setting the reader waiter to nil.
>          */
> -        wake_q_add(wake_q, tsk);
> -        /* wake_q_add() already take the task ref */
> -        put_task_struct(tsk);
> +        wake_q_add_safe(wake_q, tsk);
>     }
>
>     adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d740d7a3608d..be977df66a21 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -396,19 +396,7 @@ static bool set_nr_if_polling(struct task_struct *p)
> #endif
> #endif
>
> -/**
> - * wake_q_add() - queue a wakeup for 'later' waking.
> - * @head: the wake_q_head to add @task to
> - * @task: the task to queue for 'later' wakeup
> - *
> - * Queue a task for later wakeup, most likely by the wake_up_q() call
> in the
> - * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
> - * instantly.
> - *
> - * This function must be used as-if it were wake_up_process(); IOW
> the task
> - * must be ready to be woken at this location.
> - */
> -void wake_q_add(struct wake_q_head *head, struct task_struct *task)
> +static bool __wake_q_add(struct wake_q_head *head, struct task_struct
> *task)
> {
>     struct wake_q_node *node = &task->wake_q;
>
> @@ -422,15 +410,55 @@ void wake_q_add(struct wake_q_head *head, struct
> task_struct *task)
>      */
>     smp_mb__before_atomic();
>     if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
> -        return;
> -
> -    get_task_struct(task);
> +        return false;
>
>     /*
>      * The head is context local, there can be no concurrency.
>      */
>     *head->lastp = node;
>     head->lastp = &node->next;
> +    return true;
> +}
> +
> +/**
> + * wake_q_add() - queue a wakeup for 'later' waking.
> + * @head: the wake_q_head to add @task to
> + * @task: the task to queue for 'later' wakeup
> + *
> + * Queue a task for later wakeup, most likely by the wake_up_q() call
> in the
> + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
> + * instantly.
> + *
> + * This function must be used as-if it were wake_up_process(); IOW
> the task
> + * must be ready to be woken at this location.
> + */
> +void wake_q_add(struct wake_q_head *head, struct task_struct *task)
> +{
> +    if (__wake_q_add(head, task))
> +        get_task_struct(task);
> +}
> +
> +/**
> + * wake_q_add_safe() - safely queue a wakeup for 'later' waking.
> + * @head: the wake_q_head to add @task to
> + * @task: the task to queue for 'later' wakeup
> + *
> + * Queue a task for later wakeup, most likely by the wake_up_q() call
> in the
> + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
> + * instantly.
> + *
> + * This function must be used as-if it were wake_up_process(); IOW
> the task
> + * must be ready to be woken at this location.
> + *
> + * This function is essentially a task-safe equivalent to
> wake_q_add(). Callers
> + * that already hold reference to @task can call the 'safe' version
> and trust
> + * wake_q to do the right thing depending whether or not the @task is
> already
> + * queued for wakeup.
> + */
> +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
> +{
> +    if (!__wake_q_add(head, task))
> +        put_task_struct(task);
> }
>
> void wake_up_q(struct wake_q_head *head)

Acked-by: Waiman Long <longman@redhat.com>


  reply	other threads:[~2018-12-18 20:35 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-29 12:50 [RFC] locking/rwsem: Avoid issuing wakeup before setting the reader waiter to nil Yongji Xie
2018-11-29 13:12 ` Peter Zijlstra
2018-11-29 13:44   ` Peter Zijlstra
2018-11-29 14:02     ` Yongji Xie
2018-11-29 18:43     ` Davidlohr Bueso
2018-11-29 18:49       ` Waiman Long
2018-11-29 15:21   ` Waiman Long
2018-11-29 15:29     ` Waiman Long
2018-11-29 16:06     ` Peter Zijlstra
2018-11-29 17:02       ` Waiman Long
2018-11-29 17:27         ` Peter Zijlstra
2018-11-29 17:58           ` Waiman Long
2018-11-29 18:13             ` Peter Zijlstra
2018-11-29 18:17               ` Davidlohr Bueso
2018-11-29 18:08           ` Peter Zijlstra
2018-11-29 18:26             ` Waiman Long
2018-11-29 18:31               ` Will Deacon
2018-11-29 18:34                 ` Waiman Long
2018-11-29 22:05                   ` Peter Zijlstra
2018-11-30  9:34                     ` 答复: " Liu,Qi(ACU-T1)
2018-11-30 14:15                       ` Peter Zijlstra
2018-11-29 21:30               ` Davidlohr Bueso
2018-11-29 21:34                 ` Davidlohr Bueso
2018-11-29 22:17                   ` Peter Zijlstra
2018-11-30  9:30                     ` Andrea Parri
2018-12-03  5:31                     ` [PATCH -tip] kernel/sched,wake_q: Branch predict wake_q_add() cmpxchg Davidlohr Bueso
2018-12-03 16:10                       ` Waiman Long
2019-01-21 11:28                       ` [tip:locking/core] sched/wake_q: Add branch prediction hint to " tip-bot for Davidlohr Bueso
2018-12-10 15:12                     ` [RFC] locking/rwsem: Avoid issuing wakeup before setting the reader waiter to nil Yongji Xie
2018-12-17 11:37                       ` Peter Zijlstra
2018-12-17 13:12                         ` Yongji Xie
2019-01-07 14:35                           ` Waiman Long
2019-01-07 15:31                             ` Peter Zijlstra
2019-01-07 15:35                               ` Waiman Long
2018-12-17 20:53                         ` Davidlohr Bueso
2018-12-18 13:10                           ` Peter Zijlstra
2018-12-18 13:14                             ` Peter Zijlstra
2018-12-18 17:27                               ` Davidlohr Bueso
2018-12-18 18:54                               ` [PATCH v2] sched/wake_q: Reduce reference counting for special users Davidlohr Bueso
2018-12-18 19:17                                 ` Waiman Long
2018-12-18 19:30                                   ` Davidlohr Bueso
2018-12-18 19:39                                     ` Davidlohr Bueso
2018-12-18 19:53                                       ` [PATCH v4] " Davidlohr Bueso
2018-12-18 20:35                                         ` Waiman Long [this message]
2019-01-21 16:02                                           ` Davidlohr Bueso
2019-01-22  8:55                                             ` Peter Zijlstra
2019-02-04  8:57                                         ` [tip:locking/core] " tip-bot for Davidlohr Bueso
2019-02-07 19:30                                           ` Davidlohr Bueso
2019-02-12 14:14                                           ` Daniel Vacek
2019-01-21 11:28 ` [tip:locking/core] locking/rwsem: Fix (possible) missed wakeup tip-bot for Xie Yongji

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=772085e9-35f3-1c32-1b87-f101cbc5f8f7@redhat.com \
    --to=longman@redhat.com \
    --cc=andrea.parri@amarulasolutions.com \
    --cc=dave@stgolabs.net \
    --cc=elohimes@gmail.com \
    --cc=lilin24@baidu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liuqi16@baidu.com \
    --cc=mingo@redhat.com \
    --cc=nixun@baidu.com \
    --cc=peterz@infradead.org \
    --cc=will.deacon@arm.com \
    --cc=xieyongji@baidu.com \
    --cc=yuanlinsi01@baidu.com \
    --cc=zhangyu31@baidu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.