linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [Resend PATCH v6] psi: fix race between psi_trigger_create/destroy
       [not found] <1623371374-15664-1-git-send-email-huangzhaoyang@gmail.com>
@ 2021-06-11  0:37 ` Zhaoyang Huang
  2021-06-11  6:58   ` Peter Zijlstra
  2021-06-24  7:39 ` [tip: sched/core] psi: Fix " tip-bot2 for Zhaoyang Huang
  1 sibling, 1 reply; 3+ messages in thread
From: Zhaoyang Huang @ 2021-06-11  0:37 UTC (permalink / raw)
  To: Johannes Weiner, Suren Baghdasaryan, Zhaoyang Huang, Ziwei Dai,
	Ke Wang, LKML
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira

From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

Race detected between psi_trigger_destroy/create as shown below, which
cause panic by accessing invalid psi_system->poll_wait->wait_queue_entry
and psi_system->poll_timer->entry->next. Under this modification, the
race window is removed by initialising poll_wait and poll_timer in
group_init which are executed only once at beginning.

psi_trigger_destroy                      psi_trigger_create
mutex_lock(trigger_lock);
rcu_assign_pointer(poll_task, NULL);
mutex_unlock(trigger_lock);
                                        mutex_lock(trigger_lock);
                                        if
(!rcu_access_pointer(group->poll_task)) {


timer_setup(poll_timer, poll_timer_fn, 0);


rcu_assign_pointer(poll_task, task);
                                        }
                                        mutex_unlock(trigger_lock);

synchronize_rcu();
del_timer_sync(poll_timer); <-- poll_timer has been reinitialized by
psi_trigger_create

So, trigger_lock/RCU correctly protects destruction of group->poll_task but
misses this race affecting poll_timer and poll_wait.

Fixes: 461daba06bdc ("psi: eliminate kthread_worker from psi trigger
scheduling mechanism")

Co-developed-by: ziwei.dai <ziwei.dai@unisoc.com>
Signed-off-by: ziwei.dai <ziwei.dai@unisoc.com>
Co-developed-by: ke.wang <ke.wang@unisoc.com>
Signed-off-by: ke.wang <ke.wang@unisoc.com>
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
v2: change del_timer_sync to del_timer in psi_trigger_destroy
v3: remove timer_setup within psi_tirgger_create
    protect del_timer by extending the critical section of mutex_lock
v4: amend fix information on comment
v5: delete the poll_timer while assigning the task to NULL
v6: update subject and comments as Suren's suggestion
---
---
 kernel/sched/psi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index cc25a3c..58b36d1 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -182,6 +182,8 @@ struct psi_group psi_system = {

 static void psi_avgs_work(struct work_struct *work);

+static void poll_timer_fn(struct timer_list *t);
+
 static void group_init(struct psi_group *group)
 {
        int cpu;
@@ -201,6 +203,8 @@ static void group_init(struct psi_group *group)
        memset(group->polling_total, 0, sizeof(group->polling_total));
        group->polling_next_update = ULLONG_MAX;
        group->polling_until = 0;
+       init_waitqueue_head(&group->poll_wait);
+       timer_setup(&group->poll_timer, poll_timer_fn, 0);
        rcu_assign_pointer(group->poll_task, NULL);
 }

@@ -1157,9 +1161,7 @@ struct psi_trigger *psi_trigger_create(struct
psi_group *group,
                        return ERR_CAST(task);
                }
                atomic_set(&group->poll_wakeup, 0);
-               init_waitqueue_head(&group->poll_wait);
                wake_up_process(task);
-               timer_setup(&group->poll_timer, poll_timer_fn, 0);
                rcu_assign_pointer(group->poll_task, task);
        }

@@ -1211,6 +1213,7 @@ static void psi_trigger_destroy(struct kref *ref)
                                        group->poll_task,
                                        lockdep_is_held(&group->trigger_lock));
                        rcu_assign_pointer(group->poll_task, NULL);
+                       del_timer(&group->poll_timer);
                }
        }

@@ -1223,17 +1226,14 @@ static void psi_trigger_destroy(struct kref *ref)
         */
        synchronize_rcu();
        /*
-        * Destroy the kworker after releasing trigger_lock to prevent a
+        * Stop kthread 'psimon' after releasing trigger_lock to prevent a
         * deadlock while waiting for psi_poll_work to acquire trigger_lock
         */
        if (task_to_destroy) {
                /*
                 * After the RCU grace period has expired, the worker
                 * can no longer be found through group->poll_task.
-                * But it might have been already scheduled before
-                * that - deschedule it cleanly before destroying it.
                 */
-               del_timer_sync(&group->poll_timer);
                kthread_stop(task_to_destroy);
        }
        kfree(t);
--
1.9.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Resend PATCH v6] psi: fix race between psi_trigger_create/destroy
  2021-06-11  0:37 ` [Resend PATCH v6] psi: fix race between psi_trigger_create/destroy Zhaoyang Huang
@ 2021-06-11  6:58   ` Peter Zijlstra
  0 siblings, 0 replies; 3+ messages in thread
From: Peter Zijlstra @ 2021-06-11  6:58 UTC (permalink / raw)
  To: Zhaoyang Huang
  Cc: Johannes Weiner, Suren Baghdasaryan, Zhaoyang Huang, Ziwei Dai,
	Ke Wang, LKML, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira

On Fri, Jun 11, 2021 at 08:37:05AM +0800, Zhaoyang Huang wrote:
> From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> 
> Race detected between psi_trigger_destroy/create as shown below, which
> cause panic by accessing invalid psi_system->poll_wait->wait_queue_entry
> and psi_system->poll_timer->entry->next. Under this modification, the
> race window is removed by initialising poll_wait and poll_timer in
> group_init which are executed only once at beginning.
> 
> psi_trigger_destroy                      psi_trigger_create
> mutex_lock(trigger_lock);
> rcu_assign_pointer(poll_task, NULL);
> mutex_unlock(trigger_lock);
>                                         mutex_lock(trigger_lock);
>                                         if
> (!rcu_access_pointer(group->poll_task)) {
> 
> 
> timer_setup(poll_timer, poll_timer_fn, 0);
> 
> 
> rcu_assign_pointer(poll_task, task);
>                                         }
>                                         mutex_unlock(trigger_lock);
> 
> synchronize_rcu();
> del_timer_sync(poll_timer); <-- poll_timer has been reinitialized by
> psi_trigger_create
> 
> So, trigger_lock/RCU correctly protects destruction of group->poll_task but
> misses this race affecting poll_timer and poll_wait.
> 
> Fixes: 461daba06bdc ("psi: eliminate kthread_worker from psi trigger
> scheduling mechanism")
> 
> Co-developed-by: ziwei.dai <ziwei.dai@unisoc.com>
> Signed-off-by: ziwei.dai <ziwei.dai@unisoc.com>
> Co-developed-by: ke.wang <ke.wang@unisoc.com>
> Signed-off-by: ke.wang <ke.wang@unisoc.com>
> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> ---

You really should've preserved the tags from Suren and Johannes, I've
added them. Also the Fixes: line shouldn't wrap and should be attached
to the other tags (no whitespace between), also fixed that. And I've
also made another few small edits.

Please pay attention to these things for next time.

Patch can be found in my queue and should show in tip/sched/core
somewhere on Monday if the robots don't hate on it.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [tip: sched/core] psi: Fix race between psi_trigger_create/destroy
       [not found] <1623371374-15664-1-git-send-email-huangzhaoyang@gmail.com>
  2021-06-11  0:37 ` [Resend PATCH v6] psi: fix race between psi_trigger_create/destroy Zhaoyang Huang
@ 2021-06-24  7:39 ` tip-bot2 for Zhaoyang Huang
  1 sibling, 0 replies; 3+ messages in thread
From: tip-bot2 for Zhaoyang Huang @ 2021-06-24  7:39 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: ziwei.dai, ke.wang, Zhaoyang Huang, Peter Zijlstra (Intel),
	Suren Baghdasaryan, Johannes Weiner, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     8f91efd870ea5d8bc10b0fcc9740db51cd4c0c83
Gitweb:        https://git.kernel.org/tip/8f91efd870ea5d8bc10b0fcc9740db51cd4c0c83
Author:        Zhaoyang Huang <zhaoyang.huang@unisoc.com>
AuthorDate:    Fri, 11 Jun 2021 08:29:34 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 24 Jun 2021 09:07:50 +02:00

psi: Fix race between psi_trigger_create/destroy

Race detected between psi_trigger_destroy/create as shown below, which
cause panic by accessing invalid psi_system->poll_wait->wait_queue_entry
and psi_system->poll_timer->entry->next. Under this modification, the
race window is removed by initialising poll_wait and poll_timer in
group_init which are executed only once at beginning.

  psi_trigger_destroy()                   psi_trigger_create()

  mutex_lock(trigger_lock);
  rcu_assign_pointer(poll_task, NULL);
  mutex_unlock(trigger_lock);
					  mutex_lock(trigger_lock);
					  if (!rcu_access_pointer(group->poll_task)) {
					    timer_setup(poll_timer, poll_timer_fn, 0);
					    rcu_assign_pointer(poll_task, task);
					  }
					  mutex_unlock(trigger_lock);

  synchronize_rcu();
  del_timer_sync(poll_timer); <-- poll_timer has been reinitialized by
                                  psi_trigger_create()

So, trigger_lock/RCU correctly protects destruction of
group->poll_task but misses this race affecting poll_timer and
poll_wait.

Fixes: 461daba06bdc ("psi: eliminate kthread_worker from psi trigger scheduling mechanism")
Co-developed-by: ziwei.dai <ziwei.dai@unisoc.com>
Signed-off-by: ziwei.dai <ziwei.dai@unisoc.com>
Co-developed-by: ke.wang <ke.wang@unisoc.com>
Signed-off-by: ke.wang <ke.wang@unisoc.com>
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/1623371374-15664-1-git-send-email-huangzhaoyang@gmail.com
---
 kernel/sched/psi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index cc25a3c..58b36d1 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -182,6 +182,8 @@ struct psi_group psi_system = {
 
 static void psi_avgs_work(struct work_struct *work);
 
+static void poll_timer_fn(struct timer_list *t);
+
 static void group_init(struct psi_group *group)
 {
 	int cpu;
@@ -201,6 +203,8 @@ static void group_init(struct psi_group *group)
 	memset(group->polling_total, 0, sizeof(group->polling_total));
 	group->polling_next_update = ULLONG_MAX;
 	group->polling_until = 0;
+	init_waitqueue_head(&group->poll_wait);
+	timer_setup(&group->poll_timer, poll_timer_fn, 0);
 	rcu_assign_pointer(group->poll_task, NULL);
 }
 
@@ -1157,9 +1161,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 			return ERR_CAST(task);
 		}
 		atomic_set(&group->poll_wakeup, 0);
-		init_waitqueue_head(&group->poll_wait);
 		wake_up_process(task);
-		timer_setup(&group->poll_timer, poll_timer_fn, 0);
 		rcu_assign_pointer(group->poll_task, task);
 	}
 
@@ -1211,6 +1213,7 @@ static void psi_trigger_destroy(struct kref *ref)
 					group->poll_task,
 					lockdep_is_held(&group->trigger_lock));
 			rcu_assign_pointer(group->poll_task, NULL);
+			del_timer(&group->poll_timer);
 		}
 	}
 
@@ -1223,17 +1226,14 @@ static void psi_trigger_destroy(struct kref *ref)
 	 */
 	synchronize_rcu();
 	/*
-	 * Destroy the kworker after releasing trigger_lock to prevent a
+	 * Stop kthread 'psimon' after releasing trigger_lock to prevent a
 	 * deadlock while waiting for psi_poll_work to acquire trigger_lock
 	 */
 	if (task_to_destroy) {
 		/*
 		 * After the RCU grace period has expired, the worker
 		 * can no longer be found through group->poll_task.
-		 * But it might have been already scheduled before
-		 * that - deschedule it cleanly before destroying it.
 		 */
-		del_timer_sync(&group->poll_timer);
 		kthread_stop(task_to_destroy);
 	}
 	kfree(t);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-06-24  7:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1623371374-15664-1-git-send-email-huangzhaoyang@gmail.com>
2021-06-11  0:37 ` [Resend PATCH v6] psi: fix race between psi_trigger_create/destroy Zhaoyang Huang
2021-06-11  6:58   ` Peter Zijlstra
2021-06-24  7:39 ` [tip: sched/core] psi: Fix " tip-bot2 for Zhaoyang Huang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).