linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] sched/core: fix illegal RCU from offline CPUs
@ 2020-04-01 21:40 Qian Cai
  2020-04-02 11:24 ` Michael Ellerman
  2020-05-01 18:22 ` [tip: sched/core] sched/core: Fix " tip-bot2 for Peter Zijlstra
  0 siblings, 2 replies; 17+ messages in thread
From: Qian Cai @ 2020-04-01 21:40 UTC (permalink / raw)
  To: peterz, mingo
  Cc: juri.lelli, dietmar.eggemann, vincent.guittot, rostedt, bsegall,
	mgorman, paulmck, tglx, mpe, James.Bottomley, deller,
	linuxppc-dev, linux-parisc, linux-mm, linux-kernel, Qian Cai

From: Peter Zijlstra <peterz@infradead.org>

In the CPU-offline process, it calls mmdrop() after idle entry and the
subsequent call to cpuhp_report_idle_dead(). Once execution passes the
call to rcu_report_dead(), RCU is ignoring the CPU, which results in
lockdep complaining when mmdrop() uses RCU from either memcg or
debugobjects below.

Fix it by cleaning up the active_mm state from BP instead. Every arch
which has CONFIG_HOTPLUG_CPU should have already called idle_task_exit()
from AP. The only exception is parisc because it switches them to
&init_mm unconditionally (see smp_boot_one_cpu() and smp_cpu_init()),
but the patch will still work there because it calls mmgrab(&init_mm) in
smp_cpu_init() and then should call mmdrop(&init_mm) in finish_cpu().

WARNING: suspicious RCU usage
-----------------------------
kernel/workqueue.c:710 RCU or wq_pool_mutex should be held!

other info that might help us debug this:

RCU used illegally from offline CPU!
Call Trace:
 dump_stack+0xf4/0x164 (unreliable)
 lockdep_rcu_suspicious+0x140/0x164
 get_work_pool+0x110/0x150
 __queue_work+0x1bc/0xca0
 queue_work_on+0x114/0x120
 css_release+0x9c/0xc0
 percpu_ref_put_many+0x204/0x230
 free_pcp_prepare+0x264/0x570
 free_unref_page+0x38/0xf0
 __mmdrop+0x21c/0x2c0
 idle_task_exit+0x170/0x1b0
 pnv_smp_cpu_kill_self+0x38/0x2e0
 cpu_die+0x48/0x64
 arch_cpu_idle_dead+0x30/0x50
 do_idle+0x2f4/0x470
 cpu_startup_entry+0x38/0x40
 start_secondary+0x7a8/0xa80
 start_secondary_resume+0x10/0x14

<Peter to sign off here>
Signed-off-by: Qian Cai <cai@lca.pw>
---
 arch/powerpc/platforms/powernv/smp.c |  1 -
 include/linux/sched/mm.h             |  2 ++
 kernel/cpu.c                         | 18 +++++++++++++++++-
 kernel/sched/core.c                  |  5 +++--
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 13e251699346..b2ba3e95bda7 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void)
 	/* Standard hot unplug procedure */
 
 	idle_task_exit();
-	current->active_mm = NULL; /* for sanity */
 	cpu = smp_processor_id();
 	DBG("CPU%d offline\n", cpu);
 	generic_set_cpu_dead(cpu);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c49257a3b510..a132d875d351 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,8 @@ static inline void mmdrop(struct mm_struct *mm)
 		__mmdrop(mm);
 }
 
+void mmdrop(struct mm_struct *mm);
+
 /*
  * This has to be called after a get_task_mm()/mmget_not_zero()
  * followed by taking the mmap_sem for writing before modifying the
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2371292f30b0..244d30544377 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3,6 +3,7 @@
  *
  * This code is licenced under the GPL.
  */
+#include <linux/sched/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/smp.h>
 #include <linux/init.h>
@@ -564,6 +565,21 @@ static int bringup_cpu(unsigned int cpu)
 	return bringup_wait_for_ap(cpu);
 }
 
+static int finish_cpu(unsigned int cpu)
+{
+	struct task_struct *idle = idle_thread_get(cpu);
+	struct mm_struct *mm = idle->active_mm;
+
+	/*
+	 * idle_task_exit() will have switched to &init_mm, now
+	 * clean up any remaining active_mm state.
+	 */
+	if (mm != &init_mm)
+		idle->active_mm = &init_mm;
+	mmdrop(mm);
+	return 0;
+}
+
 /*
  * Hotplug state machine related functions
  */
@@ -1549,7 +1565,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 	[CPUHP_BRINGUP_CPU] = {
 		.name			= "cpu:bringup",
 		.startup.single		= bringup_cpu,
-		.teardown.single	= NULL,
+		.teardown.single	= finish_cpu,
 		.cant_stop		= true,
 	},
 	/* Final state before CPU kills itself */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a2694ba82874..8787958339d5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6200,13 +6200,14 @@ void idle_task_exit(void)
 	struct mm_struct *mm = current->active_mm;
 
 	BUG_ON(cpu_online(smp_processor_id()));
+	BUG_ON(current != this_rq()->idle);
 
 	if (mm != &init_mm) {
 		switch_mm(mm, &init_mm, current);
-		current->active_mm = &init_mm;
 		finish_arch_post_lock_switch();
 	}
-	mmdrop(mm);
+
+	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
 /*
-- 
2.21.0 (Apple Git-122.2)


^ permalink raw reply related	[flat|nested] 17+ messages in thread
* [PATCH v2] sched/core: fix illegal RCU from offline CPUs
@ 2020-01-13 19:03 Qian Cai
  2020-01-20 10:16 ` Peter Zijlstra
  0 siblings, 1 reply; 17+ messages in thread
From: Qian Cai @ 2020-01-13 19:03 UTC (permalink / raw)
  To: peterz, mingo
  Cc: juri.lelli, vincent.guittot, dietmar.eggemann, rostedt, bsegall,
	mgorman, paulmck, tglx, linux-mm, linux-kernel, Qian Cai

In the CPU-offline process, it calls mmdrop() after idle entry and the
subsequent call to cpuhp_report_idle_dead(). Once execution passes the
call to rcu_report_dead(), RCU is ignoring the CPU, which results in
lockdep complaints when mmdrop() uses RCU from either memcg or
debugobjects, so it by scheduling mmdrop() on another online CPU.

According to the commit a79e53d85683 ("x86/mm: Fix pgd_lock deadlock"),
mmdrop() is not interrupt-safe, and called from
smp_call_function_single() could end up running mmdrop() from the IPI
interrupt handler.

 Call Trace:
  mmdrop+0x58/0x80
  flush_smp_call_function_queue+0x128/0x2e0
  smp_ipi_demux_relaxed+0x84/0xf0
  doorbell_exception+0x118/0x588
  h_doorbell_common+0x124/0x130
  --- interrupt: e81 at replay_interrupt_return+0x0/0x4
      LR = arch_local_irq_restore.part.8+0x78/0x90
  arch_local_irq_restore.part.8+0x34/0x90 (unreliable)
  _raw_spin_unlock_irq+0x50/0x80
  finish_task_switch+0xd8/0x340
  __schedule+0x4bc/0xba0
  schedule_idle+0x38/0x70
  do_idle+0x2a4/0x470
  cpu_startup_entry+0x3c/0x40
  start_secondary+0x7a8/0xa80
  start_secondary_resume+0x10/0x14

Therefore, use mmdrop_async() instead to run mmdrop() from a process
context similar to the commit 7283094ec3db ("kernel, oom: fix potential
pgd_lock deadlock from __mmdrop").

=============================
 WARNING: suspicious RCU usage
 -----------------------------
 kernel/workqueue.c:710 RCU or wq_pool_mutex should be held!

 other info that might help us debug this:

 RCU used illegally from offline CPU!
 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by swapper/37/0:
  #0: c0000000010af608 (rcu_read_lock){....}, at:
      percpu_ref_put_many+0x8/0x230
  #1: c0000000010af608 (rcu_read_lock){....}, at:
      __queue_work+0x7c/0xca0

 stack backtrace:
 Call Trace:
  dump_stack+0xf4/0x164 (unreliable)
  lockdep_rcu_suspicious+0x140/0x164
  get_work_pool+0x110/0x150
  __queue_work+0x1bc/0xca0
  queue_work_on+0x114/0x120
  css_release+0x9c/0xc0
  percpu_ref_put_many+0x204/0x230
  free_pcp_prepare+0x264/0x570
  free_unref_page+0x38/0xf0
  __mmdrop+0x21c/0x2c0
  idle_task_exit+0x170/0x1b0
  pnv_smp_cpu_kill_self+0x38/0x2e0
  cpu_die+0x48/0x64
  arch_cpu_idle_dead+0x30/0x50
  do_idle+0x2f4/0x470
  cpu_startup_entry+0x38/0x40
  start_secondary+0x7a8/0xa80
  start_secondary_resume+0x10/0x14

 =============================
 WARNING: suspicious RCU usage
 -----------------------------
 kernel/sched/core.c:562 suspicious rcu_dereference_check() usage!

 other info that might help us debug this:

 RCU used illegally from offline CPU!
 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by swapper/94/0:
  #0: c000201cc77dc118 (&base->lock){-.-.}, at:
      lock_timer_base+0x114/0x1f0
  #1: c0000000010af608 (rcu_read_lock){....}, at:
      get_nohz_timer_target+0x3c/0x2d0

 stack backtrace:
 Call Trace:
  dump_stack+0xf4/0x164 (unreliable)
  lockdep_rcu_suspicious+0x140/0x164
  get_nohz_timer_target+0x248/0x2d0
  add_timer+0x24c/0x470
  __queue_delayed_work+0x8c/0x110
  queue_delayed_work_on+0x128/0x130
  __debug_check_no_obj_freed+0x2ec/0x320
  free_pcp_prepare+0x1b4/0x570
  free_unref_page+0x38/0xf0
  __mmdrop+0x21c/0x2c0
  idle_task_exit+0x170/0x1b0
  pnv_smp_cpu_kill_self+0x38/0x2e0
  cpu_die+0x48/0x64
  arch_cpu_idle_dead+0x30/0x50
  do_idle+0x2f4/0x470
  cpu_startup_entry+0x38/0x40
  start_secondary+0x7a8/0xa80
  start_secondary_prolog+0x10/0x14

Signed-off-by: Qian Cai <cai@lca.pw>
---

v2: use mmdrop_async() thanks to Tetsuo.

 include/linux/sched/mm.h | 2 ++
 kernel/fork.c            | 2 +-
 kernel/sched/core.c      | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c49257a3b510..205de134348c 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,8 @@ static inline void mmdrop(struct mm_struct *mm)
 		__mmdrop(mm);
 }
 
+extern void mmdrop_async(struct mm_struct *mm);
+
 /*
  * This has to be called after a get_task_mm()/mmget_not_zero()
  * followed by taking the mmap_sem for writing before modifying the
diff --git a/kernel/fork.c b/kernel/fork.c
index 080809560072..9a823a955b7c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -707,7 +707,7 @@ static void mmdrop_async_fn(struct work_struct *work)
 	__mmdrop(mm);
 }
 
-static void mmdrop_async(struct mm_struct *mm)
+void mmdrop_async(struct mm_struct *mm)
 {
 	if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
 		INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 90e4b00ace89..1863a6fc4d82 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6194,7 +6194,8 @@ void idle_task_exit(void)
 		current->active_mm = &init_mm;
 		finish_arch_post_lock_switch();
 	}
-	mmdrop(mm);
+	smp_call_function_single(cpumask_first(cpu_online_mask),
+				 (void (*)(void *))mmdrop_async, mm, 0);
 }
 
 /*
-- 
2.21.0 (Apple Git-122.2)


^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-05-01 18:23 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-01 21:40 [PATCH v2] sched/core: fix illegal RCU from offline CPUs Qian Cai
2020-04-02 11:24 ` Michael Ellerman
2020-04-02 14:00   ` Qian Cai
2020-04-02 15:54     ` Paul E. McKenney
2020-04-02 16:19       ` Qian Cai
2020-04-02 16:57         ` Paul E. McKenney
2020-04-17 13:26   ` Qian Cai
2020-04-21 13:56     ` Peter Zijlstra
2020-05-01 18:22 ` [tip: sched/core] sched/core: Fix " tip-bot2 for Peter Zijlstra
  -- strict thread matches above, loose matches on Subject: below --
2020-01-13 19:03 [PATCH v2] sched/core: fix " Qian Cai
2020-01-20 10:16 ` Peter Zijlstra
2020-01-20 20:35   ` Qian Cai
2020-01-21 10:35     ` Peter Zijlstra
2020-01-24  4:21       ` Qian Cai
2020-01-24  5:02         ` Matthew Wilcox
2020-03-30  2:42       ` Qian Cai
2020-04-01 21:05         ` Qian Cai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).