All of lore.kernel.org
 help / color / mirror / Atom feed
* [ANNOUNCE] v4.11.12-rt10
@ 2017-08-18 12:09 Sebastian Andrzej Siewior
  2017-08-23  9:53 ` v4.11.12-rt10 - hotplug lockdep splat Mike Galbraith
  2017-08-23  9:57 ` [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code Mike Galbraith
  0 siblings, 2 replies; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-08-18 12:09 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, linux-rt-users, Steven Rostedt

Dear RT folks!

I'm pleased to announce the v4.11.12-rt10 patch set. 

Changes since v4.11.12-rt9:

  - A tweak to the scheduler to let it know that a task is in a migration
    disabled region so there are fewer possible tasks to migrate. Idea
    and patch by Daniel Bristot de Oliveira.

  - A fix for the CPU idle code on arm64 was merged in v4.11.9-rt6 and
    is now updated to the version which is queued for mainline.

  - hrtimers which fired during a bad window while a CPU was shutting
    down would be postponed forever and could corrupt the deferred list.
    Reported by Mike Galbraith.

  - The new RWLOCK code had a flaw in the write-lock path where a task
    could lose its task state. Reported and fixed by Mike Galbraith.

Known issues
	- There was a report regarding a deadlock within the rtmutex code.

The delta patch against v4.11.12-rt9 is appended below and can be found here:
 
     https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/incr/patch-4.11.12-rt9-rt10.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.11.12-rt10

The RT patch against v4.11.12 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patch-4.11.12-rt10.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.12-rt10.tar.xz

Sebastian
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -28,8 +28,15 @@ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
 {
 	int ret;
 
+	/*
+	 * __atomic_notifier_call_chain has a RCU read critical section, which
+	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
+	 * RCU know this.
+	 */
+	rcu_irq_enter_irqson();
 	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
 		nr_to_call, nr_calls);
+	rcu_irq_exit_irqson();
 
 	return notifier_to_errno(ret);
 }
diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
--- a/kernel/locking/rwlock-rt.c
+++ b/kernel/locking/rwlock-rt.c
@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 	/* Force readers into slow path */
 	atomic_sub(READER_BIAS, &lock->readers);
 
+	raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+	raw_spin_lock(&self->pi_lock);
+	self->saved_state = self->state;
+	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+	raw_spin_unlock(&self->pi_lock);
+
 	for (;;) {
-		raw_spin_lock_irqsave(&m->wait_lock, flags);
-
-		raw_spin_lock(&self->pi_lock);
-		self->saved_state = self->state;
-		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
-		raw_spin_unlock(&self->pi_lock);
-
 		/* Have all readers left the critical region? */
 		if (!atomic_read(&lock->readers)) {
 			atomic_set(&lock->readers, WRITER_BIAS);
@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 
 		if (atomic_read(&lock->readers) != 0)
 			schedule();
+
+		raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+		raw_spin_lock(&self->pi_lock);
+		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+		raw_spin_unlock(&self->pi_lock);
 	}
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7499,6 +7499,47 @@ const u32 sched_prio_to_wmult[40] = {
 
 #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
 
+static inline void
+update_nr_migratory(struct task_struct *p, long delta)
+{
+	if (unlikely((p->sched_class == &rt_sched_class ||
+		      p->sched_class == &dl_sched_class) &&
+		      p->nr_cpus_allowed > 1)) {
+		if (p->sched_class == &rt_sched_class)
+			task_rq(p)->rt.rt_nr_migratory += delta;
+		else
+			task_rq(p)->dl.dl_nr_migratory += delta;
+	}
+}
+
+static inline void
+migrate_disable_update_cpus_allowed(struct task_struct *p)
+{
+	struct rq *rq;
+	struct rq_flags rf;
+
+	p->cpus_ptr = cpumask_of(smp_processor_id());
+
+	rq = task_rq_lock(p, &rf);
+	update_nr_migratory(p, -1);
+	p->nr_cpus_allowed = 1;
+	task_rq_unlock(rq, p, &rf);
+}
+
+static inline void
+migrate_enable_update_cpus_allowed(struct task_struct *p)
+{
+	struct rq *rq;
+	struct rq_flags rf;
+
+	p->cpus_ptr = &p->cpus_mask;
+
+	rq = task_rq_lock(p, &rf);
+	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+	update_nr_migratory(p, 1);
+	task_rq_unlock(rq, p, &rf);
+}
+
 void migrate_disable(void)
 {
 	struct task_struct *p = current;
@@ -7524,10 +7565,9 @@ void migrate_disable(void)
 	preempt_disable();
 	preempt_lazy_disable();
 	pin_current_cpu();
-	p->migrate_disable = 1;
 
-	p->cpus_ptr = cpumask_of(smp_processor_id());
-	p->nr_cpus_allowed = 1;
+	migrate_disable_update_cpus_allowed(p);
+	p->migrate_disable = 1;
 
 	preempt_enable();
 }
@@ -7559,9 +7599,8 @@ void migrate_enable(void)
 
 	preempt_disable();
 
-	p->cpus_ptr = &p->cpus_mask;
-	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
 	p->migrate_disable = 0;
+	migrate_enable_update_cpus_allowed(p);
 
 	if (p->migrate_disable_update) {
 		struct rq *rq;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
-	P(rt_nr_running);
+	PU(rt_nr_running);
+#ifdef CONFIG_SMP
+	PU(rt_nr_migratory);
+#endif
 	P(rt_throttled);
 	PN(rt_time);
 	PN(rt_runtime);
 
 #undef PN
+#undef PU
 #undef P
 }
 
@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 	struct dl_bw *dl_bw;
 
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+	PU(dl_nr_running);
 #ifdef CONFIG_SMP
+	PU(dl_nr_migratory);
 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 #else
 	dl_bw = &dl_rq->dl_bw;
 #endif
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
 }
 
 extern __read_mostly int sched_clock_running;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1802,6 +1802,11 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 */
 		enqueue_hrtimer(timer, new_base);
 	}
+#ifdef CONFIG_PREEMPT_RT_BASE
+	list_splice_tail(&old_base->expired, &new_base->expired);
+	if (!list_empty(&new_base->expired))
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+#endif
 }
 
 int hrtimers_dead_cpu(unsigned int scpu)
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt9
+-rt10

^ permalink raw reply	[flat|nested] 13+ messages in thread

* v4.11.12-rt10 - hotplug lockdep splat
  2017-08-18 12:09 [ANNOUNCE] v4.11.12-rt10 Sebastian Andrzej Siewior
@ 2017-08-23  9:53 ` Mike Galbraith
  2017-08-31 16:18   ` Sebastian Andrzej Siewior
  2017-08-23  9:57 ` [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code Mike Galbraith
  1 sibling, 1 reply; 13+ messages in thread
From: Mike Galbraith @ 2017-08-23  9:53 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Thomas Gleixner
  Cc: LKML, linux-rt-users, Steven Rostedt

virt box reminded me this morning to report this gripe.

[  512.629932] ======================================================
[  512.629933] [ INFO: possible circular locking dependency detected ]
[  512.629934] 4.11.12-rt10-virgin #45 Tainted: G            E  
[  512.629934] -------------------------------------------------------
[  512.629935] stress-cpu-hotp/18312 is trying to acquire lock:
[  512.629935]  (&p->pi_lock){-...-.}, at: [<ffffffff810b3eed>] try_to_wake_up+0x2d/0x970
[  512.629948] but task is already holding lock:
[  512.629948]  (hrtimer_bases.lock){-.....}, at: [<ffffffff8111c757>] hrtimer_interrupt+0x77/0x1d0
[  512.629954] which lock already depends on the new lock.
[  512.629954] the existing dependency chain (in reverse order) is:
[  512.629954] -> #3 (hrtimer_bases.lock){-.....}:
[  512.629960]        lock_acquire+0xbd/0x250
[  512.629965]        _raw_spin_lock_irqsave+0x53/0x70
[  512.629966]        lock_hrtimer_base.isra.27+0x29/0x50
[  512.629966]        hrtimer_start_range_ns+0x2f/0x410
[  512.629968]        enqueue_task_rt+0x325/0x360
[  512.629970]        __sched_setscheduler+0x2d5/0xb60
[  512.629971]        _sched_setscheduler+0x68/0x70
[  512.629972]        sched_setscheduler+0x13/0x20
[  512.629974]        ktimer_softirqd_set_sched_params+0x2a/0x60
[  512.629975]        smpboot_thread_fn+0x131/0x320
[  512.629976]        kthread+0x114/0x150
[  512.629979]        ret_from_fork+0x2a/0x40
[  512.629979] -> #2 (&rt_b->rt_runtime_lock){-.....}:
[  512.629980]        lock_acquire+0xbd/0x250
[  512.629981]        _raw_spin_lock+0x3b/0x50
[  512.629982]        enqueue_task_rt+0x1d8/0x360
[  512.629983]        __sched_setscheduler+0x2d5/0xb60
[  512.629984]        _sched_setscheduler+0x68/0x70
[  512.629985]        sched_setscheduler+0x13/0x20
[  512.629985]        ktimer_softirqd_set_sched_params+0x2a/0x60
[  512.629986]        smpboot_thread_fn+0x131/0x320
[  512.629987]        kthread+0x114/0x150
[  512.629988]        ret_from_fork+0x2a/0x40
[  512.629988] -> #1 (&rq->lock){-...-.}:
[  512.629989]        lock_acquire+0xbd/0x250
[  512.629990]        _raw_spin_lock+0x3b/0x50
[  512.629990]        task_fork_fair+0x3a/0x100
[  512.629991]        sched_fork+0x10d/0x2f0
[  512.629995]        copy_process.part.32+0x747/0x20a0
[  512.629996]        _do_fork+0xe4/0x710
[  512.629997]        kernel_thread+0x29/0x30
[  512.629999]        rest_init+0x22/0xe0
[  512.630007]        start_kernel+0x489/0x496
[  512.630009]        x86_64_start_reservations+0x2a/0x2c
[  512.630010]        x86_64_start_kernel+0x13d/0x14c
[  512.630011]        verify_cpu+0x0/0xfc
[  512.630011] -> #0 (&p->pi_lock){-...-.}:
[  512.630013]        __lock_acquire+0x1527/0x1560
[  512.630013]        lock_acquire+0xbd/0x250
[  512.630014]        _raw_spin_lock_irqsave+0x53/0x70
[  512.630014]        try_to_wake_up+0x2d/0x970
[  512.630015]        wake_up_process+0x15/0x20
[  512.630016]        wakeup_timer_softirqd+0x32/0x40
[  512.630016]        wakeup_proper_softirq+0x25/0x30
[  512.630017]        raise_softirq_irqoff+0x3c/0x50
[  512.630019]        __hrtimer_run_queues+0x19c/0x650
[  512.630020]        hrtimer_interrupt+0xb8/0x1d0
[  512.630021]        hrtimers_dead_cpu+0x37a/0x390
[  512.630022]        cpuhp_invoke_callback+0x248/0x9d0
[  512.630022]        cpuhp_down_callbacks+0x42/0x80
[  512.630023]        _cpu_down+0xc5/0x100
[  512.630023]        do_cpu_down+0x3c/0x60
[  512.630024]        cpu_down+0x10/0x20
[  512.630028]        cpu_subsys_offline+0x14/0x20
[  512.630029]        device_offline+0x8a/0xb0
[  512.630030]        online_store+0x40/0x80
[  512.630034]        dev_attr_store+0x18/0x30
[  512.630038]        sysfs_kf_write+0x44/0x60
[  512.630039]        kernfs_fop_write+0x13c/0x1d0
[  512.630042]        __vfs_write+0x28/0x140
[  512.630042]        vfs_write+0xc7/0x1f0
[  512.630043]        SyS_write+0x49/0xa0
[  512.630044]        entry_SYSCALL_64_fastpath+0x1f/0xc2
[  512.630044] other info that might help us debug this:
[  512.630044] Chain exists of: &p->pi_lock --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock
[  512.630045]  Possible unsafe locking scenario:
[  512.630045]        CPU0                    CPU1
[  512.630045]        ----                    ----
[  512.630046]   lock(hrtimer_bases.lock);
[  512.630046]                                lock(&rt_b->rt_runtime_lock);
[  512.630046]                                lock(hrtimer_bases.lock);
[  512.630047]   lock(&p->pi_lock);
[  512.630047] *** DEADLOCK ***
[  512.630048] 8 locks held by stress-cpu-hotp/18312:
[  512.630048]  #0:  (sb_writers#3){.+.+.+}, at: [<ffffffff8127f726>] vfs_write+0x196/0x1f0
[  512.630050]  #1:  (&of->mutex){+.+.+.}, at: [<ffffffff813187ac>] kernfs_fop_write+0x10c/0x1d0
[  512.630051]  #2:  (s_active#139){.+.+.+}, at: [<ffffffff813187b4>] kernfs_fop_write+0x114/0x1d0
[  512.630053]  #3:  (device_hotplug_lock){+.+.+.}, at: [<ffffffff81553e75>] lock_device_hotplug_sysfs+0x15/0x40
[  512.630055]  #4:  (&dev->mutex){......}, at: [<ffffffff815556f8>] device_offline+0x48/0xb0
[  512.630057]  #5:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff8107e295>] do_cpu_down+0x25/0x60
[  512.630059]  #6:  (cpu_hotplug_lock.rw_sem){++++++}, at: [<ffffffff810de1e6>] percpu_down_write+0x26/0x120
[  512.630062]  #7:  (hrtimer_bases.lock){-.....}, at: [<ffffffff8111c757>] hrtimer_interrupt+0x77/0x1d0
[  512.630064] stack backtrace:
[  512.630065] CPU: 0 PID: 18312 Comm: stress-cpu-hotp Tainted: G            E   4.11.12-rt10-virgin #45
[  512.630066] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
[  512.630067] Call Trace:
[  512.630074]  dump_stack+0x85/0xc8
[  512.630080]  print_circular_bug+0x1f9/0x207
[  512.630082]  __lock_acquire+0x1527/0x1560
[  512.630085]  lock_acquire+0xbd/0x250
[  512.630085]  ? try_to_wake_up+0x2d/0x970
[  512.630087]  _raw_spin_lock_irqsave+0x53/0x70
[  512.630088]  ? try_to_wake_up+0x2d/0x970
[  512.630089]  try_to_wake_up+0x2d/0x970
[  512.630091]  wake_up_process+0x15/0x20
[  512.630092]  wakeup_timer_softirqd+0x32/0x40
[  512.630093]  wakeup_proper_softirq+0x25/0x30
[  512.630094]  raise_softirq_irqoff+0x3c/0x50
[  512.630096]  __hrtimer_run_queues+0x19c/0x650
[  512.630098]  hrtimer_interrupt+0xb8/0x1d0
[  512.630100]  hrtimers_dead_cpu+0x37a/0x390
[  512.630102]  ? hrtimers_prepare_cpu+0x90/0x90
[  512.630103]  cpuhp_invoke_callback+0x248/0x9d0
[  512.630112]  ? flow_cache_lookup+0x430/0x430
[  512.630114]  cpuhp_down_callbacks+0x42/0x80
[  512.630116]  _cpu_down+0xc5/0x100
[  512.630117]  do_cpu_down+0x3c/0x60
[  512.630118]  cpu_down+0x10/0x20
[  512.630120]  cpu_subsys_offline+0x14/0x20
[  512.630121]  device_offline+0x8a/0xb0
[  512.630123]  online_store+0x40/0x80
[  512.630124]  dev_attr_store+0x18/0x30
[  512.630125]  sysfs_kf_write+0x44/0x60
[  512.630127]  kernfs_fop_write+0x13c/0x1d0
[  512.630128]  __vfs_write+0x28/0x140
[  512.630130]  ? rcu_read_lock_sched_held+0x98/0xa0
[  512.630131]  ? rcu_sync_lockdep_assert+0x32/0x60
[  512.630134]  ? __sb_start_write+0x1d2/0x290
[  512.630134]  ? vfs_write+0x196/0x1f0
[  512.630137]  ? security_file_permission+0x3b/0xc0
[  512.630138]  vfs_write+0xc7/0x1f0
[  512.630139]  ? trace_hardirqs_on_caller+0xf9/0x1c0
[  512.630141]  SyS_write+0x49/0xa0
[  512.630142]  entry_SYSCALL_64_fastpath+0x1f/0xc2
[  512.630143] RIP: 0033:0x7fdac1c54d10
[  512.630144] RSP: 002b:00007ffd545bf5e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  512.630144] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fdac1c54d10
[  512.630145] RDX: 0000000000000002 RSI: 00007fdac27c2000 RDI: 0000000000000001
[  512.630145] RBP: 00007ffd545bebb0 R08: 000000000000000a R09: 00007fdac277c700
[  512.630145] R10: 00000000ffffffff R11: 0000000000000246 R12: 0000000002657d86
[  512.630146] R13: 00007ffd545bebc8 R14: 0000000002657d86 R15: 00007ffd545bebb4

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-08-18 12:09 [ANNOUNCE] v4.11.12-rt10 Sebastian Andrzej Siewior
  2017-08-23  9:53 ` v4.11.12-rt10 - hotplug lockdep splat Mike Galbraith
@ 2017-08-23  9:57 ` Mike Galbraith
  2017-08-31 15:48   ` Sebastian Andrzej Siewior
  2017-08-31 19:11   ` Thomas Gleixner
  1 sibling, 2 replies; 13+ messages in thread
From: Mike Galbraith @ 2017-08-23  9:57 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Thomas Gleixner
  Cc: LKML, linux-rt-users, Steven Rostedt


Use get_local_ptr() vs this_cpu_ptr().

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 drivers/block/zram/zcomp.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -120,7 +120,7 @@ struct zcomp_strm *zcomp_stream_get(stru
 {
 	struct zcomp_strm *zstrm;
 
-	zstrm = *this_cpu_ptr(comp->stream);
+	zstrm = *get_local_ptr(comp->stream);
 	spin_lock(&zstrm->zcomp_lock);
 	return zstrm;
 }
@@ -131,6 +131,7 @@ void zcomp_stream_put(struct zcomp *comp
 
 	zstrm = *this_cpu_ptr(comp->stream);
 	spin_unlock(&zstrm->zcomp_lock);
+	put_local_ptr(zstrm);
 }
 
 int zcomp_compress(struct zcomp_strm *zstrm,

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-08-23  9:57 ` [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code Mike Galbraith
@ 2017-08-31 15:48   ` Sebastian Andrzej Siewior
  2017-08-31 19:11   ` Thomas Gleixner
  1 sibling, 0 replies; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-08-31 15:48 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: Thomas Gleixner, LKML, linux-rt-users, Steven Rostedt

On 2017-08-23 11:57:29 [+0200], Mike Galbraith wrote:
> 
> Use get_local_ptr() vs this_cpu_ptr().

applied.

> Signed-off-by: Mike Galbraith <efault@gmx.de>

Sebastian

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: v4.11.12-rt10 - hotplug lockdep splat
  2017-08-23  9:53 ` v4.11.12-rt10 - hotplug lockdep splat Mike Galbraith
@ 2017-08-31 16:18   ` Sebastian Andrzej Siewior
  2017-09-02  7:00     ` Mike Galbraith
  2017-09-03  2:48     ` Mike Galbraith
  0 siblings, 2 replies; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-08-31 16:18 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: Thomas Gleixner, LKML, linux-rt-users, Steven Rostedt

On 2017-08-23 11:53:44 [+0200], Mike Galbraith wrote:
> virt box reminded me this morning to report this gripe.

if you can reproduce it, then this should make it go away:

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1391,7 +1391,7 @@ static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
 
 #endif
 
-static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
+static int __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 {
 	struct hrtimer_clock_base *base = cpu_base->clock_base;
 	unsigned int active = cpu_base->active_bases;
@@ -1432,8 +1432,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 				raise = 1;
 		}
 	}
-	if (raise)
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	return raise;
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1447,6 +1446,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t expires_next, now, entry_time, delta;
 	int retries = 0;
+	int raise;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1465,7 +1465,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	 */
 	cpu_base->expires_next = KTIME_MAX;
 
-	__hrtimer_run_queues(cpu_base, now);
+	raise = __hrtimer_run_queues(cpu_base, now);
 
 	/* Reevaluate the clock bases for the next expiry */
 	expires_next = __hrtimer_get_next_event(cpu_base);
@@ -1476,6 +1476,8 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	cpu_base->expires_next = expires_next;
 	cpu_base->in_hrtirq = 0;
 	raw_spin_unlock(&cpu_base->lock);
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
 	/* Reprogramming necessary ? */
 	if (!tick_program_event(expires_next, 0)) {
@@ -1555,6 +1557,7 @@ void hrtimer_run_queues(void)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t now;
+	int raise;
 
 	if (__hrtimer_hres_active(cpu_base))
 		return;
@@ -1573,8 +1576,10 @@ void hrtimer_run_queues(void)
 
 	raw_spin_lock(&cpu_base->lock);
 	now = hrtimer_update_base(cpu_base);
-	__hrtimer_run_queues(cpu_base, now);
+	raise = __hrtimer_run_queues(cpu_base, now);
 	raw_spin_unlock(&cpu_base->lock);
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
 
 /*

Sebastian

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-08-23  9:57 ` [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code Mike Galbraith
  2017-08-31 15:48   ` Sebastian Andrzej Siewior
@ 2017-08-31 19:11   ` Thomas Gleixner
  2017-08-31 19:26     ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 13+ messages in thread
From: Thomas Gleixner @ 2017-08-31 19:11 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Sebastian Andrzej Siewior, LKML, linux-rt-users, Steven Rostedt,
	Peter Zijlstra

On Wed, 23 Aug 2017, Mike Galbraith wrote:

> 
> Use get_local_ptr() vs this_cpu_ptr().
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>
> ---
>  drivers/block/zram/zcomp.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> --- a/drivers/block/zram/zcomp.c
> +++ b/drivers/block/zram/zcomp.c
> @@ -120,7 +120,7 @@ struct zcomp_strm *zcomp_stream_get(stru
>  {
>  	struct zcomp_strm *zstrm;
>  
> -	zstrm = *this_cpu_ptr(comp->stream);
> +	zstrm = *get_local_ptr(comp->stream);

This looks wrong. On mainline the calling code must have preemption disabled
somehow, otherwise this_cpu_ptr() would not work.

Looking at the call site it is;

	zram_slot_lock()
	  bit_spin_lock()

which is of course evading lockdep and everything else debugging wise.

Sebastian, do we have migration protection in bitlocked regions? And we
should look into converting that into a spinlock on rt.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-08-31 19:11   ` Thomas Gleixner
@ 2017-08-31 19:26     ` Sebastian Andrzej Siewior
  2017-08-31 19:32       ` Thomas Gleixner
  0 siblings, 1 reply; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-08-31 19:26 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Mike Galbraith, LKML, linux-rt-users, Steven Rostedt, Peter Zijlstra

On 2017-08-31 21:11:08 [+0200], Thomas Gleixner wrote:
> On Wed, 23 Aug 2017, Mike Galbraith wrote:
> 
> > 
> > Use get_local_ptr() vs this_cpu_ptr().
> > 
> > Signed-off-by: Mike Galbraith <efault@gmx.de>
> > ---
> >  drivers/block/zram/zcomp.c |    3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > --- a/drivers/block/zram/zcomp.c
> > +++ b/drivers/block/zram/zcomp.c
> > @@ -120,7 +120,7 @@ struct zcomp_strm *zcomp_stream_get(stru
> >  {
> >  	struct zcomp_strm *zstrm;
> >  
> > -	zstrm = *this_cpu_ptr(comp->stream);
> > +	zstrm = *get_local_ptr(comp->stream);
> 
> This looks wrong. On mainline the calling code must have preemption disable
> somehow, otherwise this_cpu_ptr() would not work.

This was introduced by Mike in a previous patch. The zstrm is only
accessed while the spinlock is held.

> Looking at the call site it is;
> 
> 	zram_slot_lock()
> 	  bit_spin_lock()
> 
> which is of course evading lockdep and everything else debugging wise.
> 
> Sebastian, do we have migration protection in bitlocked regions? And we
> shpuld look into converting that into a spinlock on rt.

zram_lock_table() is bit_spin_lock() on !RT and spin_lock(&table->lock);
on RT. So this is done.
!RT has this running in a kmap_atomic() section so they have no
preemption there.
zcomp_stream_get() returns a per-CPU object which is protected with a
spinlock and only accessed locked.

> Thanks,
> 
> 	tglx

Sebastian

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-08-31 19:26     ` Sebastian Andrzej Siewior
@ 2017-08-31 19:32       ` Thomas Gleixner
  2017-09-01  7:40         ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Gleixner @ 2017-08-31 19:32 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Mike Galbraith, LKML, linux-rt-users, Steven Rostedt, Peter Zijlstra

On Thu, 31 Aug 2017, Sebastian Andrzej Siewior wrote:

> On 2017-08-31 21:11:08 [+0200], Thomas Gleixner wrote:
> > On Wed, 23 Aug 2017, Mike Galbraith wrote:
> > 
> > > 
> > > Use get_local_ptr() vs this_cpu_ptr().
> > > 
> > > Signed-off-by: Mike Galbraith <efault@gmx.de>
> > > ---
> > >  drivers/block/zram/zcomp.c |    3 ++-
> > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > 
> > > --- a/drivers/block/zram/zcomp.c
> > > +++ b/drivers/block/zram/zcomp.c
> > > @@ -120,7 +120,7 @@ struct zcomp_strm *zcomp_stream_get(stru
> > >  {
> > >  	struct zcomp_strm *zstrm;
> > >  
> > > -	zstrm = *this_cpu_ptr(comp->stream);
> > > +	zstrm = *get_local_ptr(comp->stream);
> > 
> > This looks wrong. On mainline the calling code must have preemption disable
> > somehow, otherwise this_cpu_ptr() would not work.
> 
> This was introduced by Mike in a previous patch. The zstrm is only
> accessed while the spinlock is held.
> 
> > Looking at the call site it is;
> > 
> > 	zram_slot_lock()
> > 	  bit_spin_lock()
> > 
> > which is of course evading lockdep and everything else debugging wise.
> > 
> > Sebastian, do we have migration protection in bitlocked regions? And we
> > shpuld look into converting that into a spinlock on rt.
> 
> zram_lock_table() is bit_spin_lock() on !RT and spin_lock(&table->lock);
> on RT. So this is done.
> !RT has this running in a kmap_atomic() section so they have no
> preemption there.
> zcomp_stream_get() returns a per-CPU object which is protected with a
> spinlock and only accessed locked.

So when we are inside a spinlocked section, why is this_cpu_ptr() not
working? That does not make sense.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-08-31 19:32       ` Thomas Gleixner
@ 2017-09-01  7:40         ` Sebastian Andrzej Siewior
  2017-09-01  7:47           ` Thomas Gleixner
  0 siblings, 1 reply; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-09-01  7:40 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Mike Galbraith, LKML, linux-rt-users, Steven Rostedt, Peter Zijlstra

On 2017-08-31 21:32:30 [+0200], Thomas Gleixner wrote:
> > zram_lock_table() is bit_spin_lock() on !RT and spin_lock(&table->lock);
> > on RT. So this is done.
> > !RT has this running in a kmap_atomic() section so they have no
> > preemption there.
> > zcomp_stream_get() returns a per-CPU object which is protected with a
> > spinlock and only accessed locked.
> 
> So when we are inside a spinlocked section, why is this_cpu_ptr() not
> working? That does not make sense.

zram_decompress_page() invokes zcomp_stream_get() within a spin_lock
section (zram_lock_table()). This is fine.
zram_bvec_write() does not invoke zcomp_stream_get() within a spin_lock
section.

> Thanks,
> 
> 	tglx

Sebastian

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code
  2017-09-01  7:40         ` Sebastian Andrzej Siewior
@ 2017-09-01  7:47           ` Thomas Gleixner
  0 siblings, 0 replies; 13+ messages in thread
From: Thomas Gleixner @ 2017-09-01  7:47 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Mike Galbraith, LKML, linux-rt-users, Steven Rostedt, Peter Zijlstra

On Fri, 1 Sep 2017, Sebastian Andrzej Siewior wrote:

> On 2017-08-31 21:32:30 [+0200], Thomas Gleixner wrote:
> > > zram_lock_table() is bit_spin_lock() on !RT and spin_lock(&table->lock);
> > > on RT. So this is done.
> > > !RT has this running in a kmap_atomic() section so they have no
> > > preemption there.
> > > zcomp_stream_get() returns a per-CPU object which is protected with a
> > > spinlock and only accessed locked.
> > 
> > So when we are inside a spinlocked section, why is this_cpu_ptr() not
> > working? That does not make sense.
> 
> zram_decompress_page() invokes zcomp_stream_get() within a spin_lock
> section (zram_lock_table()). This is fine.
> zram_bvec_write() does not invoke zcomp_stream_get() within a spin_lock
> section.

Fair enough.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: v4.11.12-rt10 - hotplug lockdep splat
  2017-08-31 16:18   ` Sebastian Andrzej Siewior
@ 2017-09-02  7:00     ` Mike Galbraith
  2017-09-03  2:48     ` Mike Galbraith
  1 sibling, 0 replies; 13+ messages in thread
From: Mike Galbraith @ 2017-09-02  7:00 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Thomas Gleixner, LKML, linux-rt-users, Steven Rostedt

On Thu, 2017-08-31 at 18:18 +0200, Sebastian Andrzej Siewior wrote:
> On 2017-08-23 11:53:44 [+0200], Mike Galbraith wrote:
> > virt box reminded me this morning to report this gripe.
> 
> if you can reproduce it, then this should make it go away:

Bug yawns, stretches: "Huh? Make what go away?" ;-)

[  186.049162] ======================================================
[  186.049163] [ INFO: possible circular locking dependency detected ]
[  186.049164] 4.11.12-rt11-virgin #47 Tainted: G            E  
[  186.049164] -------------------------------------------------------
[  186.049165] stress-cpu-hotp/3338 is trying to acquire lock:
[  186.049165]  (&p->pi_lock){-...-.}, at: [<ffffffff810b3eed>] try_to_wake_up+0x2d/0x970
[  186.049179] 
[  186.049179] but task is already holding lock:
[  186.049179]  (hrtimer_bases.lock/1){......}, at: [<ffffffff8111cb6d>] hrtimers_dead_cpu+0x7d/0x390
[  186.049186] 
[  186.049186] which lock already depends on the new lock.
[  186.049186] 
[  186.049186] 
[  186.049186] the existing dependency chain (in reverse order) is:
[  186.049186] 
[  186.049186] -> #4 (hrtimer_bases.lock/1){......}:
[  186.049189]        lock_acquire+0xbd/0x250
[  186.049194]        _raw_spin_lock_nested+0x41/0x60
[  186.049195]        hrtimers_dead_cpu+0x7d/0x390
[  186.049196]        cpuhp_invoke_callback+0x248/0x9d0
[  186.049197]        cpuhp_down_callbacks+0x42/0x80
[  186.049198]        _cpu_down+0xc5/0x100
[  186.049199]        do_cpu_down+0x3c/0x60
[  186.049199]        cpu_down+0x10/0x20
[  186.049203]        cpu_subsys_offline+0x14/0x20
[  186.049204]        device_offline+0x8a/0xb0
[  186.049205]        online_store+0x40/0x80
[  186.049209]        dev_attr_store+0x18/0x30
[  186.049213]        sysfs_kf_write+0x44/0x60
[  186.049214]        kernfs_fop_write+0x13c/0x1d0
[  186.049217]        __vfs_write+0x28/0x140
[  186.049218]        vfs_write+0xc7/0x1f0
[  186.049219]        SyS_write+0x49/0xa0
[  186.049221]        entry_SYSCALL_64_fastpath+0x1f/0xc2
[  186.049221] 
[  186.049221] -> #3 (hrtimer_bases.lock){-.....}:
[  186.049226]        lock_acquire+0xbd/0x250
[  186.049226]        _raw_spin_lock_irqsave+0x53/0x70
[  186.049227]        lock_hrtimer_base.isra.27+0x29/0x50
[  186.049228]        hrtimer_start_range_ns+0x2f/0x410
[  186.049229]        enqueue_task_rt+0x325/0x360
[  186.049232]        __sched_setscheduler+0x2d5/0xb60
[  186.049233]        _sched_setscheduler+0x68/0x70
[  186.049234]        sched_setscheduler+0x13/0x20
[  186.049235]        ktimer_softirqd_set_sched_params+0x2a/0x60
[  186.049236]        smpboot_thread_fn+0x131/0x320
[  186.049238]        kthread+0x114/0x150
[  186.049239]        ret_from_fork+0x2a/0x40
[  186.049239] 
[  186.049239] -> #2 (&rt_b->rt_runtime_lock){-.....}:
[  186.049241]        lock_acquire+0xbd/0x250
[  186.049242]        _raw_spin_lock+0x3b/0x50
[  186.049242]        enqueue_task_rt+0x1d8/0x360
[  186.049243]        __sched_setscheduler+0x2d5/0xb60
[  186.049244]        _sched_setscheduler+0x68/0x70
[  186.049245]        sched_setscheduler+0x13/0x20
[  186.049246]        ktimer_softirqd_set_sched_params+0x2a/0x60
[  186.049247]        smpboot_thread_fn+0x131/0x320
[  186.049248]        kthread+0x114/0x150
[  186.049248]        ret_from_fork+0x2a/0x40
[  186.049248] 
[  186.049248] -> #1 (&rq->lock){-...-.}:
[  186.049250]        lock_acquire+0xbd/0x250
[  186.049250]        _raw_spin_lock+0x3b/0x50
[  186.049251]        task_fork_fair+0x3a/0x100
[  186.049252]        sched_fork+0x10d/0x2f0
[  186.049255]        copy_process.part.32+0x747/0x20a0
[  186.049256]        _do_fork+0xe4/0x710
[  186.049257]        kernel_thread+0x29/0x30
[  186.049258]        rest_init+0x22/0xe0
[  186.049266]        start_kernel+0x489/0x496
[  186.049268]        x86_64_start_reservations+0x2a/0x2c
[  186.049269]        x86_64_start_kernel+0x13d/0x14c
[  186.049271]        verify_cpu+0x0/0xfc
[  186.049271] 
[  186.049271] -> #0 (&p->pi_lock){-...-.}:
[  186.049272]        __lock_acquire+0x1527/0x1560
[  186.049273]        lock_acquire+0xbd/0x250
[  186.049273]        _raw_spin_lock_irqsave+0x53/0x70
[  186.049274]        try_to_wake_up+0x2d/0x970
[  186.049274]        wake_up_process+0x15/0x20
[  186.049275]        wakeup_timer_softirqd+0x32/0x40
[  186.049276]        wakeup_proper_softirq+0x25/0x30
[  186.049277]        raise_softirq_irqoff+0x3c/0x50
[  186.049278]        hrtimers_dead_cpu+0x289/0x390
[  186.049278]        cpuhp_invoke_callback+0x248/0x9d0
[  186.049279]        cpuhp_down_callbacks+0x42/0x80
[  186.049279]        _cpu_down+0xc5/0x100
[  186.049280]        do_cpu_down+0x3c/0x60
[  186.049280]        cpu_down+0x10/0x20
[  186.049281]        cpu_subsys_offline+0x14/0x20
[  186.049282]        device_offline+0x8a/0xb0
[  186.049283]        online_store+0x40/0x80
[  186.049284]        dev_attr_store+0x18/0x30
[  186.049284]        sysfs_kf_write+0x44/0x60
[  186.049285]        kernfs_fop_write+0x13c/0x1d0
[  186.049285]        __vfs_write+0x28/0x140
[  186.049286]        vfs_write+0xc7/0x1f0
[  186.049287]        SyS_write+0x49/0xa0
[  186.049287]        entry_SYSCALL_64_fastpath+0x1f/0xc2
[  186.049288] 
[  186.049288] other info that might help us debug this:
[  186.049288] 
[  186.049288] Chain exists of:
[  186.049288]   &p->pi_lock --> hrtimer_bases.lock --> hrtimer_bases.lock/1
[  186.049288] 
[  186.049289]  Possible unsafe locking scenario:
[  186.049289] 
[  186.049289]        CPU0                    CPU1
[  186.049290]        ----                    ----
[  186.049290]   lock(hrtimer_bases.lock/1);
[  186.049290]                                lock(hrtimer_bases.lock);
[  186.049291]                                lock(hrtimer_bases.lock/1);
[  186.049291]   lock(&p->pi_lock);
[  186.049292] 
[  186.049292]  *** DEADLOCK ***
[  186.049292] 
[  186.049292] 9 locks held by stress-cpu-hotp/3338:
[  186.049292]  #0:  (sb_writers#3){.+.+.+}, at: [<ffffffff8127f746>] vfs_write+0x196/0x1f0
[  186.049294]  #1:  (&of->mutex){+.+.+.}, at: [<ffffffff813187cc>] kernfs_fop_write+0x10c/0x1d0
[  186.049295]  #2:  (s_active#139){.+.+.+}, at: [<ffffffff813187d4>] kernfs_fop_write+0x114/0x1d0
[  186.049296]  #3:  (device_hotplug_lock){+.+.+.}, at: [<ffffffff81553e95>] lock_device_hotplug_sysfs+0x15/0x40
[  186.049297]  #4:  (&dev->mutex){......}, at: [<ffffffff81555718>] device_offline+0x48/0xb0
[  186.049299]  #5:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff8107e295>] do_cpu_down+0x25/0x60
[  186.049300]  #6:  (cpu_hotplug_lock.rw_sem){++++++}, at: [<ffffffff810de1e6>] percpu_down_write+0x26/0x120
[  186.049301]  #7:  (hrtimer_bases.lock){-.....}, at: [<ffffffff8111cb60>] hrtimers_dead_cpu+0x70/0x390
[  186.049303]  #8:  (hrtimer_bases.lock/1){......}, at: [<ffffffff8111cb6d>] hrtimers_dead_cpu+0x7d/0x390
[  186.049304] 
[  186.049304] stack backtrace:
[  186.049305] CPU: 0 PID: 3338 Comm: stress-cpu-hotp Tainted: G            E   4.11.12-rt11-virgin #47
[  186.049306] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
[  186.049306] Call Trace:
[  186.049312]  dump_stack+0x85/0xc8
[  186.049317]  print_circular_bug+0x1f9/0x207
[  186.049318]  __lock_acquire+0x1527/0x1560
[  186.049320]  lock_acquire+0xbd/0x250
[  186.049321]  ? try_to_wake_up+0x2d/0x970
[  186.049322]  _raw_spin_lock_irqsave+0x53/0x70
[  186.049322]  ? try_to_wake_up+0x2d/0x970
[  186.049323]  try_to_wake_up+0x2d/0x970
[  186.049324]  wake_up_process+0x15/0x20
[  186.049325]  wakeup_timer_softirqd+0x32/0x40
[  186.049326]  wakeup_proper_softirq+0x25/0x30
[  186.049327]  raise_softirq_irqoff+0x3c/0x50
[  186.049328]  hrtimers_dead_cpu+0x289/0x390
[  186.049329]  ? hrtimers_prepare_cpu+0x90/0x90
[  186.049330]  cpuhp_invoke_callback+0x248/0x9d0
[  186.049335]  ? flow_cache_lookup+0x430/0x430
[  186.049336]  cpuhp_down_callbacks+0x42/0x80
[  186.049337]  _cpu_down+0xc5/0x100
[  186.049338]  do_cpu_down+0x3c/0x60
[  186.049339]  cpu_down+0x10/0x20
[  186.049340]  cpu_subsys_offline+0x14/0x20
[  186.049341]  device_offline+0x8a/0xb0
[  186.049341]  online_store+0x40/0x80
[  186.049343]  dev_attr_store+0x18/0x30
[  186.049343]  sysfs_kf_write+0x44/0x60
[  186.049344]  kernfs_fop_write+0x13c/0x1d0
[  186.049345]  __vfs_write+0x28/0x140
[  186.049346]  ? rcu_read_lock_sched_held+0x98/0xa0
[  186.049347]  ? rcu_sync_lockdep_assert+0x32/0x60
[  186.049348]  ? __sb_start_write+0x1d2/0x290
[  186.049349]  ? vfs_write+0x196/0x1f0
[  186.049352]  ? security_file_permission+0x3b/0xc0
[  186.049353]  vfs_write+0xc7/0x1f0
[  186.049354]  ? trace_hardirqs_on_caller+0xf9/0x1c0
[  186.049355]  SyS_write+0x49/0xa0
[  186.049356]  entry_SYSCALL_64_fastpath+0x1f/0xc2
[  186.049357] RIP: 0033:0x7fde821d0d10
[  186.049358] RSP: 002b:00007ffc2ee807e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  186.049359] RAX: ffffffffffffffda RBX: 0000000001fae090 RCX: 00007fde821d0d10
[  186.049360] RDX: 0000000000000002 RSI: 00007fde82d3e000 RDI: 0000000000000001
[  186.049360] RBP: 00007ffc2ee80800 R08: 000000000000000a R09: 00007fde82cf8700
[  186.049360] R10: 00000000ffffffff R11: 0000000000000246 R12: 0000000000000007
[  186.049361] R13: 0000000000000001 R14: 0000000000000009 R15: 000000000000000a

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: v4.11.12-rt10 - hotplug lockdep splat
  2017-08-31 16:18   ` Sebastian Andrzej Siewior
  2017-09-02  7:00     ` Mike Galbraith
@ 2017-09-03  2:48     ` Mike Galbraith
  2017-09-04 14:58       ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 13+ messages in thread
From: Mike Galbraith @ 2017-09-03  2:48 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Thomas Gleixner, LKML, linux-rt-users, Steven Rostedt

On Thu, 2017-08-31 at 18:18 +0200, Sebastian Andrzej Siewior wrote:
> On 2017-08-23 11:53:44 [+0200], Mike Galbraith wrote:
> > virt box reminded me this morning to report this gripe.
> 
> if you can reproduce it, then this should make it go away:

Missed a spot.  With this on top, lockdep went silent.

kernel/hrtimer/hotplug: don't wake ktimersoftd while holding the hrtimer base lock

kernel/hrtimer: don't wakeup a process while holding the hrtimer base lock
missed a path, namely hrtimers_dead_cpu() -> migrate_hrtimer_list().  Defer
raising the softirq until after the base lock has been released there as well.

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 kernel/time/hrtimer.c |   19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1779,7 +1779,7 @@ int hrtimers_prepare_cpu(unsigned int cp
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 				struct hrtimer_clock_base *new_base)
 {
 	struct hrtimer *timer;
@@ -1809,15 +1809,19 @@ static void migrate_hrtimer_list(struct
 	}
 #ifdef CONFIG_PREEMPT_RT_BASE
 	list_splice_tail(&old_base->expired, &new_base->expired);
-	if (!list_empty(&new_base->expired))
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	/*
+	 * Tell the caller to raise HRTIMER_SOFTIRQ.  We can't safely
+	 * acquire ktimersoftd->pi_lock while the base lock is held.
+	 */
+	return !list_empty(&new_base->expired);
 #endif
+	return 0;
 }
 
 int hrtimers_dead_cpu(unsigned int scpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i;
+	int i, raise = 0;
 
 	BUG_ON(cpu_online(scpu));
 	tick_cancel_sched_timer(scpu);
@@ -1833,13 +1837,16 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		migrate_hrtimer_list(&old_base->clock_base[i],
-				     &new_base->clock_base[i]);
+		raise |= migrate_hrtimer_list(&old_base->clock_base[i],
+					      &new_base->clock_base[i]);
 	}
 
 	raw_spin_unlock(&old_base->lock);
 	raw_spin_unlock(&new_base->lock);
 
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
 	local_irq_enable();

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: v4.11.12-rt10 - hotplug lockdep splat
  2017-09-03  2:48     ` Mike Galbraith
@ 2017-09-04 14:58       ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2017-09-04 14:58 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: Thomas Gleixner, LKML, linux-rt-users, Steven Rostedt

On 2017-09-03 04:48:10 [+0200], Mike Galbraith wrote:
> Missed a spot.  With this on top, lockdep went silent.
> 
> kernel/hrtimer/hotplug: don't wake ktimersoftd while holding the hrtimer base lock
> 
> kernel/hrtimer: don't wakeup a process while holding the hrtimer base lock
> missed a path, namely hrtimers_dead_cpu() -> migrate_hrtimer_list().  Defer
> raising the softirq until after the base lock has been released there as well.
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>
Thank you, applied.

Sebastian

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-09-04 14:58 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-18 12:09 [ANNOUNCE] v4.11.12-rt10 Sebastian Andrzej Siewior
2017-08-23  9:53 ` v4.11.12-rt10 - hotplug lockdep splat Mike Galbraith
2017-08-31 16:18   ` Sebastian Andrzej Siewior
2017-09-02  7:00     ` Mike Galbraith
2017-09-03  2:48     ` Mike Galbraith
2017-09-04 14:58       ` Sebastian Andrzej Siewior
2017-08-23  9:57 ` [patch-rt] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code Mike Galbraith
2017-08-31 15:48   ` Sebastian Andrzej Siewior
2017-08-31 19:11   ` Thomas Gleixner
2017-08-31 19:26     ` Sebastian Andrzej Siewior
2017-08-31 19:32       ` Thomas Gleixner
2017-09-01  7:40         ` Sebastian Andrzej Siewior
2017-09-01  7:47           ` Thomas Gleixner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.