All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 2/2] Make hard lockup detection use timestamps
@ 2011-07-21 18:11 Alex Neronskiy
  2011-07-22 19:53 ` Don Zickus
  0 siblings, 1 reply; 15+ messages in thread
From: Alex Neronskiy @ 2011-07-21 18:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Don Zickus, Mandeep Singh Baines, Alex Neronskiy

The hard lockup detector is made to work more like the soft lockup
detector by using timestamps to compare how long it's been since
an event happened. Specifically, when the period function that checks
for soft lockups fires, it touches a timestamp, and the hard lockup
detector now checks if the timer is late, based on when it last fired.
Hard stalls are now sub-hardlock threshold delays in the timer's
firing which are nonetheless notable.

Signed-off-by: Alex Neronskiy <zakmagnus@google.com>
---
 kernel/watchdog.c |   83 +++++++++++++++++++++++++++++-----------------------
 1 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index bf26810..17bcded 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -40,30 +40,28 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_missed);
+static DEFINE_PER_CPU(unsigned long, hrtimer_touch_ts);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
 unsigned long worst_hardstall;
 static DEFINE_SPINLOCK(hardstall_lock);
 
-/* The number of consecutive hard check failures before it's a lockup */
-#define hardlockup_thresh (5)
+/* How often to check for hard lockups compared to watchdog_thresh */
+#define hardlockup_freq (5)
 
-/*
- * The minimum expected number of missed interrupts in a normal hard
- * lockup check.
- */
-#define hardstall_thresh (2)
-
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+/* How late (in ms) a timer could be while still being ignored as "normal" */
+#define hardstall_thresh (200)
 
 /*
  * The number of milliseconds by which a stall must be worse than the
  * current worst in order to be recorded. This prevents bursts of near-
  * identical stalls.
  */
+#define hardstall_diff_thresh (100)
+
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+/* Difference threshold for soft stalls (ms) */
 #define softstall_diff_thresh (100)
 
 /* boot commands */
@@ -156,6 +154,11 @@ static unsigned long get_sample_period(void)
 	return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
 }
 
+static inline unsigned long get_sample_period_ms(void)
+{
+	return get_sample_period() >> 20LL;
+}
+
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
@@ -211,11 +214,11 @@ void touch_softlockup_watchdog_sync(void)
 static void update_hardstall(unsigned long stall, int this_cpu)
 {
 	int update_stall = 0;
-
-	if (stall > hardstall_thresh && stall > worst_hardstall) {
+	if (stall > hardstall_thresh &&
+			stall > worst_hardstall + hardstall_diff_thresh) {
 		unsigned long flags;
 		spin_lock_irqsave(&hardstall_lock, flags);
-		update_stall = stall > worst_hardstall;
+		update_stall = stall > worst_hardstall + hardstall_diff_thresh;
 		if (update_stall)
 			worst_hardstall = stall;
 		spin_unlock_irqrestore(&hardstall_lock, flags);
@@ -223,7 +226,7 @@ static void update_hardstall(unsigned long stall, int this_cpu)
 
 	if (update_stall) {
 		printk(KERN_WARNING "LOCKUP may be in progress!"
-			"Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
+			"Worst hard stall seen on CPU#%d: %lums\n",
 			this_cpu, stall);
 		dump_stack();
 	}
@@ -231,21 +234,27 @@ static void update_hardstall(unsigned long stall, int this_cpu)
 
 static int is_hardlockup(int this_cpu)
 {
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-	unsigned long hrint_saved = __this_cpu_read(hrtimer_interrupts_saved);
-	unsigned long ints_missed = 0;
+	unsigned long touch_ts = __this_cpu_read(hrtimer_touch_ts);
+	unsigned long next_touch = touch_ts + get_sample_period_ms();
+	unsigned long now = get_timestamp(this_cpu);
+	unsigned long delay = now - next_touch;
 
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
+	/* the timer has never fired; no data to go on yet */
+	if (unlikely(!touch_ts))
+		return 0;
 
-	if (hrint_saved == hrint)
-		ints_missed = per_cpu(hrtimer_interrupts_missed, this_cpu)++;
-	else
-		__this_cpu_write(hrtimer_interrupts_missed, 0);
+	/*
+	 * If the next touch is supposed to be in the future, then
+	 * everything is fine. If it's not, that means it was missed
+	 * and something's wrong.
+	 */
+	if (time_after(next_touch, now))
+		return 0;
 
-	if (ints_missed >= hardlockup_thresh)
+	if (delay >= (watchdog_thresh << 10L))
 		return 1;
 
-	update_hardstall(ints_missed, this_cpu);
+	update_hardstall(delay, this_cpu);
 	return 0;
 }
 #endif
@@ -311,10 +320,10 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	}
 
 	/* check for a hardlockup
-	 * This is done by making sure our timer interrupt
-	 * is incrementing.  The timer interrupt should have
-	 * fired multiple times before we overflow'd.  If it hasn't
-	 * then this is a good indication the cpu is stuck
+	 * This is done by making sure our timer interrupts
+	 * are firing. It is known approximately when the timer
+	 * should fire and if it hasn't by then this is a good
+	 * indication the cpu is stuck.
 	 */
 	lockup = is_hardlockup(this_cpu);
 	if (lockup) {
@@ -334,12 +343,12 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	__this_cpu_write(hard_watchdog_warn, false);
 	return;
 }
-static void watchdog_interrupt_count(void)
+static void watchdog_interrupt_touch(int this_cpu)
 {
-	__this_cpu_inc(hrtimer_interrupts);
+	__this_cpu_write(hrtimer_touch_ts, get_timestamp(this_cpu));
 }
 #else
-static inline void watchdog_interrupt_count(void) { return; }
+static inline void watchdog_interrupt_touch(int this_cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* watchdog kicker functions */
@@ -350,15 +359,15 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	int duration;
 	int this_cpu = smp_processor_id();
 
-	/* kick the hardlockup detector */
-	watchdog_interrupt_count();
-
 	/* kick the softlockup detector */
 	wake_up_process(__this_cpu_read(softlockup_watchdog));
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
+	/* kick the hardlockup detector now that the timer's been scheduled */
+	watchdog_interrupt_touch(this_cpu);
+
 	if (touch_ts == 0) {
 		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
 			/*
@@ -460,7 +469,7 @@ static int watchdog_nmi_enable(int cpu)
 	/* Try to register using hardware perf events */
 	wd_attr = &wd_hw_attr;
 	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-	do_div(wd_attr->sample_period, hardlockup_thresh);
+	do_div(wd_attr->sample_period, hardlockup_freq);
 	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
 	if (!IS_ERR(event)) {
 		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
-- 
1.7.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-21 18:11 [PATCH v3 2/2] Make hard lockup detection use timestamps Alex Neronskiy
@ 2011-07-22 19:53 ` Don Zickus
  2011-07-22 22:34   ` ZAK Magnus
  0 siblings, 1 reply; 15+ messages in thread
From: Don Zickus @ 2011-07-22 19:53 UTC (permalink / raw)
  To: Alex Neronskiy; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Thu, Jul 21, 2011 at 11:11:13AM -0700, Alex Neronskiy wrote:
> The hard lockup detector is made to work more like the soft lockup
> detector by using timestamps to compare how long it's been since
> an event happened. Specifically, when the period function that checks
> for soft lockups fires, it touches a timestamp, and the hard lockup
> detector now checks if the timer is late, based on when it last fired.
> Hard stalls are now sub-hardlock threshold delays in the timer's
> firing which are nonetheless notable.

So I played with the hardlockup case and I kinda like the timestamp thing.
It seems to give useful data.  In fact I feel like I can shrink the
hardlockup window, run some tests and see where the latencies are in a
system.  The patch itself I think is ok, I'll review on Monday or Tuesday
when I get some more free time.

However, I ran the softlockup case and the output was a mess.  I think
rcu_sched stalls were being detected and as a result it was NMI dumping
stack traces for all cpus.  I can't tell if it was your patch or some
uncovered bug.

I'll dig into on Monday.  Not sure if you were able to see that.

Thanks,
Don

> 
> Signed-off-by: Alex Neronskiy <zakmagnus@google.com>
> ---
>  kernel/watchdog.c |   83 +++++++++++++++++++++++++++++-----------------------
>  1 files changed, 46 insertions(+), 37 deletions(-)
> 
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index bf26810..17bcded 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -40,30 +40,28 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
>  #ifdef CONFIG_HARDLOCKUP_DETECTOR
>  static DEFINE_PER_CPU(bool, hard_watchdog_warn);
>  static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
> -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
> -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
> -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_missed);
> +static DEFINE_PER_CPU(unsigned long, hrtimer_touch_ts);
>  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
>  
>  unsigned long worst_hardstall;
>  static DEFINE_SPINLOCK(hardstall_lock);
>  
> -/* The number of consecutive hard check failures before it's a lockup */
> -#define hardlockup_thresh (5)
> +/* How often to check for hard lockups compared to watchdog_thresh */
> +#define hardlockup_freq (5)
>  
> -/*
> - * The minimum expected number of missed interrupts in a normal hard
> - * lockup check.
> - */
> -#define hardstall_thresh (2)
> -
> -#endif /* CONFIG_HARDLOCKUP_DETECTOR */
> +/* How late (in ms) a timer could be while still being ignored as "normal" */
> +#define hardstall_thresh (200)
>  
>  /*
>   * The number of milliseconds by which a stall must be worse than the
>   * current worst in order to be recorded. This prevents bursts of near-
>   * identical stalls.
>   */
> +#define hardstall_diff_thresh (100)
> +
> +#endif /* CONFIG_HARDLOCKUP_DETECTOR */
> +
> +/* Difference threshold for soft stalls (ms) */
>  #define softstall_diff_thresh (100)
>  
>  /* boot commands */
> @@ -156,6 +154,11 @@ static unsigned long get_sample_period(void)
>  	return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
>  }
>  
> +static inline unsigned long get_sample_period_ms(void)
> +{
> +	return get_sample_period() >> 20LL;
> +}
> +
>  /* Commands for resetting the watchdog */
>  static void __touch_watchdog(void)
>  {
> @@ -211,11 +214,11 @@ void touch_softlockup_watchdog_sync(void)
>  static void update_hardstall(unsigned long stall, int this_cpu)
>  {
>  	int update_stall = 0;
> -
> -	if (stall > hardstall_thresh && stall > worst_hardstall) {
> +	if (stall > hardstall_thresh &&
> +			stall > worst_hardstall + hardstall_diff_thresh) {
>  		unsigned long flags;
>  		spin_lock_irqsave(&hardstall_lock, flags);
> -		update_stall = stall > worst_hardstall;
> +		update_stall = stall > worst_hardstall + hardstall_diff_thresh;
>  		if (update_stall)
>  			worst_hardstall = stall;
>  		spin_unlock_irqrestore(&hardstall_lock, flags);
> @@ -223,7 +226,7 @@ static void update_hardstall(unsigned long stall, int this_cpu)
>  
>  	if (update_stall) {
>  		printk(KERN_WARNING "LOCKUP may be in progress!"
> -			"Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
> +			"Worst hard stall seen on CPU#%d: %lums\n",
>  			this_cpu, stall);
>  		dump_stack();
>  	}
> @@ -231,21 +234,27 @@ static void update_hardstall(unsigned long stall, int this_cpu)
>  
>  static int is_hardlockup(int this_cpu)
>  {
> -	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
> -	unsigned long hrint_saved = __this_cpu_read(hrtimer_interrupts_saved);
> -	unsigned long ints_missed = 0;
> +	unsigned long touch_ts = __this_cpu_read(hrtimer_touch_ts);
> +	unsigned long next_touch = touch_ts + get_sample_period_ms();
> +	unsigned long now = get_timestamp(this_cpu);
> +	unsigned long delay = now - next_touch;
>  
> -	__this_cpu_write(hrtimer_interrupts_saved, hrint);
> +	/* the timer has never fired; no data to go on yet */
> +	if (unlikely(!touch_ts))
> +		return 0;
>  
> -	if (hrint_saved == hrint)
> -		ints_missed = per_cpu(hrtimer_interrupts_missed, this_cpu)++;
> -	else
> -		__this_cpu_write(hrtimer_interrupts_missed, 0);
> +	/*
> +	 * If the next touch is supposed to be in the future, then
> +	 * everything is fine. If it's not, that means it was missed
> +	 * and something's wrong.
> +	 */
> +	if (time_after(next_touch, now))
> +		return 0;
>  
> -	if (ints_missed >= hardlockup_thresh)
> +	if (delay >= (watchdog_thresh << 10L))
>  		return 1;
>  
> -	update_hardstall(ints_missed, this_cpu);
> +	update_hardstall(delay, this_cpu);
>  	return 0;
>  }
>  #endif
> @@ -311,10 +320,10 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
>  	}
>  
>  	/* check for a hardlockup
> -	 * This is done by making sure our timer interrupt
> -	 * is incrementing.  The timer interrupt should have
> -	 * fired multiple times before we overflow'd.  If it hasn't
> -	 * then this is a good indication the cpu is stuck
> +	 * This is done by making sure our timer interrupts
> +	 * are firing. It is known approximately when the timer
> +	 * should fire and if it hasn't by then this is a good
> +	 * indication the cpu is stuck.
>  	 */
>  	lockup = is_hardlockup(this_cpu);
>  	if (lockup) {
> @@ -334,12 +343,12 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
>  	__this_cpu_write(hard_watchdog_warn, false);
>  	return;
>  }
> -static void watchdog_interrupt_count(void)
> +static void watchdog_interrupt_touch(int this_cpu)
>  {
> -	__this_cpu_inc(hrtimer_interrupts);
> +	__this_cpu_write(hrtimer_touch_ts, get_timestamp(this_cpu));
>  }
>  #else
> -static inline void watchdog_interrupt_count(void) { return; }
> +static inline void watchdog_interrupt_touch(int this_cpu) { return; }
>  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
>  
>  /* watchdog kicker functions */
> @@ -350,15 +359,15 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  	int duration;
>  	int this_cpu = smp_processor_id();
>  
> -	/* kick the hardlockup detector */
> -	watchdog_interrupt_count();
> -
>  	/* kick the softlockup detector */
>  	wake_up_process(__this_cpu_read(softlockup_watchdog));
>  
>  	/* .. and repeat */
>  	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
>  
> +	/* kick the hardlockup detector now that the timer's been scheduled */
> +	watchdog_interrupt_touch(this_cpu);
> +
>  	if (touch_ts == 0) {
>  		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
>  			/*
> @@ -460,7 +469,7 @@ static int watchdog_nmi_enable(int cpu)
>  	/* Try to register using hardware perf events */
>  	wd_attr = &wd_hw_attr;
>  	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
> -	do_div(wd_attr->sample_period, hardlockup_thresh);
> +	do_div(wd_attr->sample_period, hardlockup_freq);
>  	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
>  	if (!IS_ERR(event)) {
>  		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
> -- 
> 1.7.3.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-22 19:53 ` Don Zickus
@ 2011-07-22 22:34   ` ZAK Magnus
  2011-07-25 12:44     ` Don Zickus
  0 siblings, 1 reply; 15+ messages in thread
From: ZAK Magnus @ 2011-07-22 22:34 UTC (permalink / raw)
  To: Don Zickus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Fri, Jul 22, 2011 at 12:53 PM, Don Zickus <dzickus@redhat.com> wrote:

> So I played with the hardlockup case and I kinda like the timestamp thing.
> It seems to give useful data.  In fact I feel like I can shrink the
> hardlockup window, run some tests and see where the latencies are in a
> system.  The patch itself I think is ok, I'll review on Monday or Tuesday
> when I get some more free time.
>
> However, I ran the softlockup case and the output was a mess.  I think
> rcu_sched stalls were being detected and as a result it was NMI dumping
> stack traces for all cpus.  I can't tell if it was your patch or some
> uncovered bug.
>
> I'll dig into on Monday.  Not sure if you were able to see that.
>
> Thanks,
> Don
>
I'm not sure what you mean. One problem could be the wording I used.
For the soft stalls I just called it LOCKUP, mostly to be very showy
in order to cover that case where it's unclear what exactly is
happening. This doesn't do much to distinguish soft and hard lockups,
and I see LOCKUP otherwise seems to refer to hard lockup, so maybe
that's misleading.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-22 22:34   ` ZAK Magnus
@ 2011-07-25 12:44     ` Don Zickus
  2011-07-29  0:16       ` ZAK Magnus
  0 siblings, 1 reply; 15+ messages in thread
From: Don Zickus @ 2011-07-25 12:44 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Fri, Jul 22, 2011 at 03:34:37PM -0700, ZAK Magnus wrote:
> On Fri, Jul 22, 2011 at 12:53 PM, Don Zickus <dzickus@redhat.com> wrote:
> 
> > So I played with the hardlockup case and I kinda like the timestamp thing.
> > It seems to give useful data.  In fact I feel like I can shrink the
> > hardlockup window, run some tests and see where the latencies are in a
> > system.  The patch itself I think is ok, I'll review on Monday or Tuesday
> > when I get some more free time.
> >
> > However, I ran the softlockup case and the output was a mess.  I think
> > rcu_sched stalls were being detected and as a result it was NMI dumping
> > stack traces for all cpus.  I can't tell if it was your patch or some
> > uncovered bug.
> >
> > I'll dig into on Monday.  Not sure if you were able to see that.
> >
> > Thanks,
> > Don
> >
> I'm not sure what you mean. One problem could be the wording I used.
> For the soft stalls I just called it LOCKUP, mostly to be very showy
> in order to cover that case where it's unclear what exactly is
> happening. This doesn't do much to distinguish soft and hard lockups,
> and I see LOCKUP otherwise seems to refer to hard lockup, so maybe
> that's misleading.

It had nothing to do with the wording.  It was spewing a ton of stack
traces.  Most of them related to rcu_sched stalls which requested stack
traces for each cpu (and the machine I was on had 16 cpus) repeatedly.

So from a user perspective, I just saw a flood of stack traces scroll
across the screen forever for a minute.  It was impossible to determine
what was going on without reviewing the logs once everything calmed down.
That is never a good thing.  It probably has nothing to do with your
patch, but it is something that should be looked at.

I'll try and poke today or tomorrow.

Cheers,
Don

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-25 12:44     ` Don Zickus
@ 2011-07-29  0:16       ` ZAK Magnus
  2011-07-29 13:10         ` Don Zickus
  2011-07-29 20:55         ` Don Zickus
  0 siblings, 2 replies; 15+ messages in thread
From: ZAK Magnus @ 2011-07-29  0:16 UTC (permalink / raw)
  To: Don Zickus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

No news?

I've been testing and looking into issues and I realized dump_stack()
calls touch_nmi_watchdog(). That wrecks what the patch is trying to do
so I'm changing it to save the trace and print it later after the
stall has completed. This would also resolve some other things you
were saying weren't so good. Hopefully the logic is similar enough
that some things you may have learned still apply.

On Mon, Jul 25, 2011 at 5:44 AM, Don Zickus <dzickus@redhat.com> wrote:
> On Fri, Jul 22, 2011 at 03:34:37PM -0700, ZAK Magnus wrote:
>> On Fri, Jul 22, 2011 at 12:53 PM, Don Zickus <dzickus@redhat.com> wrote:
>>
>> > So I played with the hardlockup case and I kinda like the timestamp thing.
>> > It seems to give useful data.  In fact I feel like I can shrink the
>> > hardlockup window, run some tests and see where the latencies are in a
>> > system.  The patch itself I think is ok, I'll review on Monday or Tuesday
>> > when I get some more free time.
>> >
>> > However, I ran the softlockup case and the output was a mess.  I think
>> > rcu_sched stalls were being detected and as a result it was NMI dumping
>> > stack traces for all cpus.  I can't tell if it was your patch or some
>> > uncovered bug.
>> >
>> > I'll dig into on Monday.  Not sure if you were able to see that.
>> >
>> > Thanks,
>> > Don
>> >
>> I'm not sure what you mean. One problem could be the wording I used.
>> For the soft stalls I just called it LOCKUP, mostly to be very showy
>> in order to cover that case where it's unclear what exactly is
>> happening. This doesn't do much to distinguish soft and hard lockups,
>> and I see LOCKUP otherwise seems to refer to hard lockup, so maybe
>> that's misleading.
>
> It had nothing to do with the wording.  It was spewing a ton of stack
> traces.  Most of them related to rcu_sched stalls which requested stack
> traces for each cpu (and the machine I was on had 16 cpus) repeatedly.
>
> So from a user perspective, I just saw a flood of stack traces scroll
> across the screen forever for a minute.  It was impossible to determine
> what was going on without reviewing the logs once everything calmed down.
> That is never a good thing.  It probably has nothing to do with your
> patch, but it is something that should be looked at.
>
> I'll try and poke today or tomorrow.
>
> Cheers,
> Don
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-29  0:16       ` ZAK Magnus
@ 2011-07-29 13:10         ` Don Zickus
  2011-07-29 20:55         ` Don Zickus
  1 sibling, 0 replies; 15+ messages in thread
From: Don Zickus @ 2011-07-29 13:10 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Thu, Jul 28, 2011 at 05:16:00PM -0700, ZAK Magnus wrote:
> No news?
> 
> I've been testing and looking into issues and I realized dump_stack()
> calls touch_nmi_watchdog(). That wrecks what the patch is trying to do
> so I'm changing it to save the trace and print it later after the
> stall has completed. This would also resolve some other things you
> were saying weren't so good. Hopefully the logic is similar enough
> that some things you may have learned still apply.

Sorry, I have been caught up with a bunch of other work related issues
here.  I should be able to poke at this today.

Yeah, I forgot about the console write path.  That was causing another
separate problem for me a while ago with softlockups causing a
touch_nmi_watchdog on all cpus.  I still have to trade fixes to get that
in.

Cheers,
Don

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-29  0:16       ` ZAK Magnus
  2011-07-29 13:10         ` Don Zickus
@ 2011-07-29 20:55         ` Don Zickus
  2011-07-29 23:12           ` ZAK Magnus
  1 sibling, 1 reply; 15+ messages in thread
From: Don Zickus @ 2011-07-29 20:55 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Thu, Jul 28, 2011 at 05:16:00PM -0700, ZAK Magnus wrote:
> No news?
> 
> I've been testing and looking into issues and I realized dump_stack()
> calls touch_nmi_watchdog(). That wrecks what the patch is trying to do
> so I'm changing it to save the trace and print it later after the
> stall has completed. This would also resolve some other things you
> were saying weren't so good. Hopefully the logic is similar enough
> that some things you may have learned still apply.

So yeah, the act of printing was resetting the softlockup counter and
delaying it forever.  In parallel, rcu has its own stall detector that was
going off after a minute or two.

Once I routed the printk to trace_printk and disabled dump_stack,
everything started working as expected.

Now the question is how to avoid shooting ourselves in the foot by
printk'ing a message without resetting the hard/soft lock watchdogs.

I'll have to think about how to do that.  If you can come up with any
ideas let me know.

We almost need a quiet dump_stack that dumps to a buffer instead of the
console.  But I am not sure that is worth the effort.

Hmm.

Cheers,
Don

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-29 20:55         ` Don Zickus
@ 2011-07-29 23:12           ` ZAK Magnus
  2011-08-01 12:52             ` Don Zickus
  0 siblings, 1 reply; 15+ messages in thread
From: ZAK Magnus @ 2011-07-29 23:12 UTC (permalink / raw)
  To: Don Zickus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

Are you saying that any call to printk() will touch the watchdogs? I
wasn't aware of that and it doesn't seem to comply with my
observations too well, either. Then again, at the moment I don't
understand some of the things I'm currently seeing so I could just be
wrong.

On Fri, Jul 29, 2011 at 1:55 PM, Don Zickus <dzickus@redhat.com> wrote:
> On Thu, Jul 28, 2011 at 05:16:00PM -0700, ZAK Magnus wrote:
>> No news?
>>
>> I've been testing and looking into issues and I realized dump_stack()
>> calls touch_nmi_watchdog(). That wrecks what the patch is trying to do
>> so I'm changing it to save the trace and print it later after the
>> stall has completed. This would also resolve some other things you
>> were saying weren't so good. Hopefully the logic is similar enough
>> that some things you may have learned still apply.
>
> So yeah, the act of printing was resetting the softlockup counter and
> delaying it forever.  In parallel, rcu has its own stall detector that was
> going off after a minute or two.
>
> Once I routed the printk to trace_printk and disabled dump_stack,
> everything started working as expected.
>
> Now the question is how to avoid shooting ourselves in the foot by
> printk'ing a message without resetting the hard/soft lock watchdogs.
>
> I'll have to think about how to do that.  If you can come up with any
> ideas let me know.
>
> We almost need a quiet dump_stack that dumps to a buffer instead of the
> console.  But I am not sure that is worth the effort.
>
> Hmm.
>
> Cheers,
> Don
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-07-29 23:12           ` ZAK Magnus
@ 2011-08-01 12:52             ` Don Zickus
  2011-08-01 18:33               ` ZAK Magnus
  0 siblings, 1 reply; 15+ messages in thread
From: Don Zickus @ 2011-08-01 12:52 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Fri, Jul 29, 2011 at 04:12:32PM -0700, ZAK Magnus wrote:
> Are you saying that any call to printk() will touch the watchdogs? I
> wasn't aware of that and it doesn't seem to comply with my
> observations too well, either. Then again, at the moment I don't
> understand some of the things I'm currently seeing so I could just be
> wrong.

I believe the serial console write is the source of all the
touch_nmi_watchdogs that result from a printk.

drivers/tty/serial/8250.c::serial8250_console_write()

Cheers,
Don

> 
> On Fri, Jul 29, 2011 at 1:55 PM, Don Zickus <dzickus@redhat.com> wrote:
> > On Thu, Jul 28, 2011 at 05:16:00PM -0700, ZAK Magnus wrote:
> >> No news?
> >>
> >> I've been testing and looking into issues and I realized dump_stack()
> >> calls touch_nmi_watchdog(). That wrecks what the patch is trying to do
> >> so I'm changing it to save the trace and print it later after the
> >> stall has completed. This would also resolve some other things you
> >> were saying weren't so good. Hopefully the logic is similar enough
> >> that some things you may have learned still apply.
> >
> > So yeah, the act of printing was resetting the softlockup counter and
> > delaying it forever.  In parallel, rcu has its own stall detector that was
> > going off after a minute or two.
> >
> > Once I routed the printk to trace_printk and disabled dump_stack,
> > everything started working as expected.
> >
> > Now the question is how to avoid shooting ourselves in the foot by
> > printk'ing a message without resetting the hard/soft lock watchdogs.
> >
> > I'll have to think about how to do that.  If you can come up with any
> > ideas let me know.
> >
> > We almost need a quiet dump_stack that dumps to a buffer instead of the
> > console.  But I am not sure that is worth the effort.
> >
> > Hmm.
> >
> > Cheers,
> > Don
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-08-01 12:52             ` Don Zickus
@ 2011-08-01 18:33               ` ZAK Magnus
  2011-08-01 19:24                 ` Don Zickus
  0 siblings, 1 reply; 15+ messages in thread
From: ZAK Magnus @ 2011-08-01 18:33 UTC (permalink / raw)
  To: Don Zickus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

Okay... So this is a problem we need to solve. Does there exist a good
way to output a stack trace to, say, a file in /proc? I think that
would be an appealing solution, if doable.

On Mon, Aug 1, 2011 at 5:52 AM, Don Zickus <dzickus@redhat.com> wrote:
> On Fri, Jul 29, 2011 at 04:12:32PM -0700, ZAK Magnus wrote:
>> Are you saying that any call to printk() will touch the watchdogs? I
>> wasn't aware of that and it doesn't seem to comply with my
>> observations too well, either. Then again, at the moment I don't
>> understand some of the things I'm currently seeing so I could just be
>> wrong.
>
> I believe the serial console write is the source of all the
> touch_nmi_watchdogs that result from a printk.
>
> drivers/tty/serial/8250.c::serial8250_console_write()
>
> Cheers,
> Don
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-08-01 18:33               ` ZAK Magnus
@ 2011-08-01 19:24                 ` Don Zickus
  2011-08-01 20:11                   ` ZAK Magnus
  0 siblings, 1 reply; 15+ messages in thread
From: Don Zickus @ 2011-08-01 19:24 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Mon, Aug 01, 2011 at 11:33:24AM -0700, ZAK Magnus wrote:
> Okay... So this is a problem we need to solve. Does there exist a good
> way to output a stack trace to, say, a file in /proc? I think that
> would be an appealing solution, if doable.

One idea I thought of to workaround this is to save the timestamp and the
watchdog bool and restore after the stack dump.  It's a cheap hack and I
am not too sure about the locking as it might race with
am not to sure about the locking as it might race with
touch_nmi_watchdog().  But it gives you an idea what I was thinking.

Being in the nmi context, no one can normally touch these variables,
except for another cpu using touch_nmi_watchdog() (or watchdog_enable()
but that should never race in these scenarios).

Cheers,
Don

compile tested only.


diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 17bcded..2dcedb3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -214,6 +214,9 @@ void touch_softlockup_watchdog_sync(void)
 static void update_hardstall(unsigned long stall, int this_cpu)
 {
 	int update_stall = 0;
+	int ts;
+	bool touched;
+
 	if (stall > hardstall_thresh &&
 			stall > worst_hardstall + hardstall_diff_thresh) {
 		unsigned long flags;
@@ -225,10 +228,14 @@ static void update_hardstall(unsigned long stall, int this_cpu)
 	}
 
 	if (update_stall) {
+		ts = __this_cpu_read(watchdog_touch_ts);
+		touched = __this_cpu_read(watchdog_nmi_touch);
 		printk(KERN_WARNING "LOCKUP may be in progress!"
 			"Worst hard stall seen on CPU#%d: %lums\n",
 			this_cpu, stall);
 		dump_stack();
+		__this_cpu_write(watchdog_touch_ts, ts);
+		__this_cpu_write(watchdog_nmi_touch, touched);
 	}
 }
 
@@ -262,6 +269,9 @@ static int is_hardlockup(int this_cpu)
 static void update_softstall(unsigned long stall, int this_cpu)
 {
 	int update_stall = 0;
+	int ts;
+	bool touched;
+
 	if (stall > get_softstall_thresh() &&
 			stall > worst_softstall + softstall_diff_thresh) {
 		unsigned long flags;
@@ -273,10 +283,14 @@ static void update_softstall(unsigned long stall, int this_cpu)
 	}
 
 	if (update_stall) {
+		ts = __this_cpu_read(watchdog_touch_ts);
+		touched = __this_cpu_read(watchdog_nmi_touch);
 		printk(KERN_WARNING "LOCKUP may be in progress!"
 				"Worst soft stall seen on CPU#%d: %lums\n",
 				this_cpu, stall);
 		dump_stack();
+		__this_cpu_write(watchdog_touch_ts, ts);
+		__this_cpu_write(watchdog_nmi_touch, touched);
 	}
 }
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-08-01 19:24                 ` Don Zickus
@ 2011-08-01 20:11                   ` ZAK Magnus
  2011-08-03 18:27                     ` ZAK Magnus
  2011-08-03 19:11                     ` Don Zickus
  0 siblings, 2 replies; 15+ messages in thread
From: ZAK Magnus @ 2011-08-01 20:11 UTC (permalink / raw)
  To: Don Zickus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Mon, Aug 1, 2011 at 12:24 PM, Don Zickus <dzickus@redhat.com> wrote:
> One idea I thought of to workaround this is to save the timestamp and the
> watchdog bool and restore after the stack dump.  It's a cheap hack and I
> am not too sure about the locking as it might race with
> touch_nmi_watchdog().  But it gives you an idea what I was thinking.
Yes, I see. Is the hackiness of it okay?

> Being in the nmi context, no one can normally touch these variables,
> except for another cpu using touch_nmi_watchdog() (or watchdog_enable()
> but that should never race in these scenarios).
Well, the soft lockup stuff is not a NMI but just a regular interrupt,
right? Is that also okay?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-08-01 20:11                   ` ZAK Magnus
@ 2011-08-03 18:27                     ` ZAK Magnus
  2011-08-03 19:44                       ` Don Zickus
  2011-08-03 19:11                     ` Don Zickus
  1 sibling, 1 reply; 15+ messages in thread
From: ZAK Magnus @ 2011-08-03 18:27 UTC (permalink / raw)
  To: Don Zickus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

An update:

I realized that the timing function I was using was no good. On x86 it
will try to use the TSC but if it can't (which, as I understand, isn't
a very rare case), it will fall back on a jiffies-based solution. If
interrupts are disabled, time would appear to "stand still" and
lockups wouldn't be detected. So, looks like I have to nix this
particular patch and not change the hard lockup detection that much.

Meanwhile, I'm working on printing stack traces to a buffer/out to
file. I got some help and it looks like it's not that hard to do. I'll
send it when it's ready.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-08-01 20:11                   ` ZAK Magnus
  2011-08-03 18:27                     ` ZAK Magnus
@ 2011-08-03 19:11                     ` Don Zickus
  1 sibling, 0 replies; 15+ messages in thread
From: Don Zickus @ 2011-08-03 19:11 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Mon, Aug 01, 2011 at 01:11:27PM -0700, ZAK Magnus wrote:
> On Mon, Aug 1, 2011 at 12:24 PM, Don Zickus <dzickus@redhat.com> wrote:
> > One idea I thought of to workaround this is to save the timestamp and the
> > watchdog bool and restore after the stack dump.  It's a cheap hack and I
> > am not too sure about the locking as it might race with
> > touch_nmi_watchdog().  But it gives you an idea what I was thinking.
> Yes, I see. Is the hackiness of it okay?

Hi,

I don't think it is too bad.  Most of the stuff is per_cpu and is intended
to be per_cpu.  There might be a random case where another cpu is trying
to zero out the watchdog_nmi_touch or watchdog_touch_ts variables.

I was trying to fix the cross-cpu case for watchdog_nmi_touch to eliminate
that problem but Ingo wanted me to implement some panic ratelimit first
(which I lost track of doing).  And being in the NMI context and staying
per_cpu should make that case safe I believe, despite the hackiness of it.

The watchdog_touch_ts is only called on another cpu in the
touch_all_softlockup_watchdogs() case, which only happens when the
scheduler is spewing stats currently.  This should happen rarely.  This
leaves the problem of softlockups being preempted in the interrupt
context and touched by another interrupt handler.  I don't know how to
solve this reliably but I think it should be ok most of the time.  The
only downside is a premature softlockup I would think.

I can't think of a better way to workaround the problem and still move
forward with your idea of warning on future stalls.

Then again I have been busy here and haven't put enough thought into it.

Cheers,
Don

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/2] Make hard lockup detection use timestamps
  2011-08-03 18:27                     ` ZAK Magnus
@ 2011-08-03 19:44                       ` Don Zickus
  0 siblings, 0 replies; 15+ messages in thread
From: Don Zickus @ 2011-08-03 19:44 UTC (permalink / raw)
  To: ZAK Magnus; +Cc: linux-kernel, Ingo Molnar, Mandeep Singh Baines

On Wed, Aug 03, 2011 at 11:27:16AM -0700, ZAK Magnus wrote:
> An update:
> 
> I realized that the timing function I was using was no good. On x86 it
> will try to use the TSC but if it can't (which, as I understand, isn't
> a very rare case), it will fall back on a jiffies-based solution. If
> interrupts are disabled, time would appear to "stand still" and
> lockups wouldn't be detected. So, looks like I have to nix this
> particular patch and not change the hard lockup detection that much.

Hmm, I was initially worried about that, but after seeing the function
you were using I assumed Peter had vetted it properly for clean use from
an NMI context.  I guess I didn't fully understand the limitations of it.

> 
> Meanwhile, I'm working on printing stack traces to a buffer/out to
> file. I got some help and it looks like it's not that hard to do. I'll
> send it when it's ready.

Interesting.  I look forward to seeing it.

Cheers,
Don


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-08-03 19:45 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-21 18:11 [PATCH v3 2/2] Make hard lockup detection use timestamps Alex Neronskiy
2011-07-22 19:53 ` Don Zickus
2011-07-22 22:34   ` ZAK Magnus
2011-07-25 12:44     ` Don Zickus
2011-07-29  0:16       ` ZAK Magnus
2011-07-29 13:10         ` Don Zickus
2011-07-29 20:55         ` Don Zickus
2011-07-29 23:12           ` ZAK Magnus
2011-08-01 12:52             ` Don Zickus
2011-08-01 18:33               ` ZAK Magnus
2011-08-01 19:24                 ` Don Zickus
2011-08-01 20:11                   ` ZAK Magnus
2011-08-03 18:27                     ` ZAK Magnus
2011-08-03 19:44                       ` Don Zickus
2011-08-03 19:11                     ` Don Zickus

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.