linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched: watchdog: Touch kernel watchdog in sched code
@ 2020-03-04 21:39 Xi Wang
  2020-03-05  3:11 ` Steven Rostedt
  2020-03-05  7:57 ` Peter Zijlstra
  0 siblings, 2 replies; 12+ messages in thread
From: Xi Wang @ 2020-03-04 21:39 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Josh Don, linux-kernel, linux-fsdevel, Xi Wang, Paul Turner

The main purpose of kernel watchdog is to test whether scheduler can
still schedule tasks on a cpu. In order to reduce latency from
periodically invoking watchdog reset in thread context, we can simply
touch watchdog from pick_next_task in scheduler. Compared to actually
resetting watchdog from cpu stop / migration threads, we lose coverage
on: a migration thread actually gets picked and we actually context
switch to the migration thread. Both steps are heavily protected by
kernel locks and unlikely to silently fail. Thus the change would
provide the same level of protection with less overhead.

The new way vs the old way to touch the watchdogs is configurable
from:

/proc/sys/kernel/watchdog_touch_in_thread_interval

The value means:
0: Always touch watchdog from pick_next_task
1: Always touch watchdog from migration thread
N (N>1): Touch watchdog from migration thread once in every N
         invocations, and touch watchdog from pick_next_task for
         other invocations.

Suggested-by: Paul Turner <pjt@google.com>
Signed-off-by: Xi Wang <xii@google.com>
---
 kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++++--
 kernel/sysctl.c     | 11 ++++++++++-
 kernel/watchdog.c   | 39 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a9983da4408..9d8e00760d1c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3898,6 +3898,27 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
 	schedstat_inc(this_rq()->sched_count);
 }
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+DEFINE_PER_CPU(bool, sched_should_touch_watchdog);
+
+void touch_watchdog_from_sched(void);
+
+/* Helper called by watchdog code */
+void resched_for_watchdog(void)
+{
+	unsigned long flags;
+	struct rq *rq = this_rq();
+
+	this_cpu_write(sched_should_touch_watchdog, true);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	/* Trigger resched for code in pick_next_task to touch watchdog */
+	resched_curr(rq);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
+
 /*
  * Pick up the highest-prio task:
  */
@@ -3927,7 +3948,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			p = pick_next_task_idle(rq);
 		}
 
-		return p;
+		goto out;
 	}
 
 restart:
@@ -3951,11 +3972,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	for_each_class(class) {
 		p = class->pick_next_task(rq);
 		if (p)
-			return p;
+			goto out;
 	}
 
 	/* The idle class should always have a runnable task: */
 	BUG();
+
+out:
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+	if (this_cpu_read(sched_should_touch_watchdog)) {
+		touch_watchdog_from_sched();
+		this_cpu_write(sched_should_touch_watchdog, false);
+	}
+#endif
+
+	return p;
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ad5b88a53c5a..adb4b11fbccb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -118,6 +118,9 @@ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifndef CONFIG_MMU
 extern int sysctl_nr_trim_pages;
 #endif
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+extern unsigned int sysctl_watchdog_touch_in_thread_interval;
+#endif
 
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -961,6 +964,13 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "watchdog_touch_in_thread_interval",
+		.data		= &sysctl_watchdog_touch_in_thread_interval,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_SMP
 	{
 		.procname	= "softlockup_all_cpu_backtrace",
@@ -996,7 +1006,6 @@ static struct ctl_table kern_table[] = {
 #endif /* CONFIG_SMP */
 #endif
 #endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b6b1f54a7837..f9138c29db48 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -49,6 +49,16 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
 struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+/*
+ * 0: Always touch watchdog from pick_next_task
+ * 1: Always touch watchdog from migration thread
+ * N (N>0): Touch watchdog from migration thread once in every N invocations,
+ *          and touch watchdog from pick_next_task for other invocations.
+ */
+unsigned int sysctl_watchdog_touch_in_thread_interval = 10;
+#endif
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -356,6 +366,9 @@ static int softlockup_fn(void *data)
 	return 0;
 }
 
+static DEFINE_PER_CPU(unsigned int, num_watchdog_wakeup_skipped);
+void resched_for_watchdog(void);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -371,11 +384,20 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
-		reinit_completion(this_cpu_ptr(&softlockup_completion));
-		stop_one_cpu_nowait(smp_processor_id(),
-				softlockup_fn, NULL,
-				this_cpu_ptr(&softlockup_stop_work));
+	if ((!sysctl_watchdog_touch_in_thread_interval ||
+	  sysctl_watchdog_touch_in_thread_interval > this_cpu_read(num_watchdog_wakeup_skipped) + 1)) {
+		this_cpu_write(num_watchdog_wakeup_skipped, sysctl_watchdog_touch_in_thread_interval ?
+		  this_cpu_read(num_watchdog_wakeup_skipped) + 1 : 0);
+		/* touch watchdog from pick_next_task */
+		resched_for_watchdog();
+	} else {
+		this_cpu_write(num_watchdog_wakeup_skipped, 0);
+		if (completion_done(this_cpu_ptr(&softlockup_completion))) {
+			reinit_completion(this_cpu_ptr(&softlockup_completion));
+			stop_one_cpu_nowait(smp_processor_id(),
+					softlockup_fn, NULL,
+					this_cpu_ptr(&softlockup_stop_work));
+		}
 	}
 
 	/* .. and repeat */
@@ -526,6 +548,13 @@ static int softlockup_start_fn(void *data)
 	return 0;
 }
 
+
+/* Similar to watchdog thread function but called from pick_next_task */
+void touch_watchdog_from_sched(void)
+{
+	__touch_watchdog();
+}
+
 static void softlockup_start_all(void)
 {
 	int cpu;
-- 
2.25.1.481.gfbce0eb801-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-04 21:39 [PATCH] sched: watchdog: Touch kernel watchdog in sched code Xi Wang
@ 2020-03-05  3:11 ` Steven Rostedt
  2020-03-05  7:57 ` Peter Zijlstra
  1 sibling, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2020-03-05  3:11 UTC (permalink / raw)
  To: Xi Wang
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Josh Don, linux-kernel, linux-fsdevel,
	Paul Turner

On Wed,  4 Mar 2020 13:39:41 -0800
Xi Wang <xii@google.com> wrote:

> The main purpose of kernel watchdog is to test whether scheduler can
> still schedule tasks on a cpu. In order to reduce latency from
> periodically invoking watchdog reset in thread context, we can simply
> touch watchdog from pick_next_task in scheduler. Compared to actually
> resetting watchdog from cpu stop / migration threads, we lose coverage
> on: a migration thread actually get picked and we actually context
> switch to the migration thread. Both steps are heavily protected by
> kernel locks and unlikely to silently fail. Thus the change would
> provide the same level of protection with less overhead.

Have any measurements showing the drop in overhead?

> 
> The new way vs the old way to touch the watchdogs is configurable
> from:
> 
> /proc/sys/kernel/watchdog_touch_in_thread_interval
> 
> The value means:
> 0: Always touch watchdog from pick_next_task
> 1: Always touch watchdog from migration thread
> N (N>0): Touch watchdog from migration thread once in every N
>          invocations, and touch watchdog from pick_next_task for
>          other invocations.
> 
> Suggested-by: Paul Turner <pjt@google.com>
> Signed-off-by: Xi Wang <xii@google.com>
> ---
>  kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++++--
>  kernel/sysctl.c     | 11 ++++++++++-
>  kernel/watchdog.c   | 39 ++++++++++++++++++++++++++++++++++-----
>  3 files changed, 78 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 1a9983da4408..9d8e00760d1c 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3898,6 +3898,27 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
>  	schedstat_inc(this_rq()->sched_count);
>  }
>  
> +#ifdef CONFIG_SOFTLOCKUP_DETECTOR
> +
> +DEFINE_PER_CPU(bool, sched_should_touch_watchdog);
> +
> +void touch_watchdog_from_sched(void);
> +
> +/* Helper called by watchdog code */
> +void resched_for_watchdog(void)
> +{
> +	unsigned long flags;
> +	struct rq *rq = this_rq();
> +
> +	this_cpu_write(sched_should_touch_watchdog, true);

Perhaps we should have a preempt_disable, otherwise it is possible
to get preempted here.

-- Steve

> +	raw_spin_lock_irqsave(&rq->lock, flags);
> +	/* Trigger resched for code in pick_next_task to touch watchdog */
> +	resched_curr(rq);
> +	raw_spin_unlock_irqrestore(&rq->lock, flags);
> +}
> +
> +#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
> +


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-04 21:39 [PATCH] sched: watchdog: Touch kernel watchdog in sched code Xi Wang
  2020-03-05  3:11 ` Steven Rostedt
@ 2020-03-05  7:57 ` Peter Zijlstra
  2020-03-05 18:07   ` Thomas Gleixner
  2020-03-05 22:11   ` Paul Turner
  1 sibling, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2020-03-05  7:57 UTC (permalink / raw)
  To: Xi Wang
  Cc: Ingo Molnar, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Josh Don, linux-kernel, linux-fsdevel,
	Paul Turner

On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> The main purpose of kernel watchdog is to test whether scheduler can
> still schedule tasks on a cpu. In order to reduce latency from
> periodically invoking watchdog reset in thread context, we can simply
> touch watchdog from pick_next_task in scheduler. Compared to actually
> resetting watchdog from cpu stop / migration threads, we lose coverage
> on: a migration thread actually get picked and we actually context
> switch to the migration thread. Both steps are heavily protected by
> kernel locks and unlikely to silently fail. Thus the change would
> provide the same level of protection with less overhead.
> 
> The new way vs the old way to touch the watchdogs is configurable
> from:
> 
> /proc/sys/kernel/watchdog_touch_in_thread_interval
> 
> The value means:
> 0: Always touch watchdog from pick_next_task
> 1: Always touch watchdog from migration thread
> N (N>0): Touch watchdog from migration thread once in every N
>          invocations, and touch watchdog from pick_next_task for
>          other invocations.
> 

This is configurable madness. What are we really trying to do here?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05  7:57 ` Peter Zijlstra
@ 2020-03-05 18:07   ` Thomas Gleixner
  2020-03-05 21:41     ` Xi Wang
                       ` (2 more replies)
  2020-03-05 22:11   ` Paul Turner
  1 sibling, 3 replies; 12+ messages in thread
From: Thomas Gleixner @ 2020-03-05 18:07 UTC (permalink / raw)
  To: Peter Zijlstra, Xi Wang
  Cc: Ingo Molnar, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Josh Don, linux-kernel, linux-fsdevel,
	Paul Turner

Peter Zijlstra <peterz@infradead.org> writes:

> On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
>> The main purpose of kernel watchdog is to test whether scheduler can
>> still schedule tasks on a cpu. In order to reduce latency from
>> periodically invoking watchdog reset in thread context, we can simply
>> touch watchdog from pick_next_task in scheduler. Compared to actually
>> resetting watchdog from cpu stop / migration threads, we lose coverage
>> on: a migration thread actually get picked and we actually context
>> switch to the migration thread. Both steps are heavily protected by
>> kernel locks and unlikely to silently fail. Thus the change would
>> provide the same level of protection with less overhead.
>> 
>> The new way vs the old way to touch the watchdogs is configurable
>> from:
>> 
>> /proc/sys/kernel/watchdog_touch_in_thread_interval
>> 
>> The value means:
>> 0: Always touch watchdog from pick_next_task
>> 1: Always touch watchdog from migration thread
>> N (N>0): Touch watchdog from migration thread once in every N
>>          invocations, and touch watchdog from pick_next_task for
>>          other invocations.
>> 
>
> This is configurable madness. What are we really trying to do here?

Create yet another knob which will be advertised in random web blogs to
solve all problems of the world and some more. Like the one which got
silently turned into a NOOP ~10 years ago :)



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05 18:07   ` Thomas Gleixner
@ 2020-03-05 21:41     ` Xi Wang
  2020-03-05 22:07     ` Paul Turner
       [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
  2 siblings, 0 replies; 12+ messages in thread
From: Xi Wang @ 2020-03-05 21:41 UTC (permalink / raw)
  Cc: linux-kernel, linux-fsdevel

Measuring jitter from a userspace busy loop showed a 4us peak was
flattened in the histogram (cascadelake). So the effect is likely
reducing overhead/jitter by 4us.

Code in resched_for_watchdog should be ok since it is always called
from the watchdog hrtimer function?

Why supporting the option to alternate between thread context and
touch in sched: Might be a little risky to completely switch to the
touch in sched method. Touch in sched for 9 out of 10 times still
captures most of the latency benefit. I can remove it or change it to
on/off if desired.

Advertising the knob on random blogs: Maybe I should create a blog :)

-Xi


On Thu, Mar 5, 2020 at 10:07 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> >> The main purpose of kernel watchdog is to test whether scheduler can
> >> still schedule tasks on a cpu. In order to reduce latency from
> >> periodically invoking watchdog reset in thread context, we can simply
> >> touch watchdog from pick_next_task in scheduler. Compared to actually
> >> resetting watchdog from cpu stop / migration threads, we lose coverage
> >> on: a migration thread actually get picked and we actually context
> >> switch to the migration thread. Both steps are heavily protected by
> >> kernel locks and unlikely to silently fail. Thus the change would
> >> provide the same level of protection with less overhead.
> >>
> >> The new way vs the old way to touch the watchdogs is configurable
> >> from:
> >>
> >> /proc/sys/kernel/watchdog_touch_in_thread_interval
> >>
> >> The value means:
> >> 0: Always touch watchdog from pick_next_task
> >> 1: Always touch watchdog from migration thread
> >> N (N>0): Touch watchdog from migration thread once in every N
> >>          invocations, and touch watchdog from pick_next_task for
> >>          other invocations.
> >>
> >
> > This is configurable madness. What are we really trying to do here?
>
> Create yet another knob which will be advertised in random web blogs to
> solve all problems of the world and some more. Like the one which got
> silently turned into a NOOP ~10 years ago :)
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05 18:07   ` Thomas Gleixner
  2020-03-05 21:41     ` Xi Wang
@ 2020-03-05 22:07     ` Paul Turner
       [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
  2 siblings, 0 replies; 12+ messages in thread
From: Paul Turner @ 2020-03-05 22:07 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, Xi Wang, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Josh Don,
	LKML, linux-fsdevel

On Thu, Mar 5, 2020 at 10:07 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> >> The main purpose of kernel watchdog is to test whether scheduler can
> >> still schedule tasks on a cpu. In order to reduce latency from
> >> periodically invoking watchdog reset in thread context, we can simply
> >> touch watchdog from pick_next_task in scheduler. Compared to actually
> >> resetting watchdog from cpu stop / migration threads, we lose coverage
> >> on: a migration thread actually get picked and we actually context
> >> switch to the migration thread. Both steps are heavily protected by
> >> kernel locks and unlikely to silently fail. Thus the change would
> >> provide the same level of protection with less overhead.
> >>
> >> The new way vs the old way to touch the watchdogs is configurable
> >> from:
> >>
> >> /proc/sys/kernel/watchdog_touch_in_thread_interval
> >>
> >> The value means:
> >> 0: Always touch watchdog from pick_next_task
> >> 1: Always touch watchdog from migration thread
> >> N (N>0): Touch watchdog from migration thread once in every N
> >>          invocations, and touch watchdog from pick_next_task for
> >>          other invocations.
> >>
> >
> > This is configurable madness. What are we really trying to do here?
>
> Create yet another knob which will be advertised in random web blogs to
> solve all problems of the world and some more. Like the one which got
> silently turned into a NOOP ~10 years ago :)
>

The knob can obviously be removed, it's vestigial and reflects caution
from when we were implementing / rolling things over to it.  We have
default values that we know work at scale. I don't think this actually
needs or wants to be tunable beyond on or off (and even that could be
strictly compile or boot time only).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05  7:57 ` Peter Zijlstra
  2020-03-05 18:07   ` Thomas Gleixner
@ 2020-03-05 22:11   ` Paul Turner
  2020-03-06  8:40     ` Peter Zijlstra
  1 sibling, 1 reply; 12+ messages in thread
From: Paul Turner @ 2020-03-05 22:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Xi Wang, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel

On Wed, Mar 4, 2020 at 11:57 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> > The main purpose of kernel watchdog is to test whether scheduler can
> > still schedule tasks on a cpu. In order to reduce latency from
> > periodically invoking watchdog reset in thread context, we can simply
> > touch watchdog from pick_next_task in scheduler. Compared to actually
> > resetting watchdog from cpu stop / migration threads, we lose coverage
> > on: a migration thread actually get picked and we actually context
> > switch to the migration thread. Both steps are heavily protected by
> > kernel locks and unlikely to silently fail. Thus the change would
> > provide the same level of protection with less overhead.
> >
> > The new way vs the old way to touch the watchdogs is configurable
> > from:
> >
> > /proc/sys/kernel/watchdog_touch_in_thread_interval
> >
> > The value means:
> > 0: Always touch watchdog from pick_next_task
> > 1: Always touch watchdog from migration thread
> > N (N>0): Touch watchdog from migration thread once in every N
> >          invocations, and touch watchdog from pick_next_task for
> >          other invocations.
> >
>
> This is configurable madness. What are we really trying to do here?

See reply to Thomas, no config is actually required here.  Focusing on
the intended outcome:

The goal is to improve jitter since we're constantly periodically
preempting other classes to run the watchdog.   Even on a single CPU
this is measurable as jitter in the us range.  But, what increases the
motivation is this disruption has been recently magnified by CPU
"gifts" which require evicting the whole core when one of the siblings
schedules one of these watchdog threads.

The majority outcome being asserted here is that we could actually
exercise pick_next_task if required -- there are other potential
things this will catch, but they are much more braindead generally
speaking (e.g. a bug in pick_next_task itself).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
       [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
@ 2020-03-06  8:28       ` Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2020-03-06  8:28 UTC (permalink / raw)
  To: Xi Wang
  Cc: Thomas Gleixner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, linux-kernel,
	linux-fsdevel, Paul Turner



A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05 22:11   ` Paul Turner
@ 2020-03-06  8:40     ` Peter Zijlstra
  2020-03-06 22:34       ` Xi Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2020-03-06  8:40 UTC (permalink / raw)
  To: Paul Turner
  Cc: Xi Wang, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel

On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> The goal is to improve jitter since we're constantly periodically
> preempting other classes to run the watchdog.   Even on a single CPU
> this is measurable as jitter in the us range.  But, what increases the
> motivation is this disruption has been recently magnified by CPU
> "gifts" which require evicting the whole core when one of the siblings
> schedules one of these watchdog threads.
> 
> The majority outcome being asserted here is that we could actually
> exercise pick_next_task if required -- there are other potential
> things this will catch, but they are much more braindead generally
> speaking (e.g. a bug in pick_next_task itself).

I still utterly hate what the patch does though; there is no way I'll
have watchdog code hook in the scheduler like this. That's just asking
for trouble.

Why isn't it sufficient to sample the existing context switch counters
from the watchdog? And why can't we fix that?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-06  8:40     ` Peter Zijlstra
@ 2020-03-06 22:34       ` Xi Wang
  2020-10-05 11:19         ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Xi Wang @ 2020-03-06 22:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Turner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel

On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > The goal is to improve jitter since we're constantly periodically
> > preempting other classes to run the watchdog.   Even on a single CPU
> > this is measurable as jitter in the us range.  But, what increases the
> > motivation is this disruption has been recently magnified by CPU
> > "gifts" which require evicting the whole core when one of the siblings
> > schedules one of these watchdog threads.
> >
> > The majority outcome being asserted here is that we could actually
> > exercise pick_next_task if required -- there are other potential
> > things this will catch, but they are much more braindead generally
> > speaking (e.g. a bug in pick_next_task itself).
>
> I still utterly hate what the patch does though; there is no way I'll
> have watchdog code hook in the scheduler like this. That's just asking
> for trouble.
>
> Why isn't it sufficient to sample the existing context switch counters
> from the watchdog? And why can't we fix that?

We could go to pick next and repick the same task. There won't be a
context switch but we still want to hold the watchdog. I assume such a
counter also needs to be per cpu and inside the rq lock. There doesn't
seem to be an existing one that fits this purpose.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-06 22:34       ` Xi Wang
@ 2020-10-05 11:19         ` Peter Zijlstra
  2020-10-06  2:21           ` Xi Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2020-10-05 11:19 UTC (permalink / raw)
  To: Xi Wang
  Cc: Paul Turner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel,
	Stephane Eranian, Arnaldo Carvalho de Melo

On Fri, Mar 06, 2020 at 02:34:20PM -0800, Xi Wang wrote:
> On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > > The goal is to improve jitter since we're constantly periodically
> > > preempting other classes to run the watchdog.   Even on a single CPU
> > > this is measurable as jitter in the us range.  But, what increases the
> > > motivation is this disruption has been recently magnified by CPU
> > > "gifts" which require evicting the whole core when one of the siblings
> > > schedules one of these watchdog threads.
> > >
> > > The majority outcome being asserted here is that we could actually
> > > exercise pick_next_task if required -- there are other potential
> > > things this will catch, but they are much more braindead generally
> > > speaking (e.g. a bug in pick_next_task itself).
> >
> > I still utterly hate what the patch does though; there is no way I'll
> > have watchdog code hook in the scheduler like this. That's just asking
> > for trouble.
> >
> > Why isn't it sufficient to sample the existing context switch counters
> > from the watchdog? And why can't we fix that?
> 
> We could go to pick next and repick the same task. There won't be a
> context switch but we still want to hold the watchdog. I assume such a
> counter also needs to be per cpu and inside the rq lock. There doesn't
> seem to be an existing one that fits this purpose.

Sorry, your reply got lost, but I just ran into something that reminded
me of this.

There's sched_count. That's currently schedstat, but if you can find a
spot in a hot cacheline (from schedule()'s perspective) then it
should be cheap to incremenent unconditionally.

If only someone were to write a useful cacheline perf tool (and no that
c2c trainwreck doesn't count).


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-10-05 11:19         ` Peter Zijlstra
@ 2020-10-06  2:21           ` Xi Wang
  0 siblings, 0 replies; 12+ messages in thread
From: Xi Wang @ 2020-10-06  2:21 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Turner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel,
	Stephane Eranian, Arnaldo Carvalho de Melo

On Mon, Oct 5, 2020 at 4:19 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Fri, Mar 06, 2020 at 02:34:20PM -0800, Xi Wang wrote:
> > On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > > > The goal is to improve jitter since we're constantly periodically
> > > > preempting other classes to run the watchdog.   Even on a single CPU
> > > > this is measurable as jitter in the us range.  But, what increases the
> > > > motivation is this disruption has been recently magnified by CPU
> > > > "gifts" which require evicting the whole core when one of the siblings
> > > > schedules one of these watchdog threads.
> > > >
> > > > The majority outcome being asserted here is that we could actually
> > > > exercise pick_next_task if required -- there are other potential
> > > > things this will catch, but they are much more braindead generally
> > > > speaking (e.g. a bug in pick_next_task itself).
> > >
> > > I still utterly hate what the patch does though; there is no way I'll
> > > have watchdog code hook in the scheduler like this. That's just asking
> > > for trouble.
> > >
> > > Why isn't it sufficient to sample the existing context switch counters
> > > from the watchdog? And why can't we fix that?
> >
> > We could go to pick next and repick the same task. There won't be a
> > context switch but we still want to hold the watchdog. I assume such a
> > counter also needs to be per cpu and inside the rq lock. There doesn't
> > seem to be an existing one that fits this purpose.
>
> Sorry, your reply got lost, but I just ran into something that reminded
> me of this.
>
> There's sched_count. That's currently schedstat, but if you can find a
> spot in a hot cacheline (from schedule()'s perspective) then it
> should be cheap to incremenent unconditionally.
>
> If only someone were to write a useful cacheline perf tool (and no that
> c2c trainwreck doesn't count).
>

Thanks, I'll try the alternative implementation.

-Xi

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-10-06  2:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-04 21:39 [PATCH] sched: watchdog: Touch kernel watchdog in sched code Xi Wang
2020-03-05  3:11 ` Steven Rostedt
2020-03-05  7:57 ` Peter Zijlstra
2020-03-05 18:07   ` Thomas Gleixner
2020-03-05 21:41     ` Xi Wang
2020-03-05 22:07     ` Paul Turner
     [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
2020-03-06  8:28       ` Peter Zijlstra
2020-03-05 22:11   ` Paul Turner
2020-03-06  8:40     ` Peter Zijlstra
2020-03-06 22:34       ` Xi Wang
2020-10-05 11:19         ` Peter Zijlstra
2020-10-06  2:21           ` Xi Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).