linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: paulmck@kernel.org
To: linux-kernel@vger.kernel.org
Cc: john.stultz@linaro.org, tglx@linutronix.de, sboyd@kernel.org,
	corbet@lwn.net, Mark.Rutland@arm.com, maz@kernel.org,
	kernel-team@fb.com, neeraju@codeaurora.org, ak@linux.intel.com,
	"Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH v6 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable
Date: Fri,  2 Apr 2021 13:31:35 -0700	[thread overview]
Message-ID: <20210402203137.22479-3-paulmck@kernel.org> (raw)
In-Reply-To: <20210402202929.GA22273@paulmck-ThinkPad-P72>

From: "Paul E. McKenney" <paulmck@kernel.org>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other.  However, this problem has purportedly
been solved in the past ten years.  Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization.  How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag.  Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.

Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Reported-by: Chris Mason <clm@fb.com>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 arch/x86/kernel/kvmclock.c  |  2 +-
 arch/x86/kernel/tsc.c       |  3 +-
 include/linux/clocksource.h |  2 +-
 kernel/time/clocksource.c   | 73 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1fc0962..97eeaf1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
 	.read	= kvm_clock_get_cycles,
 	.rating	= 400,
 	.mask	= CLOCKSOURCE_MASK(64),
-	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
 	.enable	= kvm_cs_enable,
 };
 EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
 				  CLOCK_SOURCE_VALID_FOR_HRES |
-				  CLOCK_SOURCE_MUST_VERIFY,
+				  CLOCK_SOURCE_MUST_VERIFY |
+				  CLOCK_SOURCE_VERIFY_PERCPU,
 	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
 	.enable			= tsc_cs_enable,
 	.resume			= tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
 #define CLOCK_SOURCE_UNSTABLE			0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 #define CLOCK_SOURCE_RESELECT			0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU		0x200
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3f734c6..663bc53 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
 	WARN_ON_ONCE(injectfail < 0);
 }
 
+static struct clocksource *clocksource_verify_work_cs;	// Clocksource being checked; NULL when no check is in flight.
+static DEFINE_PER_CPU(u64, csnow_mid);	// Reading stored by clocksource_verify_one_cpu() on the CPU running it.
+static cpumask_t cpus_ahead;	// CPUs reported as ahead of the checking CPU.
+static cpumask_t cpus_behind;	// CPUs reported as behind the checking CPU.
+
+static void clocksource_verify_one_cpu(void *csin)	// Passed to smp_call_function(); csin is the clocksource to read.
+{
+	struct clocksource *cs = (struct clocksource *)csin;
+
+	__this_cpu_write(csnow_mid, cs->read(cs));	// Store the executing CPU's reading in its csnow_mid slot.
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)	// Handler for clocksource_verify_work.
+{
+	int cpu;
+	struct clocksource *cs;
+	int64_t cs_nsec;
+	u64 csnow_begin;	// This CPU's reading taken before the cross-CPU call.
+	u64 csnow_end;	// This CPU's reading taken after the cross-CPU call.
+	u64 delta;
+
+	cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+	if (WARN_ON_ONCE(!cs))
+		return;
+	pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+		cs->name, smp_processor_id());	// NOTE(review): work handler; confirm smp_processor_id() is safe if preemptible.
+	cpumask_clear(&cpus_ahead);
+	cpumask_clear(&cpus_behind);
+	csnow_begin = cs->read(cs);
+	smp_call_function(clocksource_verify_one_cpu, cs, 1);	// Callback writes csnow_mid on each CPU that runs it.
+	csnow_end = cs->read(cs);
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())	// The checking CPU is not compared with itself.
+			continue;
+		delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+		if ((s64)delta < 0)	// NOTE(review): cannot be negative once masked if cs->mask is under 64 bits; confirm.
+			cpumask_set_cpu(cpu, &cpus_behind);
+		delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+		if ((s64)delta < 0)	// Same masking caveat as the cpus_behind test above.
+			cpumask_set_cpu(cpu, &cpus_ahead);
+	}
+	if (!cpumask_empty(&cpus_ahead))
+		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+			cpumask_pr_args(&cpus_ahead),
+			smp_processor_id(), cs->name);
+	if (!cpumask_empty(&cpus_behind))
+		pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
+			cpumask_pr_args(&cpus_behind),
+			smp_processor_id(), cs->name);
+	if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {	// Duration is printed only when some CPU was flagged.
+		delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);	// Begin-to-end interval converted to nanoseconds.
+		pr_warn("        CPU %d duration %lldns for clocksource %s.\n",
+			smp_processor_id(), cs_nsec, cs->name);
+	}
+	smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);	// Work item that runs the per-CPU check.
+
+static void clocksource_verify_percpu(struct clocksource *cs)	// Queue a per-CPU check of cs unless one is already pending.
+{
+	if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))	// Only clocksources carrying this flag are checked.
+		return;
+	if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+		pr_warn("Previous clocksource synchronization still in flight.\n");
+		return;
+	}
+	smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+	queue_work(system_highpri_wq, &clocksource_verify_work);	// The check itself runs in clocksource_verify_percpu_wq().
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
 	struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
 				watchdog->name, wdnow, wdlast, watchdog->mask);
 			pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
 				cs->name, csnow, cslast, cs->mask);
+			clocksource_verify_percpu(cs);
 			__clocksource_unstable(cs);
 			continue;
 		}
-- 
2.9.5


  parent reply	other threads:[~2021-04-02 20:31 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-06  0:40 [PATCH RFC clocksource] Do not mark clocks unstable due to delays Paul E. McKenney
2021-01-06  0:41 ` [PATCH RFC clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog paulmck
2021-01-06  0:41 ` [PATCH RFC clocksource 2/5] clocksource: Retry clock read if long delays detected paulmck
2021-01-06 16:28   ` Rik van Riel
2021-01-06 19:53     ` Paul E. McKenney
2021-01-06 20:59       ` Rik van Riel
2021-01-06  0:41 ` [PATCH RFC clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable paulmck
2021-01-06  0:41 ` [PATCH RFC clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-01-06  0:41 ` [PATCH RFC clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking paulmck
2021-01-12  0:42 ` [PATCH v2 clocksource] Do not mark clocks unstable due to delays Paul E. McKenney
2021-01-12  0:45   ` [PATCH v2 clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog paulmck
2021-01-12  0:45   ` [PATCH v2 clocksource 2/5] clocksource: Retry clock read if long delays detected paulmck
2021-01-12  0:45   ` [PATCH v2 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable paulmck
2021-01-12  0:45   ` [PATCH v2 clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-01-12  0:45   ` [PATCH v2 clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking paulmck
2021-02-02 17:04   ` [PATCH v3 clocksource] Do not mark clocks unstable due to delays Paul E. McKenney
2021-02-02 17:06     ` [PATCH clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog paulmck
2021-02-02 17:06     ` [PATCH clocksource 2/5] clocksource: Retry clock read if long delays detected paulmck
2021-02-02 17:06     ` [PATCH clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable paulmck
2021-02-02 17:06     ` [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-02-02 19:51       ` Randy Dunlap
2021-02-03  0:50         ` Paul E. McKenney
2021-02-03  1:31           ` Randy Dunlap
2021-02-03  1:40             ` Paul E. McKenney
2021-02-02 17:06     ` [PATCH clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking paulmck
2021-02-17 21:28     ` [PATCH v3 clocksource] Do not mark clocks unstable due to delays Paul E. McKenney
2021-02-17 21:29       ` [PATCH clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog paulmck
2021-02-17 21:29       ` [PATCH clocksource 2/5] clocksource: Retry clock read if long delays detected paulmck
2021-02-17 21:29       ` [PATCH clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable paulmck
2021-02-17 21:29       ` [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-02-17 21:29       ` [PATCH clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking paulmck
2021-03-04  0:49       ` [PATCH v5 clocksource] Do not mark clocks unstable due to delays for v5.13 Paul E. McKenney
2021-03-04  0:53         ` [PATCH kernel/time 1/5] clocksource: Provide module parameters to inject delays in watchdog paulmck
2021-03-04  0:53         ` [PATCH kernel/time 2/5] clocksource: Retry clock read if long delays detected paulmck
2021-03-04  0:53         ` [PATCH kernel/time 3/5] clocksource: Check per-CPU clock synchronization when marked unstable paulmck
2021-03-04  0:53         ` [PATCH kernel/time 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-03-04  0:53         ` [PATCH kernel/time 5/5] clocksource: Do pairwise clock-desynchronization checking paulmck
2021-04-02 20:29         ` [PATCH v5 clocksource] Do not mark clocks unstable due to delays for v5.13 Paul E. McKenney
2021-04-02 20:31           ` [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide module parameters to inject delays in watchdog paulmck
2021-04-02 22:22             ` Thomas Gleixner
2021-04-02 22:37               ` Paul E. McKenney
2021-04-02 22:48               ` [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13 Paul E. McKenney
2021-04-02 22:49                 ` [PATCH v7 clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog paulmck
2021-04-02 22:49                 ` [PATCH v7 clocksource 2/5] clocksource: Retry clock read if long delays detected paulmck
2021-04-10  8:41                   ` Thomas Gleixner
2021-04-10 23:50                     ` Paul E. McKenney
2021-04-02 22:49                 ` [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable paulmck
2021-04-10  9:00                   ` Thomas Gleixner
2021-04-11  0:20                     ` Paul E. McKenney
2021-04-11 10:33                       ` Thomas Gleixner
2021-04-11 16:46                         ` Paul E. McKenney
2021-04-12  4:21                           ` Paul E. McKenney
2021-04-12 13:08                             ` Thomas Gleixner
2021-04-12 18:20                               ` Paul E. McKenney
2021-04-12 18:54                                 ` Thomas Gleixner
2021-04-12 19:57                                   ` Paul E. McKenney
2021-04-12 20:37                                     ` Thomas Gleixner
2021-04-12 23:18                                       ` Paul E. McKenney
2021-04-13 20:49                                         ` Thomas Gleixner
2021-04-14  4:48                                           ` Paul E. McKenney
2021-04-02 22:49                 ` [PATCH v7 clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-04-02 22:49                 ` [PATCH v7 clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking paulmck
2021-04-10  9:04                   ` Thomas Gleixner
2021-04-11  0:21                     ` Paul E. McKenney
2021-04-10  8:01                 ` [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13 Thomas Gleixner
2021-04-10 23:26                   ` Paul E. McKenney
2021-04-11 10:58                     ` Thomas Gleixner
2021-04-11 16:50                       ` Paul E. McKenney
2021-04-02 20:31           ` [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Retry clock read if long delays detected paulmck
2021-04-02 20:31           ` paulmck [this message]
2021-04-02 20:31           ` [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide a module parameter to fuzz per-CPU clock checking paulmck
2021-04-02 20:31           ` [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Do pairwise clock-desynchronization checking paulmck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210402203137.22479-3-paulmck@kernel.org \
    --to=paulmck@kernel.org \
    --cc=Mark.Rutland@arm.com \
    --cc=ak@linux.intel.com \
    --cc=corbet@lwn.net \
    --cc=john.stultz@linaro.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=neeraju@codeaurora.org \
    --cc=sboyd@kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).