All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Thompson <daniel.thompson@linaro.org>
To: John Stultz <john.stultz@linaro.org>
Cc: Daniel Thompson <daniel.thompson@linaro.org>,
	linux-kernel@vger.kernel.org, patches@linaro.org,
	linaro-kernel@lists.linaro.org,
	Sumit Semwal <sumit.semwal@linaro.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Stephen Boyd <sboyd@codeaurora.org>,
	Steven Rostedt <rostedt@goodmis.org>
Subject: [RFC PATCH] sched_clock: Avoid tearing during read from NMI
Date: Wed, 21 Jan 2015 16:53:56 +0000	[thread overview]
Message-ID: <1421859236-19782-1-git-send-email-daniel.thompson@linaro.org> (raw)

Currently it is possible for an NMI (or FIQ on ARM) to come in and
read sched_clock() whilst update_sched_clock() has half updated the
state. This results in a bad time value being observed.

This patch fixes that problem in a similar manner to Thomas Gleixner's
commit 4396e058c52e ("timekeeping: Provide fast and NMI safe access to
CLOCK_MONOTONIC").

Note that ripping out the seqcount lock from sched_clock_register() and
replacing it with a large comment is not nearly as bad as it looks! The
locking here is actually pretty useless since most of the variables
modified within the write lock are not covered by the read lock. As a
result a big comment and the sequence bump implicit in the call
to update_epoch() should work pretty much the same.

Suggested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---

Notes:
    This patch has only had fairly light testing at this point. However, it
    survives basic tests. In particular, I am running perf from FIQ/NMI and
    have instrumented it with some monotonicity tests, none of which has
    reported any problem.
    

 kernel/time/sched_clock.c | 63 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 13 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..485d5070259c 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -27,6 +27,10 @@ struct clock_data {
 	u32 mult;
 	u32 shift;
 	bool suspended;
+
+	/* Used only temporarily whilst we are updating the primary copy */
+	u64 old_epoch_ns;
+	u64 old_epoch_cyc;
 };

 static struct hrtimer sched_clock_timer;
@@ -67,9 +71,14 @@ unsigned long long notrace sched_clock(void)
 		return cd.epoch_ns;

 	do {
-		seq = raw_read_seqcount_begin(&cd.seq);
-		epoch_cyc = cd.epoch_cyc;
-		epoch_ns = cd.epoch_ns;
+		seq = raw_read_seqcount(&cd.seq);
+		if (likely(0 == (seq & 1))) {
+			epoch_cyc = cd.epoch_cyc;
+			epoch_ns = cd.epoch_ns;
+		} else {
+			epoch_cyc = cd.old_epoch_cyc;
+			epoch_ns = cd.old_epoch_ns;
+		}
 	} while (read_seqcount_retry(&cd.seq, seq));

 	cyc = read_sched_clock();
@@ -78,6 +87,35 @@ unsigned long long notrace sched_clock(void)
 }

 /*
+ * Update the epoch without allowing sched_clock to observe
+ * a mismatched epoch pair even if called from NMI.
+ *
+ * We do this by maintaining an odd/even copy of the epoch data and
+ * steering sched_clock to one or the other using a sequence counter.
+ * In order to preserve the (average case) data cache profile of
+ * sched_clock the system reverts back to the even copy as soon as
+ * possible; the odd copy is used *only* during an update.
+ *
+ * The caller is responsible for avoiding simultaneous updates.
+ */
+static void notrace update_epoch(u64 cyc, u64 ns)
+{
+	/* Update the backup copy */
+	cd.old_epoch_cyc = cd.epoch_cyc;
+	cd.old_epoch_ns = cd.epoch_ns;
+
+	/* Force readers to use the backup (odd) copy */
+	raw_write_seqcount_latch(&cd.seq);
+
+	/* Update the primary copy */
+	cd.epoch_cyc = cyc;
+	cd.epoch_ns = ns;
+
+	/* Steer readers back to the primary (even) copy */
+	raw_write_seqcount_latch(&cd.seq);
+}
+
+/*
  * Atomically update the sched_clock epoch.
  */
 static void notrace update_sched_clock(void)
@@ -91,12 +129,7 @@ static void notrace update_sched_clock(void)
 		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
 			  cd.mult, cd.shift);

-	raw_local_irq_save(flags);
-	raw_write_seqcount_begin(&cd.seq);
-	cd.epoch_ns = ns;
-	cd.epoch_cyc = cyc;
-	raw_write_seqcount_end(&cd.seq);
-	raw_local_irq_restore(flags);
+	update_epoch(cyc, ns);
 }

 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
@@ -135,16 +168,20 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
 			  cd.mult, cd.shift);

-	raw_write_seqcount_begin(&cd.seq);
+	/*
+	 * sched_clock will report a bad value if it executes
+	 * concurrently with the following code. No locking exists to
+	 * prevent this; we rely mostly on this function being called
+	 * early during kernel boot up before we have lots of other
+	 * stuff going on.
+	 */
 	read_sched_clock = read;
 	sched_clock_mask = new_mask;
 	cd.rate = rate;
 	cd.wrap_kt = new_wrap_kt;
 	cd.mult = new_mult;
 	cd.shift = new_shift;
-	cd.epoch_cyc = new_epoch;
-	cd.epoch_ns = ns;
-	raw_write_seqcount_end(&cd.seq);
+	update_epoch(new_epoch, ns);

 	r = rate;
 	if (r >= 4000000) {
--
1.9.3


             reply	other threads:[~2015-01-21 16:54 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-21 16:53 Daniel Thompson [this message]
2015-01-21 17:29 ` [RFC PATCH] sched_clock: Avoid tearing during read from NMI John Stultz
2015-01-21 20:20   ` Daniel Thompson
2015-01-21 20:58   ` Stephen Boyd
2015-01-22 13:06 ` [PATCH v2] sched_clock: Avoid deadlock " Daniel Thompson
2015-01-30 19:03 ` [PATCH v3 0/4] sched_clock: Optimize and avoid " Daniel Thompson
2015-01-30 19:03   ` [PATCH v3 1/4] sched_clock: Match scope of read and write seqcounts Daniel Thompson
2015-01-30 19:03   ` [PATCH v3 2/4] sched_clock: Optimize cache line usage Daniel Thompson
2015-02-05  1:14     ` Stephen Boyd
2015-02-05 10:21       ` Daniel Thompson
2015-01-30 19:03   ` [PATCH v3 3/4] sched_clock: Remove suspend from clock_read_data Daniel Thompson
2015-01-30 19:03   ` [PATCH v3 4/4] sched_clock: Avoid deadlock during read from NMI Daniel Thompson
2015-02-05  1:23     ` Stephen Boyd
2015-02-05  1:48       ` Steven Rostedt
2015-02-05  6:23         ` Stephen Boyd
2015-02-05  0:50   ` [PATCH v3 0/4] sched_clock: Optimize and avoid " Stephen Boyd
2015-02-05  9:05     ` Daniel Thompson
2015-02-08 12:09       ` Daniel Thompson
2015-02-09 22:08         ` Stephen Boyd
2015-02-08 12:02 ` [PATCH v4 0/5] " Daniel Thompson
2015-02-08 12:02   ` [PATCH v4 1/5] sched_clock: Match scope of read and write seqcounts Daniel Thompson
2015-02-08 12:02   ` [PATCH v4 2/5] sched_clock: Optimize cache line usage Daniel Thompson
2015-02-09  1:28     ` Will Deacon
2015-02-09  9:47       ` Daniel Thompson
2015-02-10  2:37         ` Stephen Boyd
2015-02-08 12:02   ` [PATCH v4 3/5] sched_clock: Remove suspend from clock_read_data Daniel Thompson
2015-02-08 12:02   ` [PATCH v4 4/5] sched_clock: Remove redundant notrace from update function Daniel Thompson
2015-02-08 12:02   ` [PATCH v4 5/5] sched_clock: Avoid deadlock during read from NMI Daniel Thompson
2015-02-13  3:49   ` [PATCH v4 0/5] sched_clock: Optimize and avoid " Stephen Boyd
2015-03-02 15:56 ` [PATCH v5 " Daniel Thompson
2015-03-02 15:56   ` [PATCH v5 1/5] sched_clock: Match scope of read and write seqcounts Daniel Thompson
2015-03-02 15:56   ` [PATCH v5 2/5] sched_clock: Optimize cache line usage Daniel Thompson
2015-03-02 15:56   ` [PATCH v5 3/5] sched_clock: Remove suspend from clock_read_data Daniel Thompson
2015-03-02 15:56   ` [PATCH v5 4/5] sched_clock: Remove redundant notrace from update function Daniel Thompson
2015-03-02 15:56   ` [PATCH v5 5/5] sched_clock: Avoid deadlock during read from NMI Daniel Thompson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1421859236-19782-1-git-send-email-daniel.thompson@linaro.org \
    --to=daniel.thompson@linaro.org \
    --cc=john.stultz@linaro.org \
    --cc=linaro-kernel@lists.linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=patches@linaro.org \
    --cc=rostedt@goodmis.org \
    --cc=sboyd@codeaurora.org \
    --cc=sumit.semwal@linaro.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.