All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/2] ARM/ARM64: arch_timer: QorIQ errata
@ 2016-04-11  2:22 Scott Wood
  2016-04-11  2:22 ` [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585 Scott Wood
  2016-04-11  2:22 ` [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971 Scott Wood
  0 siblings, 2 replies; 16+ messages in thread
From: Scott Wood @ 2016-04-11  2:22 UTC (permalink / raw)
  To: linux-arm-kernel

These patches work around timer errata on certain Freescale QorIQ ARM
chips.

Please let me know if there's a particular way these patches should be
split up, or any other feedback.

Priyanka Jain (1):
  ARM64: arch_timer: Work around QorIQ Erratum A-009971

Scott Wood (1):
  ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585

 .../devicetree/bindings/arm/arch_timer.txt         |  9 +++
 arch/arm/boot/dts/ls1021a.dtsi                     |  1 +
 arch/arm/include/asm/arch_timer.h                  | 71 +++++++++++++++++++---
 arch/arm/include/asm/vdso_datapage.h               |  1 +
 arch/arm/kernel/vdso.c                             |  1 +
 arch/arm/vdso/vgettimeofday.c                      |  2 +-
 arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi     |  1 +
 arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  2 +
 arch/arm64/include/asm/arch_timer.h                | 62 ++++++++++++++++---
 arch/arm64/include/asm/vdso_datapage.h             |  1 +
 arch/arm64/kernel/asm-offsets.c                    |  1 +
 arch/arm64/kernel/vdso.c                           |  2 +
 arch/arm64/kernel/vdso/gettimeofday.S              | 14 ++++-
 drivers/clocksource/arm_arch_timer.c               |  8 +++
 14 files changed, 155 insertions(+), 21 deletions(-)

-- 
2.5.0

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-11  2:22 [RFC PATCH 0/2] ARM/ARM64: arch_timer: QorIQ errata Scott Wood
@ 2016-04-11  2:22 ` Scott Wood
  2016-04-11  9:19   ` Will Deacon
                     ` (2 more replies)
  2016-04-11  2:22 ` [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971 Scott Wood
  1 sibling, 3 replies; 16+ messages in thread
From: Scott Wood @ 2016-04-11  2:22 UTC (permalink / raw)
  To: linux-arm-kernel

Erratum A-008585 says that the ARM generic timer "has the potential to
contain an erroneous value for a small number of core clock cycles
every time the timer value changes" and that the workaround is to
reread TVAL and count registers until successive reads return the same
value.

This erratum can be found on LS1021A (32-bit), LS1043A (64-bit), and
LS2080A (64-bit).

This patch is loosely based on work by Priyanka Jain and Bhupesh
Sharma.

Signed-off-by: Scott Wood <oss@buserror.net>
---
 .../devicetree/bindings/arm/arch_timer.txt         |  4 ++
 arch/arm/boot/dts/ls1021a.dtsi                     |  1 +
 arch/arm/include/asm/arch_timer.h                  | 71 +++++++++++++++++++---
 arch/arm/include/asm/vdso_datapage.h               |  1 +
 arch/arm/kernel/vdso.c                             |  1 +
 arch/arm/vdso/vgettimeofday.c                      |  2 +-
 arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi     |  1 +
 arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
 arch/arm64/include/asm/arch_timer.h                | 35 ++++++++---
 arch/arm64/include/asm/vdso_datapage.h             |  1 +
 arch/arm64/kernel/asm-offsets.c                    |  1 +
 arch/arm64/kernel/vdso.c                           |  2 +
 arch/arm64/kernel/vdso/gettimeofday.S              | 14 ++++-
 drivers/clocksource/arm_arch_timer.c               |  5 ++
 14 files changed, 121 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index e774128..7117fbd 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -25,6 +25,10 @@ to deliver its interrupts via SPIs.
 - always-on : a boolean property. If present, the timer is powered through an
   always-on power domain, therefore it never loses context.
 
+- fsl,erratum-a008585 : A boolean property. Indicates the presence of
+  QorIQ erratum A-008585, which says reading the timer is unreliable
+  unless the same value is returned by back-to-back reads.
+
 ** Optional properties:
 
 - arm,cpu-registers-not-fw-configured : Firmware does not initialize
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index 726372d..a2e9f66 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -91,6 +91,7 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+		fsl,erratum-a008585;
 	};
 
 	pmu {
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index d4ebf56..5b8e1f9 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -12,6 +12,8 @@
 #ifdef CONFIG_ARM_ARCH_TIMER
 int arch_timer_arch_init(void);
 
+extern bool arm_arch_timer_reread;
+
 /*
  * These register accessors are marked inline so the compiler can
  * nicely work out which register we want, and chuck away the rest of
@@ -44,7 +46,7 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
 }
 
 static __always_inline
-u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
+u32 arch_timer_reg_read_cp15_raw(int access, enum arch_timer_reg reg)
 {
 	u32 val = 0;
 
@@ -71,6 +73,38 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 	return val;
 }
 
+static __always_inline
+u32 arch_timer_reg_tval_reread(int access, enum arch_timer_reg reg)
+{
+	u32 val, val_new;
+	int timeout = 200;
+
+	do {
+		if (access == ARCH_TIMER_PHYS_ACCESS) {
+			asm volatile("mrc p15, 0, %0, c14, c2, 0;"
+				     "mrc p15, 0, %1, c14, c2, 0"
+				     : "=r" (val), "=r" (val_new));
+		} else if (access == ARCH_TIMER_VIRT_ACCESS) {
+			asm volatile("mrc p15, 0, %0, c14, c3, 0;"
+				     "mrc p15, 0, %1, c14, c3, 0"
+				     : "=r" (val), "=r" (val_new));
+		}
+		timeout--;
+	} while (val != val_new && timeout);
+
+	WARN_ON_ONCE(!timeout);
+	return val;
+}
+
+static __always_inline
+u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
+{
+	if (arm_arch_timer_reread && reg == ARCH_TIMER_REG_TVAL)
+		return arch_timer_reg_tval_reread(access, reg);
+
+	return arch_timer_reg_read_cp15_raw(access, reg);
+}
+
 static inline u32 arch_timer_get_cntfrq(void)
 {
 	u32 val;
@@ -78,22 +112,39 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }
 
-static inline u64 arch_counter_get_cntpct(void)
+static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)
 {
-	u64 cval;
+	u64 val, val_new;
+	int timeout = 200;
 
 	isb();
-	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
+
+	if (reread) {
+		do {
+			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
+				     "mrrc p15, %2, %Q1, %R1, c14"
+				     : "=r" (val), "=r" (val_new)
+				     : "i" (opcode));
+			timeout--;
+		} while (val != val_new && timeout);
+
+		BUG_ON(!timeout);
+	} else {
+		asm volatile("mrrc p15, %1, %Q0, %R0, c14" : "=r" (val)
+			     : "i" (opcode));
+	}
+
+	return val;
 }
 
-static inline u64 arch_counter_get_cntvct(void)
+static inline u64 arch_counter_get_cntpct(void)
 {
-	u64 cval;
+	return arch_counter_get_cnt(0, arm_arch_timer_reread);
+}
 
-	isb();
-	asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
+static inline u64 arch_counter_get_cntvct(void)
+{
+	return arch_counter_get_cnt(1, arm_arch_timer_reread);
 }
 
 static inline u32 arch_timer_get_cntkctl(void)
diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h
index 9be2594..6c8e1cb 100644
--- a/arch/arm/include/asm/vdso_datapage.h
+++ b/arch/arm/include/asm/vdso_datapage.h
@@ -46,6 +46,7 @@ struct vdso_data {
 	u64 xtime_clock_snsec;	/* CLOCK_REALTIME sub-ns base */
 	u32 tz_minuteswest;	/* timezone info for gettimeofday(2) */
 	u32 tz_dsttime;
+	u32 timer_reread;	/* Erratum requires two equal timer reads */
 };
 
 union vdso_data_store {
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 994e971..8885e76 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -307,6 +307,7 @@ void update_vsyscall(struct timekeeper *tk)
 
 	vdso_write_begin(vdso_data);
 
+	vdso_data->timer_reread			= arm_arch_timer_reread;
 	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
 	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
 	vdso_data->xtime_coarse_nsec		= (u32)(tk->tkr_mono.xtime_nsec >>
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index 79214d5..a29fc61 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -123,7 +123,7 @@ static notrace u64 get_ns(struct vdso_data *vdata)
 	u64 cycle_now;
 	u64 nsec;
 
-	cycle_now = arch_counter_get_cntvct();
+	cycle_now = arch_counter_get_cnt(1, vdata->timer_reread);
 
 	cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask;
 
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index be72bf5..596420c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -115,6 +115,7 @@
 			     <1 14 0x1>, /* Physical Non-Secure PPI */
 			     <1 11 0x1>, /* Virtual PPI */
 			     <1 10 0x1>; /* Hypervisor PPI */
+		fsl,erratum-a008585;
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index 9d746c6..0270ccf 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -171,6 +171,7 @@
 			     <1 14 0x8>, /* Physical Non-Secure PPI, active-low */
 			     <1 11 0x8>, /* Virtual PPI, active-low */
 			     <1 10 0x8>; /* Hypervisor PPI, active-low */
+		fsl,erratum-a008585;
 	};
 
 	pmu {
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index fbe0ca3..9367ee3 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -27,6 +27,31 @@
 
 #include <clocksource/arm_arch_timer.h>
 
+extern bool arm_arch_timer_reread;
+
+/* QorIQ errata workarounds */
+#define ARCH_TIMER_REREAD(reg) ({ \
+	u64 _val_old, _val_new; \
+	int _timeout = 200; \
+	do { \
+		asm volatile("mrs %0, " reg ";" \
+			     "mrs %1, " reg \
+			     : "=r" (_val_old), "=r" (_val_new)); \
+		_timeout--; \
+	} while (_val_old != _val_new && _timeout); \
+	WARN_ON_ONCE(!_timeout); \
+	_val_old; \
+})
+
+#define ARCH_TIMER_READ(reg) ({ \
+	u64 _val; \
+	if (arm_arch_timer_reread) \
+		_val = ARCH_TIMER_REREAD(reg); \
+	else \
+		asm volatile("mrs %0, " reg : "=r" (_val)); \
+	_val; \
+})
+
 /*
  * These register accessors are marked inline so the compiler can
  * nicely work out which register we want, and chuck away the rest of
@@ -69,7 +94,7 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
+			val = ARCH_TIMER_READ("cntp_tval_el0");
 			break;
 		}
 	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
@@ -78,7 +103,7 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
+			val = ARCH_TIMER_READ("cntv_tval_el0");
 			break;
 		}
 	}
@@ -116,12 +141,8 @@ static inline u64 arch_counter_get_cntpct(void)
 
 static inline u64 arch_counter_get_cntvct(void)
 {
-	u64 cval;
-
 	isb();
-	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
-
-	return cval;
+	return ARCH_TIMER_READ("cntvct_el0");
 }
 
 static inline int arch_timer_arch_init(void)
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
index de66199..9f64af4 100644
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ b/arch/arm64/include/asm/vdso_datapage.h
@@ -34,6 +34,7 @@ struct vdso_data {
 	__u32 tz_minuteswest;	/* Whacky timezone stuff */
 	__u32 tz_dsttime;
 	__u32 use_syscall;
+	__u32 timer_reread;	/* Erratum requires two equal timer reads */
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3ae6b31..c9dca8d 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -95,6 +95,7 @@ int main(void)
   DEFINE(VDSO_TZ_MINWEST,	offsetof(struct vdso_data, tz_minuteswest));
   DEFINE(VDSO_TZ_DSTTIME,	offsetof(struct vdso_data, tz_dsttime));
   DEFINE(VDSO_USE_SYSCALL,	offsetof(struct vdso_data, use_syscall));
+  DEFINE(VDSO_TIMER_REREAD,	offsetof(struct vdso_data, timer_reread));
   BLANK();
   DEFINE(TVAL_TV_SEC,		offsetof(struct timeval, tv_sec));
   DEFINE(TVAL_TV_USEC,		offsetof(struct timeval, tv_usec));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 97bc68f..f8c6158 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -31,6 +31,7 @@
 #include <linux/timekeeper_internal.h>
 #include <linux/vmalloc.h>
 
+#include <asm/arch_timer.h>
 #include <asm/cacheflush.h>
 #include <asm/signal32.h>
 #include <asm/vdso.h>
@@ -204,6 +205,7 @@ void update_vsyscall(struct timekeeper *tk)
 	++vdso_data->tb_seq_count;
 	smp_wmb();
 
+	vdso_data->timer_reread			= arm_arch_timer_reread;
 	vdso_data->use_syscall			= use_syscall;
 	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
 	vdso_data->xtime_coarse_nsec		= tk->tkr_mono.xtime_nsec >>
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index efa79e8..2e6008d 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -207,7 +207,7 @@ ENDPROC(__kernel_clock_getres)
 /*
  * Read the current time from the architected counter.
  * Expects vdso_data to be initialised.
- * Clobbers the temporary registers (x9 - x15).
+ * Clobbers the temporary registers (x9 - x17).
  * Returns:
  *  - w9		= vDSO sequence counter
  *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec)
@@ -217,6 +217,7 @@ ENTRY(__do_get_tspec)
 	.cfi_startproc
 
 	/* Read from the vDSO data page. */
+	ldr	w17, [vdso_data, #VDSO_TIMER_REREAD]
 	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
 	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
 	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
@@ -225,6 +226,17 @@ ENTRY(__do_get_tspec)
 	/* Read the virtual counter. */
 	isb
 	mrs	x15, cntvct_el0
+	/*
+	 * Erratum A-008585 requires back-to-back reads to be identical
+	 * in order to avoid glitches.
+	 */
+	cmp	w17, #0
+	b.eq	2f
+1:	mrs	x15, cntvct_el0
+	mrs	x16, cntvct_el0
+	cmp	x16, x15
+	b.ne	1b
+2:
 
 	/* Calculate cycle delta and convert to ns. */
 	sub	x10, x15, x10
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b38..5ed7c7f 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -79,6 +79,9 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
 static bool arch_timer_c3stop;
 static bool arch_timer_mem_use_virtual;
 
+bool arm_arch_timer_reread; /* QorIQ erratum A-008585 */
+EXPORT_SYMBOL(arm_arch_timer_reread);
+
 /*
  * Architected system timer support.
  */
@@ -762,6 +765,8 @@ static void __init arch_timer_of_init(struct device_node *np)
 	arch_timer_detect_rate(NULL, np);
 
 	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
+	arm_arch_timer_reread =
+		of_property_read_bool(np, "fsl,erratum-a008585");
 
 	/*
 	 * If we cannot rely on firmware initializing the timer registers then
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971
  2016-04-11  2:22 [RFC PATCH 0/2] ARM/ARM64: arch_timer: QorIQ errata Scott Wood
  2016-04-11  2:22 ` [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585 Scott Wood
@ 2016-04-11  2:22 ` Scott Wood
  2016-04-11  9:55   ` Marc Zyngier
  1 sibling, 1 reply; 16+ messages in thread
From: Scott Wood @ 2016-04-11  2:22 UTC (permalink / raw)
  To: linux-arm-kernel

From: Priyanka Jain <priyanka.jain@nxp.com>

Erratum A-009971 says that it is possible for the timer value register
to be written incorrectly, resulting in "an incorrect and potentially
very long timeout".  The workaround is to read the timer count
immediately before and after writing the timer value register, and
repeat if the counter reads don't match.

This erratum can be found on LS2080A.

Signed-off-by: Priyanka Jain <priyanka.jain@nxp.com>
[scottwood: cleanup and fixes]
Signed-off-by: Scott Wood <oss@buserror.net>
---
 .../devicetree/bindings/arm/arch_timer.txt         |  5 ++++
 arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
 arch/arm64/include/asm/arch_timer.h                | 27 ++++++++++++++++++++--
 drivers/clocksource/arm_arch_timer.c               |  3 +++
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index 7117fbd..ab4d3b1 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -29,6 +29,11 @@ to deliver its interrupts via SPIs.
   QorIQ erratum A-008585, which says reading the timer is unreliable
   unless the same value is returned by back-to-back reads.
 
+- fsl,erratum-a009971 : A boolean property. Indicates the presence of
+  QorIQ erratum A-009971, which says writing the timer value register
+  is unreliable unless timer count reads before and after the timer write
+  return the same value.
+
 ** Optional properties:
 
 - arm,cpu-registers-not-fw-configured : Firmware does not initialize
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index 0270ccf..529e441 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -172,6 +172,7 @@
 			     <1 11 0x8>, /* Virtual PPI, active-low */
 			     <1 10 0x8>; /* Hypervisor PPI, active-low */
 		fsl,erratum-a008585;
+		fsl,erratum-a009971;
 	};
 
 	pmu {
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 9367ee3..1867e60 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -28,6 +28,7 @@
 #include <clocksource/arm_arch_timer.h>
 
 extern bool arm_arch_timer_reread;
+extern bool arm_arch_timer_rewrite;
 
 /* QorIQ errata workarounds */
 #define ARCH_TIMER_REREAD(reg) ({ \
@@ -52,6 +53,20 @@ extern bool arm_arch_timer_reread;
 	_val; \
 })
 
+#define ARCH_TIMER_TVAL_REWRITE(pv, val) do { \
+	u64 _cnt_old, _cnt_new; \
+	int _timeout = 200; \
+	do { \
+		asm volatile("mrs %0, cntvct_el0;" \
+			     "msr cnt" pv "_tval_el0, %2;" \
+			     "mrs %1, cntvct_el0" \
+			     : "=&r" (_cnt_old), "=r" (_cnt_new) \
+			     : "r" (val)); \
+		_timeout--; \
+	} while (_cnt_old != _cnt_new && _timeout); \
+	WARN_ON_ONCE(!_timeout); \
+} while (0)
+
 /*
  * These register accessors are marked inline so the compiler can
  * nicely work out which register we want, and chuck away the rest of
@@ -66,7 +81,11 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
 			asm volatile("msr cntp_ctl_el0,  %0" : : "r" (val));
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
+			if (arm_arch_timer_rewrite)
+				ARCH_TIMER_TVAL_REWRITE("p", val);
+			else
+				asm volatile("msr cntp_tval_el0, %0" : :
+					     "r" (val));
 			break;
 		}
 	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
@@ -75,7 +94,11 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
 			asm volatile("msr cntv_ctl_el0,  %0" : : "r" (val));
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
+			if (arm_arch_timer_rewrite)
+				ARCH_TIMER_TVAL_REWRITE("v", val);
+			else
+				asm volatile("msr cntv_tval_el0, %0" : :
+					     "r" (val));
 			break;
 		}
 	}
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5ed7c7f..82b32cb 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -81,6 +81,7 @@ static bool arch_timer_mem_use_virtual;
 
 bool arm_arch_timer_reread; /* QorIQ erratum A-008585 */
 EXPORT_SYMBOL(arm_arch_timer_reread);
+bool arm_arch_timer_rewrite; /* QorIQ erratum A-009971 */
 
 /*
  * Architected system timer support.
@@ -767,6 +768,8 @@ static void __init arch_timer_of_init(struct device_node *np)
 	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
 	arm_arch_timer_reread =
 		of_property_read_bool(np, "fsl,erratum-a008585");
+	arm_arch_timer_rewrite =
+		of_property_read_bool(np, "fsl,erratum-a009971");
 
 	/*
 	 * If we cannot rely on firmware initializing the timer registers then
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-11  2:22 ` [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585 Scott Wood
@ 2016-04-11  9:19   ` Will Deacon
  2016-04-11  9:52   ` Marc Zyngier
  2016-04-13 13:22   ` Rob Herring
  2 siblings, 0 replies; 16+ messages in thread
From: Will Deacon @ 2016-04-11  9:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Apr 10, 2016 at 09:22:32PM -0500, Scott Wood wrote:
> Erratum A-008585 says that the ARM generic timer "has the potential to
> contain an erroneous value for a small number of core clock cycles
> every time the timer value changes" and that the workaround is to
> reread TVAL and count registers until successive reads return the same
> value.
> 
> This erratum can be found on LS1021A (32-bit), LS1043A (64-bit), and
> LS2080A (64-bit).
> 
> This patch is loosely based on work by Priyanka Jain and Bhupesh
> Sharma.
> 
> Signed-off-by: Scott Wood <oss@buserror.net>
> ---
>  .../devicetree/bindings/arm/arch_timer.txt         |  4 ++
>  arch/arm/boot/dts/ls1021a.dtsi                     |  1 +
>  arch/arm/include/asm/arch_timer.h                  | 71 +++++++++++++++++++---
>  arch/arm/include/asm/vdso_datapage.h               |  1 +
>  arch/arm/kernel/vdso.c                             |  1 +
>  arch/arm/vdso/vgettimeofday.c                      |  2 +-
>  arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi     |  1 +
>  arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
>  arch/arm64/include/asm/arch_timer.h                | 35 ++++++++---
>  arch/arm64/include/asm/vdso_datapage.h             |  1 +
>  arch/arm64/kernel/asm-offsets.c                    |  1 +
>  arch/arm64/kernel/vdso.c                           |  2 +
>  arch/arm64/kernel/vdso/gettimeofday.S              | 14 ++++-
>  drivers/clocksource/arm_arch_timer.c               |  5 ++
>  14 files changed, 121 insertions(+), 19 deletions(-)

[...]

> diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
> index efa79e8..2e6008d 100644
> --- a/arch/arm64/kernel/vdso/gettimeofday.S
> +++ b/arch/arm64/kernel/vdso/gettimeofday.S
> @@ -207,7 +207,7 @@ ENDPROC(__kernel_clock_getres)
>  /*
>   * Read the current time from the architected counter.
>   * Expects vdso_data to be initialised.
> - * Clobbers the temporary registers (x9 - x15).
> + * Clobbers the temporary registers (x9 - x17).
>   * Returns:
>   *  - w9		= vDSO sequence counter
>   *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec)
> @@ -217,6 +217,7 @@ ENTRY(__do_get_tspec)
>  	.cfi_startproc
>  
>  	/* Read from the vDSO data page. */
> +	ldr	w17, [vdso_data, #VDSO_TIMER_REREAD]
>  	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
>  	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
>  	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
> @@ -225,6 +226,17 @@ ENTRY(__do_get_tspec)
>  	/* Read the virtual counter. */
>  	isb
>  	mrs	x15, cntvct_el0
> +	/*
> +	 * Erratum A-008585 requires back-to-back reads to be identical
> +	 * in order to avoid glitches.
> +	 */
> +	cmp	w17, #0
> +	b.eq	2f
> +1:	mrs	x15, cntvct_el0
> +	mrs	x16, cntvct_el0
> +	cmp	x16, x15
> +	b.ne	1b
> +2:

I'm not at all keen on having this in the vdso. Can you just force
use_syscall=1 instead, please?

Documentation/arm64/silicon-errata.txt needs updating too.

Will

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-11  2:22 ` [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585 Scott Wood
  2016-04-11  9:19   ` Will Deacon
@ 2016-04-11  9:52   ` Marc Zyngier
  2016-04-12  5:48     ` Scott Wood
  2016-04-13 13:22   ` Rob Herring
  2 siblings, 1 reply; 16+ messages in thread
From: Marc Zyngier @ 2016-04-11  9:52 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Scott,

On 11/04/16 03:22, Scott Wood wrote:
> Erratum A-008585 says that the ARM generic timer "has the potential to
> contain an erroneous value for a small number of core clock cycles
> every time the timer value changes" and that the workaround is to
> reread TVAL and count registers until successive reads return the same
> value.
> 
> This erratum can be found on LS1021A (32-bit), LS1043A (64-bit), and
> LS2080A (64-bit).
> 
> This patch is loosely based on work by Priyanka Jain and Bhupesh
> Sharma.
> 
> Signed-off-by: Scott Wood <oss@buserror.net>
> ---
>  .../devicetree/bindings/arm/arch_timer.txt         |  4 ++
>  arch/arm/boot/dts/ls1021a.dtsi                     |  1 +
>  arch/arm/include/asm/arch_timer.h                  | 71 +++++++++++++++++++---
>  arch/arm/include/asm/vdso_datapage.h               |  1 +
>  arch/arm/kernel/vdso.c                             |  1 +
>  arch/arm/vdso/vgettimeofday.c                      |  2 +-
>  arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi     |  1 +
>  arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
>  arch/arm64/include/asm/arch_timer.h                | 35 ++++++++---
>  arch/arm64/include/asm/vdso_datapage.h             |  1 +
>  arch/arm64/kernel/asm-offsets.c                    |  1 +
>  arch/arm64/kernel/vdso.c                           |  2 +
>  arch/arm64/kernel/vdso/gettimeofday.S              | 14 ++++-
>  drivers/clocksource/arm_arch_timer.c               |  5 ++
>  14 files changed, 121 insertions(+), 19 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
> index e774128..7117fbd 100644
> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
> @@ -25,6 +25,10 @@ to deliver its interrupts via SPIs.
>  - always-on : a boolean property. If present, the timer is powered through an
>    always-on power domain, therefore it never loses context.
>  
> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
> +  QorIQ erratum A-008585, which says reading the timer is unreliable
> +  unless the same value is returned by back-to-back reads.
> +
>  ** Optional properties:
>  
>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
> diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
> index 726372d..a2e9f66 100644
> --- a/arch/arm/boot/dts/ls1021a.dtsi
> +++ b/arch/arm/boot/dts/ls1021a.dtsi
> @@ -91,6 +91,7 @@
>  			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
>  			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
>  			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
> +		fsl,erratum-a008585;
>  	};
>  
>  	pmu {
> diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
> index d4ebf56..5b8e1f9 100644
> --- a/arch/arm/include/asm/arch_timer.h
> +++ b/arch/arm/include/asm/arch_timer.h
> @@ -12,6 +12,8 @@
>  #ifdef CONFIG_ARM_ARCH_TIMER
>  int arch_timer_arch_init(void);
>  
> +extern bool arm_arch_timer_reread;
> +
>  /*
>   * These register accessors are marked inline so the compiler can
>   * nicely work out which register we want, and chuck away the rest of
> @@ -44,7 +46,7 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
>  }
>  
>  static __always_inline
> -u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
> +u32 arch_timer_reg_read_cp15_raw(int access, enum arch_timer_reg reg)
>  {
>  	u32 val = 0;
>  
> @@ -71,6 +73,38 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>  	return val;
>  }
>  
> +static __always_inline
> +u32 arch_timer_reg_tval_reread(int access, enum arch_timer_reg reg)
> +{
> +	u32 val, val_new;
> +	int timeout = 200;
> +
> +	do {
> +		if (access == ARCH_TIMER_PHYS_ACCESS) {
> +			asm volatile("mrc p15, 0, %0, c14, c2, 0;"
> +				     "mrc p15, 0, %1, c14, c2, 0"
> +				     : "=r" (val), "=r" (val_new));
> +		} else if (access == ARCH_TIMER_VIRT_ACCESS) {
> +			asm volatile("mrc p15, 0, %0, c14, c3, 0;"
> +				     "mrc p15, 0, %1, c14, c3, 0"
> +				     : "=r" (val), "=r" (val_new));
> +		}
> +		timeout--;
> +	} while (val != val_new && timeout);
> +
> +	WARN_ON_ONCE(!timeout);
> +	return val;
> +}
> +
> +static __always_inline
> +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
> +{
> +	if (arm_arch_timer_reread && reg == ARCH_TIMER_REG_TVAL)
> +		return arch_timer_reg_tval_reread(access, reg);

I'm really not keen on this. Please implement this workaround as a
static_key, and branch to the workaround in the slow path.

> +
> +	return arch_timer_reg_read_cp15_raw(access, reg);
> +}
> +
>  static inline u32 arch_timer_get_cntfrq(void)
>  {
>  	u32 val;
> @@ -78,22 +112,39 @@ static inline u32 arch_timer_get_cntfrq(void)
>  	return val;
>  }
>  
> -static inline u64 arch_counter_get_cntpct(void)
> +static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)

Why the __always_inline? The compiler should already do the right thing.

>  {
> -	u64 cval;
> +	u64 val, val_new;
> +	int timeout = 200;
>  
>  	isb();
> -	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
> -	return cval;
> +
> +	if (reread) {
> +		do {
> +			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
> +				     "mrrc p15, %2, %Q1, %R1, c14"
> +				     : "=r" (val), "=r" (val_new)
> +				     : "i" (opcode));
> +			timeout--;
> +		} while (val != val_new && timeout);
> +
> +		BUG_ON(!timeout);

BUG_ON()? Really? Is there any condition where you wouldn't be able to
converge to a single value?

> +	} else {
> +		asm volatile("mrrc p15, %1, %Q0, %R0, c14" : "=r" (val)
> +			     : "i" (opcode));
> +	}
> +
> +	return val;
>  }
>  
> -static inline u64 arch_counter_get_cntvct(void)
> +static inline u64 arch_counter_get_cntpct(void)
>  {
> -	u64 cval;
> +	return arch_counter_get_cnt(0, arm_arch_timer_reread);
> +}
>  
> -	isb();
> -	asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (cval));
> -	return cval;
> +static inline u64 arch_counter_get_cntvct(void)
> +{
> +	return arch_counter_get_cnt(1, arm_arch_timer_reread);
>  }
>  
>  static inline u32 arch_timer_get_cntkctl(void)
> diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h
> index 9be2594..6c8e1cb 100644
> --- a/arch/arm/include/asm/vdso_datapage.h
> +++ b/arch/arm/include/asm/vdso_datapage.h
> @@ -46,6 +46,7 @@ struct vdso_data {
>  	u64 xtime_clock_snsec;	/* CLOCK_REALTIME sub-ns base */
>  	u32 tz_minuteswest;	/* timezone info for gettimeofday(2) */
>  	u32 tz_dsttime;
> +	u32 timer_reread;	/* Erratum requires two equal timer reads */
>  };
>  
>  union vdso_data_store {
> diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
> index 994e971..8885e76 100644
> --- a/arch/arm/kernel/vdso.c
> +++ b/arch/arm/kernel/vdso.c
> @@ -307,6 +307,7 @@ void update_vsyscall(struct timekeeper *tk)
>  
>  	vdso_write_begin(vdso_data);
>  
> +	vdso_data->timer_reread			= arm_arch_timer_reread;
>  	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
>  	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
>  	vdso_data->xtime_coarse_nsec		= (u32)(tk->tkr_mono.xtime_nsec >>
> diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
> index 79214d5..a29fc61 100644
> --- a/arch/arm/vdso/vgettimeofday.c
> +++ b/arch/arm/vdso/vgettimeofday.c
> @@ -123,7 +123,7 @@ static notrace u64 get_ns(struct vdso_data *vdata)
>  	u64 cycle_now;
>  	u64 nsec;
>  
> -	cycle_now = arch_counter_get_cntvct();
> +	cycle_now = arch_counter_get_cnt(1, vdata->timer_reread);
>  
>  	cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask;
>  
> diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
> index be72bf5..596420c 100644
> --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
> +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
> @@ -115,6 +115,7 @@
>  			     <1 14 0x1>, /* Physical Non-Secure PPI */
>  			     <1 11 0x1>, /* Virtual PPI */
>  			     <1 10 0x1>; /* Hypervisor PPI */
> +		fsl,erratum-a008585;
>  	};
>  
>  	pmu {
> diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
> index 9d746c6..0270ccf 100644
> --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
> +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
> @@ -171,6 +171,7 @@
>  			     <1 14 0x8>, /* Physical Non-Secure PPI, active-low */
>  			     <1 11 0x8>, /* Virtual PPI, active-low */
>  			     <1 10 0x8>; /* Hypervisor PPI, active-low */
> +		fsl,erratum-a008585;
>  	};
>  
>  	pmu {
> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
> index fbe0ca3..9367ee3 100644
> --- a/arch/arm64/include/asm/arch_timer.h
> +++ b/arch/arm64/include/asm/arch_timer.h
> @@ -27,6 +27,31 @@
>  
>  #include <clocksource/arm_arch_timer.h>
>  
> +extern bool arm_arch_timer_reread;
> +
> +/* QorIQ errata workarounds */
> +#define ARCH_TIMER_REREAD(reg) ({ \
> +	u64 _val_old, _val_new; \
> +	int _timeout = 200; \
> +	do { \
> +		asm volatile("mrs %0, " reg ";" \
> +			     "mrs %1, " reg \
> +			     : "=r" (_val_old), "=r" (_val_new)); \
> +		_timeout--; \
> +	} while (_val_old != _val_new && _timeout); \
> +	WARN_ON_ONCE(!_timeout); \

And here we only warn? I'd expect some degree of consistency between the
two architectures.

> +	_val_old; \
> +})
> +
> +#define ARCH_TIMER_READ(reg) ({ \
> +	u64 _val; \
> +	if (arm_arch_timer_reread) \
> +		_val = ARCH_TIMER_REREAD(reg); \
> +	else \
> +		asm volatile("mrs %0, " reg : "=r" (_val)); \
> +	_val; \
> +})

This should also be implemented as a static key.

> +
>  /*
>   * These register accessors are marked inline so the compiler can
>   * nicely work out which register we want, and chuck away the rest of
> @@ -69,7 +94,7 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>  			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
>  			break;
>  		case ARCH_TIMER_REG_TVAL:
> -			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
> +			val = ARCH_TIMER_READ("cntp_tval_el0");
>  			break;
>  		}
>  	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
> @@ -78,7 +103,7 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>  			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
>  			break;
>  		case ARCH_TIMER_REG_TVAL:
> -			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
> +			val = ARCH_TIMER_READ("cntv_tval_el0");
>  			break;
>  		}
>  	}
> @@ -116,12 +141,8 @@ static inline u64 arch_counter_get_cntpct(void)
>  
>  static inline u64 arch_counter_get_cntvct(void)
>  {
> -	u64 cval;
> -
>  	isb();
> -	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
> -
> -	return cval;
> +	return ARCH_TIMER_READ("cntvct_el0");
>  }
>  
>  static inline int arch_timer_arch_init(void)
> diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
> index de66199..9f64af4 100644
> --- a/arch/arm64/include/asm/vdso_datapage.h
> +++ b/arch/arm64/include/asm/vdso_datapage.h
> @@ -34,6 +34,7 @@ struct vdso_data {
>  	__u32 tz_minuteswest;	/* Whacky timezone stuff */
>  	__u32 tz_dsttime;
>  	__u32 use_syscall;
> +	__u32 timer_reread;	/* Erratum requires two equal timer reads */
>  };
>  
>  #endif /* !__ASSEMBLY__ */
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 3ae6b31..c9dca8d 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -95,6 +95,7 @@ int main(void)
>    DEFINE(VDSO_TZ_MINWEST,	offsetof(struct vdso_data, tz_minuteswest));
>    DEFINE(VDSO_TZ_DSTTIME,	offsetof(struct vdso_data, tz_dsttime));
>    DEFINE(VDSO_USE_SYSCALL,	offsetof(struct vdso_data, use_syscall));
> +  DEFINE(VDSO_TIMER_REREAD,	offsetof(struct vdso_data, timer_reread));
>    BLANK();
>    DEFINE(TVAL_TV_SEC,		offsetof(struct timeval, tv_sec));
>    DEFINE(TVAL_TV_USEC,		offsetof(struct timeval, tv_usec));
> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
> index 97bc68f..f8c6158 100644
> --- a/arch/arm64/kernel/vdso.c
> +++ b/arch/arm64/kernel/vdso.c
> @@ -31,6 +31,7 @@
>  #include <linux/timekeeper_internal.h>
>  #include <linux/vmalloc.h>
>  
> +#include <asm/arch_timer.h>
>  #include <asm/cacheflush.h>
>  #include <asm/signal32.h>
>  #include <asm/vdso.h>
> @@ -204,6 +205,7 @@ void update_vsyscall(struct timekeeper *tk)
>  	++vdso_data->tb_seq_count;
>  	smp_wmb();
>  
> +	vdso_data->timer_reread			= arm_arch_timer_reread;
>  	vdso_data->use_syscall			= use_syscall;
>  	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
>  	vdso_data->xtime_coarse_nsec		= tk->tkr_mono.xtime_nsec >>
> diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
> index efa79e8..2e6008d 100644
> --- a/arch/arm64/kernel/vdso/gettimeofday.S
> +++ b/arch/arm64/kernel/vdso/gettimeofday.S
> @@ -207,7 +207,7 @@ ENDPROC(__kernel_clock_getres)
>  /*
>   * Read the current time from the architected counter.
>   * Expects vdso_data to be initialised.
> - * Clobbers the temporary registers (x9 - x15).
> + * Clobbers the temporary registers (x9 - x17).
>   * Returns:
>   *  - w9		= vDSO sequence counter
>   *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec)
> @@ -217,6 +217,7 @@ ENTRY(__do_get_tspec)
>  	.cfi_startproc
>  
>  	/* Read from the vDSO data page. */
> +	ldr	w17, [vdso_data, #VDSO_TIMER_REREAD]
>  	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
>  	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
>  	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
> @@ -225,6 +226,17 @@ ENTRY(__do_get_tspec)
>  	/* Read the virtual counter. */
>  	isb
>  	mrs	x15, cntvct_el0
> +	/*
> +	 * Erratum A-008585 requires back-to-back reads to be identical
> +	 * in order to avoid glitches.
> +	 */
> +	cmp	w17, #0
> +	b.eq	2f
> +1:	mrs	x15, cntvct_el0
> +	mrs	x16, cntvct_el0
> +	cmp	x16, x15
> +	b.ne	1b
> +2:

Could userspace lock-up here? If it can, you need to be able to bail
out. If not, then your BUG_ON() sprinkling is bogus.

>  
>  	/* Calculate cycle delta and convert to ns. */
>  	sub	x10, x15, x10
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 5152b38..5ed7c7f 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -79,6 +79,9 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
>  static bool arch_timer_c3stop;
>  static bool arch_timer_mem_use_virtual;
>  
> +bool arm_arch_timer_reread; /* QorIQ erratum A-008585 */
> +EXPORT_SYMBOL(arm_arch_timer_reread);
> +
>  /*
>   * Architected system timer support.
>   */
> @@ -762,6 +765,8 @@ static void __init arch_timer_of_init(struct device_node *np)
>  	arch_timer_detect_rate(NULL, np);
>  
>  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
> +	arm_arch_timer_reread =
> +		of_property_read_bool(np, "fsl,erratum-a008585");
>  
>  	/*
>  	 * If we cannot rely on firmware initializing the timer registers then
> 

The elephant in the room is KVM. I'm pretty sure it suffers from the
same erratum, yet you did not handle it at all. I'd expect to see
something in an upcoming version of the patch.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971
  2016-04-11  2:22 ` [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971 Scott Wood
@ 2016-04-11  9:55   ` Marc Zyngier
  2016-04-12  5:54     ` Scott Wood
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Zyngier @ 2016-04-11  9:55 UTC (permalink / raw)
  To: linux-arm-kernel

On 11/04/16 03:22, Scott Wood wrote:
> From: Priyanka Jain <priyanka.jain@nxp.com>
> 
> Erratum A-009971 says that it is possible for the timer value register
> to be written incorrectly, resulting in "an incorrect and potentially
> very long timeout".  The workaround is to read the timer count
> immediately before and after writing the timer value register, and
> repeat if the counter reads don't match.
> 
> This erratum can be found on LS2080A.
> 
> Signed-off-by: Priyanka Jain <priyanka.jain@nxp.com>
> [scottwood: cleanup and fixes]
> Signed-off-by: Scott Wood <oss@buserror.net>
> ---
>  .../devicetree/bindings/arm/arch_timer.txt         |  5 ++++
>  arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
>  arch/arm64/include/asm/arch_timer.h                | 27 ++++++++++++++++++++--
>  drivers/clocksource/arm_arch_timer.c               |  3 +++
>  4 files changed, 34 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
> index 7117fbd..ab4d3b1 100644
> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
> @@ -29,6 +29,11 @@ to deliver its interrupts via SPIs.
>    QorIQ erratum A-008585, which says reading the timer is unreliable
>    unless the same value is returned by back-to-back reads.
>  
> +- fsl,erratum-a009971 : A boolean property. Indicates the presence of
> +  QorIQ erratum A-009971, which says writing the timer value register
> +  is unreliable unless timer count reads before and after the timer write
> +  return the same value.
> +
>  ** Optional properties:
>  
>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
> diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
> index 0270ccf..529e441 100644
> --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
> +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
> @@ -172,6 +172,7 @@
>  			     <1 11 0x8>, /* Virtual PPI, active-low */
>  			     <1 10 0x8>; /* Hypervisor PPI, active-low */
>  		fsl,erratum-a008585;
> +		fsl,erratum-a009971;
>  	};
>  
>  	pmu {
> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
> index 9367ee3..1867e60 100644
> --- a/arch/arm64/include/asm/arch_timer.h
> +++ b/arch/arm64/include/asm/arch_timer.h
> @@ -28,6 +28,7 @@
>  #include <clocksource/arm_arch_timer.h>
>  
>  extern bool arm_arch_timer_reread;
> +extern bool arm_arch_timer_rewrite;

Just as for the other bug, please implement this as a static key.

>  
>  /* QorIQ errata workarounds */
>  #define ARCH_TIMER_REREAD(reg) ({ \
> @@ -52,6 +53,20 @@ extern bool arm_arch_timer_reread;
>  	_val; \
>  })
>  
> +#define ARCH_TIMER_TVAL_REWRITE(pv, val) do { \
> +	u64 _cnt_old, _cnt_new; \
> +	int _timeout = 200; \
> +	do { \
> +		asm volatile("mrs %0, cntvct_el0;" \
> +			     "msr cnt" pv "_tval_el0, %2;" \
> +			     "mrs %1, cntvct_el0" \
> +			     : "=&r" (_cnt_old), "=r" (_cnt_new) \
> +			     : "r" (val)); \
> +		_timeout--; \
> +	} while (_cnt_old != _cnt_new && _timeout); \
> +	WARN_ON_ONCE(!_timeout); \
> +} while (0)
> +

Given how many times you've written that loop, I'm sure you can have a
preprocessor macro that will do the right thing.

>  /*
>   * These register accessors are marked inline so the compiler can
>   * nicely work out which register we want, and chuck away the rest of
> @@ -66,7 +81,11 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
>  			asm volatile("msr cntp_ctl_el0,  %0" : : "r" (val));
>  			break;
>  		case ARCH_TIMER_REG_TVAL:
> -			asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
> +			if (arm_arch_timer_rewrite)
> +				ARCH_TIMER_TVAL_REWRITE("p", val);
> +			else
> +				asm volatile("msr cntp_tval_el0, %0" : :
> +					     "r" (val));
>  			break;
>  		}
>  	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
> @@ -75,7 +94,11 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
>  			asm volatile("msr cntv_ctl_el0,  %0" : : "r" (val));
>  			break;
>  		case ARCH_TIMER_REG_TVAL:
> -			asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
> +			if (arm_arch_timer_rewrite)
> +				ARCH_TIMER_TVAL_REWRITE("v", val);
> +			else
> +				asm volatile("msr cntv_tval_el0, %0" : :
> +					     "r" (val));
>  			break;
>  		}
>  	}
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 5ed7c7f..82b32cb 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -81,6 +81,7 @@ static bool arch_timer_mem_use_virtual;
>  
>  bool arm_arch_timer_reread; /* QorIQ erratum A-008585 */
>  EXPORT_SYMBOL(arm_arch_timer_reread);
> +bool arm_arch_timer_rewrite; /* QorIQ erratum A-009971 */
>  
>  /*
>   * Architected system timer support.
> @@ -767,6 +768,8 @@ static void __init arch_timer_of_init(struct device_node *np)
>  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
>  	arm_arch_timer_reread =
>  		of_property_read_bool(np, "fsl,erratum-a008585");
> +	arm_arch_timer_rewrite =
> +		of_property_read_bool(np, "fsl,erratum-a009971");
>  
>  	/*
>  	 * If we cannot rely on firmware initializing the timer registers then
> 

This also requires handling in KVM.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-11  9:52   ` Marc Zyngier
@ 2016-04-12  5:48     ` Scott Wood
  2016-04-12  9:07       ` Marc Zyngier
  0 siblings, 1 reply; 16+ messages in thread
From: Scott Wood @ 2016-04-12  5:48 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 2016-04-11 at 10:52 +0100, Marc Zyngier wrote:
> Hi Scott,
> 
> On 11/04/16 03:22, Scott Wood wrote:
> > +static __always_inline
> > +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
> > +{
> > +	if (arm_arch_timer_reread && reg == ARCH_TIMER_REG_TVAL)
> > +		return arch_timer_reg_tval_reread(access, reg);
> 
> I'm really not keen on this. Please implement this workaround as a
> static_key, and branch to the workaround in the slow path.

OK, I'll look into that.

> > -static inline u64 arch_counter_get_cntpct(void)
> > +static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)
> 
> Why the __always_inline? The compiler should already do the right thing.

The "i" asm constraint requires that it be inline.  Maybe GCC is likely to
inline it anyway, but it's better to be explicit when it's required for
correctness.

> > -	u64 cval;
> > +	u64 val, val_new;
> > +	int timeout = 200;
> >  
> >  	isb();
> > -	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
> > -	return cval;
> > +
> > +	if (reread) {
> > +		do {
> > +			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
> > +				     "mrrc p15, %2, %Q1, %R1, c14"
> > +				     : "=r" (val), "=r" (val_new)
> > +				     : "i" (opcode));
> > +			timeout--;
> > +		} while (val != val_new && timeout);
> > +
> > +		BUG_ON(!timeout);
> 
> BUG_ON()? Really? Is there any condition where you wouldn't be able to
> converge to a single value?

This function is used from the vdso, and thus WARN causes a link error.

> > +	/*
> > +	 * Erratum A-008585 requires back-to-back reads to be identical
> > +	 * in order to avoid glitches.
> > +	 */
> > +	cmp	w17, #0
> > +	b.eq	2f
> > +1:	mrs	x15, cntvct_el0
> > +	mrs	x16, cntvct_el0
> > +	cmp	x16, x15
> > +	b.ne	1b
> > +2:
> 
> Could userspace lock-up here? If it can, you need to be able to bail
> out. If not, then your BUG_ON() sprinkling is bogus.

It *shouldn't* be possible for these loops to time out -- it would not be a
viable workaround if it's not guaranteed to resolve quickly -- but if there
are situations where the workaround fails (e.g. unusual clock speeds) it would
be useful to get that diagnostic rather than have to hunt down a hang.  I can
remove them if you want, though.

As for the VDSO, it seems quite unlikely that failures would be seen only in
userspace and not in the kernel, so the utility of adding a timeout here was
less, especially relative to the hassle.  Will Deacon asked that we leave the
VDSO alone and set use_syscall instead, though, so adding a timeout here is
moot.
 
> >  	/* Calculate cycle delta and convert to ns. */
> >  	sub	x10, x15, x10
> > diff --git a/drivers/clocksource/arm_arch_timer.c
> > b/drivers/clocksource/arm_arch_timer.c
> > index 5152b38..5ed7c7f 100644
> > --- a/drivers/clocksource/arm_arch_timer.c
> > +++ b/drivers/clocksource/arm_arch_timer.c
> > @@ -79,6 +79,9 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
> >  static bool arch_timer_c3stop;
> >  static bool arch_timer_mem_use_virtual;
> >  
> > +bool arm_arch_timer_reread; /* QorIQ erratum A-008585 */
> > +EXPORT_SYMBOL(arm_arch_timer_reread);
> > +
> >  /*
> >   * Architected system timer support.
> >   */
> > @@ -762,6 +765,8 @@ static void __init arch_timer_of_init(struct
> > device_node *np)
> >  	arch_timer_detect_rate(NULL, np);
> >  
> >  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
> > +	arm_arch_timer_reread =
> > +		of_property_read_bool(np, "fsl,erratum-a008585");
> >  
> >  	/*
> >  	 * If we cannot rely on firmware initializing the timer registers
> > then
> > 
> 
> The elephant in the room is KVM. I'm pretty sure it suffers from the
> same erratum, yet you did not handle it at all. I'd expect to see
> something in an upcoming version of the patch.

cval isn't listed in the erratum description as being affected.  I looked
around a bit and couldn't find the KVM code directly accessing tval or count. 
 Am I missing something?

-Scott

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971
  2016-04-11  9:55   ` Marc Zyngier
@ 2016-04-12  5:54     ` Scott Wood
  2016-04-12  8:22       ` Marc Zyngier
  0 siblings, 1 reply; 16+ messages in thread
From: Scott Wood @ 2016-04-12  5:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 2016-04-11 at 10:55 +0100, Marc Zyngier wrote:
> On 11/04/16 03:22, Scott Wood wrote:
> > @@ -52,6 +53,20 @@ extern bool arm_arch_timer_reread;
> >  	_val; \
> >  })
> >  
> > +#define ARCH_TIMER_TVAL_REWRITE(pv, val) do { \
> > +	u64 _cnt_old, _cnt_new; \
> > +	int _timeout = 200; \
> > +	do { \
> > +		asm volatile("mrs %0, cntvct_el0;" \
> > +			     "msr cnt" pv "_tval_el0, %2;" \
> > +			     "mrs %1, cntvct_el0" \
> > +			     : "=&r" (_cnt_old), "=r" (_cnt_new) \
> > +			     : "r" (val)); \
> > +		_timeout--; \
> > +	} while (_cnt_old != _cnt_new && _timeout); \
> > +	WARN_ON_ONCE(!_timeout); \
> > +} while (0)
> > +
> 
> Given how many times you've written that loop, I'm sure you can have a
> preprocessor macro that will do the right thing.

I did use a preprocessor macro.  Are you asking me to consolidate the read and
write macros?  That seems like it would create a mess that's worse than an
extra instance of a simple loop.

-Scott

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971
  2016-04-12  5:54     ` Scott Wood
@ 2016-04-12  8:22       ` Marc Zyngier
  2016-04-17  1:32         ` Scott Wood
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Zyngier @ 2016-04-12  8:22 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/04/16 06:54, Scott Wood wrote:
> On Mon, 2016-04-11 at 10:55 +0100, Marc Zyngier wrote:
>> On 11/04/16 03:22, Scott Wood wrote:
>>> @@ -52,6 +53,20 @@ extern bool arm_arch_timer_reread;
>>>  	_val; \
>>>  })
>>>  
>>> +#define ARCH_TIMER_TVAL_REWRITE(pv, val) do { \
>>> +	u64 _cnt_old, _cnt_new; \
>>> +	int _timeout = 200; \
>>> +	do { \
>>> +		asm volatile("mrs %0, cntvct_el0;" \
>>> +			     "msr cnt" pv "_tval_el0, %2;" \
>>> +			     "mrs %1, cntvct_el0" \
>>> +			     : "=&r" (_cnt_old), "=r" (_cnt_new) \
>>> +			     : "r" (val)); \
>>> +		_timeout--; \
>>> +	} while (_cnt_old != _cnt_new && _timeout); \
>>> +	WARN_ON_ONCE(!_timeout); \
>>> +} while (0)
>>> +
>>
>> Given how many times you've written that loop, I'm sure you can have a
>> preprocessor macro that will do the right thing.
> 
> I did use a preprocessor macro.  Are you asking me to consolidate the read and
> write macros?  That seems like it would create a mess that's worse than an
> extra instance of a simple loop.

>From patch 1:

+static __always_inline
+u32 arch_timer_reg_tval_reread(int access, enum arch_timer_reg reg)
+{
+	u32 val, val_new;
+	int timeout = 200;
+
+	do {
+		if (access == ARCH_TIMER_PHYS_ACCESS) {
+			asm volatile("mrc p15, 0, %0, c14, c2, 0;"
+				     "mrc p15, 0, %1, c14, c2, 0"
+				     : "=r" (val), "=r" (val_new));
+		} else if (access == ARCH_TIMER_VIRT_ACCESS) {
+			asm volatile("mrc p15, 0, %0, c14, c3, 0;"
+				     "mrc p15, 0, %1, c14, c3, 0"
+				     : "=r" (val), "=r" (val_new));
+		}
+		timeout--;
+	} while (val != val_new && timeout);
+
+	WARN_ON_ONCE(!timeout);
+	return val;
+}

[...]

+static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)
 {
-	u64 cval;
+	u64 val, val_new;
+	int timeout = 200;

 	isb();
-	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
+
+	if (reread) {
+		do {
+			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
+				     "mrrc p15, %2, %Q1, %R1, c14"
+				     : "=r" (val), "=r" (val_new)
+				     : "i" (opcode));
+			timeout--;
+		} while (val != val_new && timeout);
+
+		BUG_ON(!timeout);
+	} else {
+		asm volatile("mrrc p15, %1, %Q0, %R0, c14" : "=r" (val)
+			     : "i" (opcode));
+	}
+
+	return val;
 }

[...]

+/* QorIQ errata workarounds */
+#define ARCH_TIMER_REREAD(reg) ({ \
+	u64 _val_old, _val_new; \
+	int _timeout = 200; \
+	do { \
+		asm volatile("mrs %0, " reg ";" \
+			     "mrs %1, " reg \
+			     : "=r" (_val_old), "=r" (_val_new)); \
+		_timeout--; \
+	} while (_val_old != _val_new && _timeout); \
+	WARN_ON_ONCE(!_timeout); \
+	_val_old; \
+})

Do you notice a pattern? You are expressing the same loop in various
ways (sometimes in a function, sometimes in a macro). I'm looking for a
loop template that encapsulates the read access. You can have a similar
macro for writes if you have more than a single instance.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-12  5:48     ` Scott Wood
@ 2016-04-12  9:07       ` Marc Zyngier
  2016-04-13  5:41         ` Scott Wood
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Zyngier @ 2016-04-12  9:07 UTC (permalink / raw)
  To: linux-arm-kernel

On 12/04/16 06:48, Scott Wood wrote:
> On Mon, 2016-04-11 at 10:52 +0100, Marc Zyngier wrote:
>> Hi Scott,
>>
>> On 11/04/16 03:22, Scott Wood wrote:
>>> +static __always_inline
>>> +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>>> +{
>>> +	if (arm_arch_timer_reread && reg == ARCH_TIMER_REG_TVAL)
>>> +		return arch_timer_reg_tval_reread(access, reg);
>>
>> I'm really not keen on this. Please implement this workaround as a
>> static_key, and branch to the workaround in the slow path.
> 
> OK, I'll look into that.
> 
>>> -static inline u64 arch_counter_get_cntpct(void)
>>> +static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)
>>
>> Why the __always_inline? The compiler should already do the right thing.
> 
> The "i" asm constraint requires that it be inline.  Maybe GCC is likely to
> inline it anyway, but it's better to be explicit when it's required for
> correctness.

Probably. But the underlying issue is that you are reinventing your own
accessors instead of using the existing ones to implement your
workaround. What is wrong with looping around the existing accessors?

> 
>>> -	u64 cval;
>>> +	u64 val, val_new;
>>> +	int timeout = 200;
>>>  
>>>  	isb();
>>> -	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
>>> -	return cval;
>>> +
>>> +	if (reread) {
>>> +		do {
>>> +			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
>>> +				     "mrrc p15, %2, %Q1, %R1, c14"
>>> +				     : "=r" (val), "=r" (val_new)
>>> +				     : "i" (opcode));
>>> +			timeout--;
>>> +		} while (val != val_new && timeout);
>>> +
>>> +		BUG_ON(!timeout);
>>
>> BUG_ON()? Really? Is there any condition where you wouldn't be able to
>> converge to a single value?
> 
> This function is used from the vdso, and thus WARN causes a link error.

And surely BUG_ON() is suitable for userspace. /me rolls eyes...

>>> +	/*
>>> +	 * Erratum A-008585 requires back-to-back reads to be identical
>>> +	 * in order to avoid glitches.
>>> +	 */
>>> +	cmp	w17, #0
>>> +	b.eq	2f
>>> +1:	mrs	x15, cntvct_el0
>>> +	mrs	x16, cntvct_el0
>>> +	cmp	x16, x15
>>> +	b.ne	1b
>>> +2:
>>
>> Could userspace lock-up here? If it can, you need to be able to bail
>> out. If not, then your BUG_ON() sprinkling is bogus.
> 
> It *shouldn't* be possible for these loops to time out -- it would not be a
> viable workaround if it's not guaranteed to resolve quickly -- but if there
> are situations where the workaround fails (e.g. unusual clock speeds) it would
> be useful to get that diagnostic rather than have to hunt down a hang.  I can
> remove them if you want, though.

Warning once + tainting the kernel should be enough.

> 
> As for the VDSO, it seems quite unlikely that failures would be seen only in
> userspace and not in the kernel, so the utility of adding a timeout here was
> less, especially relative to the hassle.  Will Deacon asked that we leave the
> VDSO alone and set use_syscall instead, though, so adding a timeout here is
> moot.

Indeed.

>  
>>>  	/* Calculate cycle delta and convert to ns. */
>>>  	sub	x10, x15, x10
>>> diff --git a/drivers/clocksource/arm_arch_timer.c
>>> b/drivers/clocksource/arm_arch_timer.c
>>> index 5152b38..5ed7c7f 100644
>>> --- a/drivers/clocksource/arm_arch_timer.c
>>> +++ b/drivers/clocksource/arm_arch_timer.c
>>> @@ -79,6 +79,9 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
>>>  static bool arch_timer_c3stop;
>>>  static bool arch_timer_mem_use_virtual;
>>>  
>>> +bool arm_arch_timer_reread; /* QorIQ erratum A-008585 */
>>> +EXPORT_SYMBOL(arm_arch_timer_reread);
>>> +
>>>  /*
>>>   * Architected system timer support.
>>>   */
>>> @@ -762,6 +765,8 @@ static void __init arch_timer_of_init(struct
>>> device_node *np)
>>>  	arch_timer_detect_rate(NULL, np);
>>>  
>>>  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
>>> +	arm_arch_timer_reread =
>>> +		of_property_read_bool(np, "fsl,erratum-a008585");
>>>  
>>>  	/*
>>>  	 * If we cannot rely on firmware initializing the timer registers
>>> then
>>>
>>
>> The elephant in the room is KVM. I'm pretty sure it suffers from the
>> same erratum, yet you did not handle it at all. I'd expect to see
>> something in an upcoming version of the patch.
> 
> cval isn't listed in the erratum description as being affected.  I looked
> around a bit and couldn't find the KVM code directly accessing tval or count. 
>  Am I missing something?

You are missing the fact that CVAL and TVAL are the two sides of the
same coin. From the ARMv8 ARM:

<quote>
This view of a timer depends on the following behavior of accesses to
TimerValue registers:

Reads: TimerValue = (CompareValue ? (Counter - Offset))[31:0]
Writes: CompareValue = ((Counter - Offset)[63:0] +
SignExtend(TimerValue))[63:0]
</quote>

So I'd be really surprised if TVAL was buggy and CVAL was not (why would
loop around programming TVAL if you could hit CVAL and be correct?).

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-12  9:07       ` Marc Zyngier
@ 2016-04-13  5:41         ` Scott Wood
  2016-04-13  7:36           ` Marc Zyngier
  0 siblings, 1 reply; 16+ messages in thread
From: Scott Wood @ 2016-04-13  5:41 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 2016-04-12 at 10:07 +0100, Marc Zyngier wrote:
> On 12/04/16 06:48, Scott Wood wrote:
> > On Mon, 2016-04-11 at 10:52 +0100, Marc Zyngier wrote:
> > > Hi Scott,
> > > 
> > > On 11/04/16 03:22, Scott Wood wrote:
> > > > +static __always_inline
> > > > +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
> > > > +{
> > > > +	if (arm_arch_timer_reread && reg == ARCH_TIMER_REG_TVAL)
> > > > +		return arch_timer_reg_tval_reread(access, reg);
> > > 
> > > I'm really not keen on this. Please implement this workaround as a
> > > static_key, and branch to the workaround in the slow path.
> > 
> > OK, I'll look into that.
> > 
> > > > -static inline u64 arch_counter_get_cntpct(void)
> > > > +static __always_inline u64 arch_counter_get_cnt(int opcode, bool
> > > > reread)
> > > 
> > > Why the __always_inline? The compiler should already do the right thing.
> > 
> > The "i" asm constraint requires that it be inline.  Maybe GCC is likely to
> > inline it anyway, but it's better to be explicit when it's required for
> > correctness.
> 
> Probably. But the underlying issue is that you are reinventing your own
> accessors instead of using the existing ones to implement your
> workaround. What is wrong with looping around the existing accessors?

The existing accessors don't guarantee that multiple accesses are done with
back-to-back instructions.  I don't know how far apart they can get without
risking a loop that doesn't finish, nor do I know what weirdness GCC might do,
now or in the future, to place nearby asm statements farther from each other
than expected.

> 
> > 
> > > > -	u64 cval;
> > > > +	u64 val, val_new;
> > > > +	int timeout = 200;
> > > >  
> > > >  	isb();
> > > > -	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
> > > > -	return cval;
> > > > +
> > > > +	if (reread) {
> > > > +		do {
> > > > +			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
> > > > +				     "mrrc p15, %2, %Q1, %R1, c14"
> > > > +				     : "=r" (val), "=r" (val_new)
> > > > +				     : "i" (opcode));
> > > > +			timeout--;
> > > > +		} while (val != val_new && timeout);
> > > > +
> > > > +		BUG_ON(!timeout);
> > > 
> > > BUG_ON()? Really? Is there any condition where you wouldn't be able to
> > > converge to a single value?
> > 
> > This function is used from the vdso, and thus WARN causes a link error.
> 
> And surely BUG_ON() is suitable for userspace. /me rolls eyes...

It's not ideal, but it will raise a signal which seems no worse than a hang,
and again, if there is a problem I expect you'd see it first in the kernel.

I'll have the erratum disable vdso on arm32 as well, and then this can be
WARN_ON_ONCE like the others.

> > > > +	/*
> > > > +	 * Erratum A-008585 requires back-to-back reads to be
> > > > identical
> > > > +	 * in order to avoid glitches.
> > > > +	 */
> > > > +	cmp	w17, #0
> > > > +	b.eq	2f
> > > > +1:	mrs	x15, cntvct_el0
> > > > +	mrs	x16, cntvct_el0
> > > > +	cmp	x16, x15
> > > > +	b.ne	1b
> > > > +2:
> > > 
> > > Could userspace lock-up here? If it can, you need to be able to bail
> > > out. If not, then your BUG_ON() sprinkling is bogus.
> > 
> > It *shouldn't* be possible for these loops to time out -- it would not be
> > a
> > viable workaround if it's not guaranteed to resolve quickly -- but if
> > there
> > are situations where the workaround fails (e.g. unusual clock speeds) it
> > would
> > be useful to get that diagnostic rather than have to hunt down a hang.  I
> > can
> > remove them if you want, though.
> 
> Warning once + tainting the kernel should be enough.

That's what the patches do, in codepaths that are capable of it.

> > > The elephant in the room is KVM. I'm pretty sure it suffers from the
> > > same erratum, yet you did not handle it at all. I'd expect to see
> > > something in an upcoming version of the patch.
> > 
> > cval isn't listed in the erratum description as being affected.  I looked
> > around a bit and couldn't find the KVM code directly accessing tval or
> > count. 
> >  Am I missing something?
> 
> You are missing the fact that CVAL and TVAL are the two sides of the
> same coin. From the ARMv8 ARM:
> 
> <quote>
> This view of a timer depends on the following behavior of accesses to
> TimerValue registers:
> 
> Reads: TimerValue = (CompareValue ? (Counter - Offset))[31:0]
> Writes: CompareValue = ((Counter - Offset)[63:0] +
> SignExtend(TimerValue))[63:0]
> </quote>

If the underlying representation is CompareValue, as the above suggests, then
it makes sense that only tval would be affected, since the underlying problem
is the counter.  The counter needs to be read in order to read or write tval. 
 cval accesses the underlying representation directly, and the bad SoC clock
logic doesn't have a chance to interfere.

> So I'd be really surprised if TVAL was buggy and CVAL was not (why would
> loop around programming TVAL if you could hit CVAL and be correct?).

Switching to cval would be great, if everyone's OK with it.  We'd still need
the loop on the counter.

-Scott

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-13  5:41         ` Scott Wood
@ 2016-04-13  7:36           ` Marc Zyngier
  0 siblings, 0 replies; 16+ messages in thread
From: Marc Zyngier @ 2016-04-13  7:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 13/04/16 06:41, Scott Wood wrote:

[...]

> If the underlying representation is CompareValue, as the above suggests, then
> it makes sense that only tval would be affected, since the underlying problem
> is the counter.  The counter needs to be read in order to read or write tval. 
>  cval accesses the underlying representation directly, and the bad SoC clock
> logic doesn't have a chance to interfere.
> 
>> So I'd be really surprised if TVAL was buggy and CVAL was not (why would
>> loop around programming TVAL if you could hit CVAL and be correct?).
> 
> Switching to cval would be great, if everyone's OK with it.  We'd still need
> the loop on the counter.

Please only do that on the workaround path. I don't want sane platforms
to have to read the counter just to be able to program CVAL, as the core
code is only going to provide you with the delta that should be
programmed in TVAL.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-11  2:22 ` [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585 Scott Wood
  2016-04-11  9:19   ` Will Deacon
  2016-04-11  9:52   ` Marc Zyngier
@ 2016-04-13 13:22   ` Rob Herring
  2016-04-17  0:58     ` Scott Wood
  2 siblings, 1 reply; 16+ messages in thread
From: Rob Herring @ 2016-04-13 13:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Apr 10, 2016 at 9:22 PM, Scott Wood <oss@buserror.net> wrote:
> Erratum A-008585 says that the ARM generic timer "has the potential to
> contain an erroneous value for a small number of core clock cycles
> every time the timer value changes" and that the workaround is to
> reread TVAL and count registers until successive reads return the same
> value.
>
> This erratum can be found on LS1021A (32-bit), LS1043A (64-bit), and
> LS2080A (64-bit).
>
> This patch is loosely based on work by Priyanka Jain and Bhupesh
> Sharma.
>
> Signed-off-by: Scott Wood <oss@buserror.net>
> ---
>  .../devicetree/bindings/arm/arch_timer.txt         |  4 ++
>  arch/arm/boot/dts/ls1021a.dtsi                     |  1 +
>  arch/arm/include/asm/arch_timer.h                  | 71 +++++++++++++++++++---
>  arch/arm/include/asm/vdso_datapage.h               |  1 +
>  arch/arm/kernel/vdso.c                             |  1 +
>  arch/arm/vdso/vgettimeofday.c                      |  2 +-
>  arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi     |  1 +
>  arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
>  arch/arm64/include/asm/arch_timer.h                | 35 ++++++++---
>  arch/arm64/include/asm/vdso_datapage.h             |  1 +
>  arch/arm64/kernel/asm-offsets.c                    |  1 +
>  arch/arm64/kernel/vdso.c                           |  2 +
>  arch/arm64/kernel/vdso/gettimeofday.S              | 14 ++++-
>  drivers/clocksource/arm_arch_timer.c               |  5 ++
>  14 files changed, 121 insertions(+), 19 deletions(-)
>
> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
> index e774128..7117fbd 100644
> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
> @@ -25,6 +25,10 @@ to deliver its interrupts via SPIs.
>  - always-on : a boolean property. If present, the timer is powered through an
>    always-on power domain, therefore it never loses context.
>
> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
> +  QorIQ erratum A-008585, which says reading the timer is unreliable
> +  unless the same value is returned by back-to-back reads.

Bindings need to go to the DT list.

An excellent example of how not having specific compatible strings
will bite you latter.

Rob

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585
  2016-04-13 13:22   ` Rob Herring
@ 2016-04-17  0:58     ` Scott Wood
  0 siblings, 0 replies; 16+ messages in thread
From: Scott Wood @ 2016-04-17  0:58 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2016-04-13 at 08:22 -0500, Rob Herring wrote:
> On Sun, Apr 10, 2016 at 9:22 PM, Scott Wood <oss@buserror.net> wrote:
> > Erratum A-008585 says that the ARM generic timer "has the potential to
> > contain an erroneous value for a small number of core clock cycles
> > every time the timer value changes" and that the workaround is to
> > reread TVAL and count registers until successive reads return the same
> > value.
> > 
> > This erratum can be found on LS1021A (32-bit), LS1043A (64-bit), and
> > LS2080A (64-bit).
> > 
> > This patch is loosely based on work by Priyanka Jain and Bhupesh
> > Sharma.
> > 
> > Signed-off-by: Scott Wood <oss@buserror.net>
> > ---
> >  .../devicetree/bindings/arm/arch_timer.txt         |  4 ++
> >  arch/arm/boot/dts/ls1021a.dtsi                     |  1 +
> >  arch/arm/include/asm/arch_timer.h                  | 71
> > +++++++++++++++++++---
> >  arch/arm/include/asm/vdso_datapage.h               |  1 +
> >  arch/arm/kernel/vdso.c                             |  1 +
> >  arch/arm/vdso/vgettimeofday.c                      |  2 +-
> >  arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi     |  1 +
> >  arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi     |  1 +
> >  arch/arm64/include/asm/arch_timer.h                | 35 ++++++++---
> >  arch/arm64/include/asm/vdso_datapage.h             |  1 +
> >  arch/arm64/kernel/asm-offsets.c                    |  1 +
> >  arch/arm64/kernel/vdso.c                           |  2 +
> >  arch/arm64/kernel/vdso/gettimeofday.S              | 14 ++++-
> >  drivers/clocksource/arm_arch_timer.c               |  5 ++
> >  14 files changed, 121 insertions(+), 19 deletions(-)
> > 
> > diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt
> > b/Documentation/devicetree/bindings/arm/arch_timer.txt
> > index e774128..7117fbd 100644
> > --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
> > +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
> > @@ -25,6 +25,10 @@ to deliver its interrupts via SPIs.
> >  - always-on : a boolean property. If present, the timer is powered
> > through an
> >    always-on power domain, therefore it never loses context.
> > 
> > +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
> > +  QorIQ erratum A-008585, which says reading the timer is unreliable
> > +  unless the same value is returned by back-to-back reads.
> 
> Bindings need to go to the DT list.

Yes, sorry about that.

> An excellent example of how not having specific compatible strings
> will bite you latter.

I don't think that would have helped...  Even if it said "arm,cortex-a57
-timer" it would not have identified the problem, because it comes from system
logic rather than the logic being described.  Even if the SoC name were in
there, it wouldn't usually tell you if it's the revision with the problem.

Normally we'd use SVR to detect such errata but I didn't want to add that
dependency to the common code.

-Scott

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971
  2016-04-12  8:22       ` Marc Zyngier
@ 2016-04-17  1:32         ` Scott Wood
  2016-04-18  9:28           ` Marc Zyngier
  0 siblings, 1 reply; 16+ messages in thread
From: Scott Wood @ 2016-04-17  1:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 2016-04-12 at 09:22 +0100, Marc Zyngier wrote:
> On 12/04/16 06:54, Scott Wood wrote:
> > On Mon, 2016-04-11 at 10:55 +0100, Marc Zyngier wrote:
> > > On 11/04/16 03:22, Scott Wood wrote:
> > > > @@ -52,6 +53,20 @@ extern bool arm_arch_timer_reread;
> > > >  	_val; \
> > > >  })
> > > >  
> > > > +#define ARCH_TIMER_TVAL_REWRITE(pv, val) do { \
> > > > +	u64 _cnt_old, _cnt_new; \
> > > > +	int _timeout = 200; \
> > > > +	do { \
> > > > +		asm volatile("mrs %0, cntvct_el0;" \
> > > > +			     "msr cnt" pv "_tval_el0, %2;" \
> > > > +			     "mrs %1, cntvct_el0" \
> > > > +			     : "=&r" (_cnt_old), "=r" (_cnt_new) \
> > > > +			     : "r" (val)); \
> > > > +		_timeout--; \
> > > > +	} while (_cnt_old != _cnt_new && _timeout); \
> > > > +	WARN_ON_ONCE(!_timeout); \
> > > > +} while (0)
> > > > +
> > > 
> > > Given how many times you've written that loop, I'm sure you can have a
> > > preprocessor macro that will do the right thing.
> > 
> > I did use a preprocessor macro.  Are you asking me to consolidate the read
> > and
> > write macros?  That seems like it would create a mess that's worse than an
> > extra instance of a simple loop.
> 
> From patch 1:
> 
> +static __always_inline
> +u32 arch_timer_reg_tval_reread(int access, enum arch_timer_reg reg)
> +{
> +	u32 val, val_new;
> +	int timeout = 200;
> +
> +	do {
> +		if (access == ARCH_TIMER_PHYS_ACCESS) {
> +			asm volatile("mrc p15, 0, %0, c14, c2, 0;"
> +				     "mrc p15, 0, %1, c14, c2, 0"
> +				     : "=r" (val), "=r" (val_new));
> +		} else if (access == ARCH_TIMER_VIRT_ACCESS) {
> +			asm volatile("mrc p15, 0, %0, c14, c3, 0;"
> +				     "mrc p15, 0, %1, c14, c3, 0"
> +				     : "=r" (val), "=r" (val_new));
> +		}
> +		timeout--;
> +	} while (val != val_new && timeout);
> +
> +	WARN_ON_ONCE(!timeout);
> +	return val;
> +}
> 
> [...]
> 
> +static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)
>  {
> -	u64 cval;
> +	u64 val, val_new;
> +	int timeout = 200;
> 
>  	isb();
> -	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
> -	return cval;
> +
> +	if (reread) {
> +		do {
> +			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
> +				     "mrrc p15, %2, %Q1, %R1, c14"
> +				     : "=r" (val), "=r" (val_new)
> +				     : "i" (opcode));
> +			timeout--;
> +		} while (val != val_new && timeout);
> +
> +		BUG_ON(!timeout);
> +	} else {
> +		asm volatile("mrrc p15, %1, %Q0, %R0, c14" : "=r" (val)
> +			     : "i" (opcode));
> +	}
> +
> +	return val;
>  }
> 
> [...]
> 
> +/* QorIQ errata workarounds */
> +#define ARCH_TIMER_REREAD(reg) ({ \
> +	u64 _val_old, _val_new; \
> +	int _timeout = 200; \
> +	do { \
> +		asm volatile("mrs %0, " reg ";" \
> +			     "mrs %1, " reg \
> +			     : "=r" (_val_old), "=r" (_val_new)); \
> +		_timeout--; \
> +	} while (_val_old != _val_new && _timeout); \
> +	WARN_ON_ONCE(!_timeout); \
> +	_val_old; \
> +})
> 
> Do you notice a pattern? You are expressing the same loop in various
> ways (sometimes in a function, sometimes in a macro). I'm looking for a
> loop template that encapsulates the read access. You can have a similar
> macro for writes if you have more than a single instance.

One that covers arm and arm64?  Where would it go?

If you mean one per arch, that's already the case on 64-bit (and you
complained in response to the write macro, hence my inferring that you wanted
read and write combined).  Two instances on 32-bit (of a fairly simple loop)
didn't seem enough to warrant using ugly macros, but I can if you want (with
the entire asm statement passed as a macro parameter).

-Scott

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971
  2016-04-17  1:32         ` Scott Wood
@ 2016-04-18  9:28           ` Marc Zyngier
  0 siblings, 0 replies; 16+ messages in thread
From: Marc Zyngier @ 2016-04-18  9:28 UTC (permalink / raw)
  To: linux-arm-kernel

On 17/04/16 02:32, Scott Wood wrote:
> On Tue, 2016-04-12 at 09:22 +0100, Marc Zyngier wrote:
>> On 12/04/16 06:54, Scott Wood wrote:
>>> On Mon, 2016-04-11 at 10:55 +0100, Marc Zyngier wrote:
>>>> On 11/04/16 03:22, Scott Wood wrote:
>>>>> @@ -52,6 +53,20 @@ extern bool arm_arch_timer_reread;
>>>>>  	_val; \
>>>>>  })
>>>>>  
>>>>> +#define ARCH_TIMER_TVAL_REWRITE(pv, val) do { \
>>>>> +	u64 _cnt_old, _cnt_new; \
>>>>> +	int _timeout = 200; \
>>>>> +	do { \
>>>>> +		asm volatile("mrs %0, cntvct_el0;" \
>>>>> +			     "msr cnt" pv "_tval_el0, %2;" \
>>>>> +			     "mrs %1, cntvct_el0" \
>>>>> +			     : "=&r" (_cnt_old), "=r" (_cnt_new) \
>>>>> +			     : "r" (val)); \
>>>>> +		_timeout--; \
>>>>> +	} while (_cnt_old != _cnt_new && _timeout); \
>>>>> +	WARN_ON_ONCE(!_timeout); \
>>>>> +} while (0)
>>>>> +
>>>>
>>>> Given how many times you've written that loop, I'm sure you can have a
>>>> preprocessor macro that will do the right thing.
>>>
>>> I did use a preprocessor macro.  Are you asking me to consolidate the read
>>> and
>>> write macros?  That seems like it would create a mess that's worse than an
>>> extra instance of a simple loop.
>>
>> From patch 1:
>>
>> +static __always_inline
>> +u32 arch_timer_reg_tval_reread(int access, enum arch_timer_reg reg)
>> +{
>> +	u32 val, val_new;
>> +	int timeout = 200;
>> +
>> +	do {
>> +		if (access == ARCH_TIMER_PHYS_ACCESS) {
>> +			asm volatile("mrc p15, 0, %0, c14, c2, 0;"
>> +				     "mrc p15, 0, %1, c14, c2, 0"
>> +				     : "=r" (val), "=r" (val_new));
>> +		} else if (access == ARCH_TIMER_VIRT_ACCESS) {
>> +			asm volatile("mrc p15, 0, %0, c14, c3, 0;"
>> +				     "mrc p15, 0, %1, c14, c3, 0"
>> +				     : "=r" (val), "=r" (val_new));
>> +		}
>> +		timeout--;
>> +	} while (val != val_new && timeout);
>> +
>> +	WARN_ON_ONCE(!timeout);
>> +	return val;
>> +}
>>
>> [...]
>>
>> +static __always_inline u64 arch_counter_get_cnt(int opcode, bool reread)
>>  {
>> -	u64 cval;
>> +	u64 val, val_new;
>> +	int timeout = 200;
>>
>>  	isb();
>> -	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
>> -	return cval;
>> +
>> +	if (reread) {
>> +		do {
>> +			asm volatile("mrrc p15, %2, %Q0, %R0, c14;"
>> +				     "mrrc p15, %2, %Q1, %R1, c14"
>> +				     : "=r" (val), "=r" (val_new)
>> +				     : "i" (opcode));
>> +			timeout--;
>> +		} while (val != val_new && timeout);
>> +
>> +		BUG_ON(!timeout);
>> +	} else {
>> +		asm volatile("mrrc p15, %1, %Q0, %R0, c14" : "=r" (val)
>> +			     : "i" (opcode));
>> +	}
>> +
>> +	return val;
>>  }
>>
>> [...]
>>
>> +/* QorIQ errata workarounds */
>> +#define ARCH_TIMER_REREAD(reg) ({ \
>> +	u64 _val_old, _val_new; \
>> +	int _timeout = 200; \
>> +	do { \
>> +		asm volatile("mrs %0, " reg ";" \
>> +			     "mrs %1, " reg \
>> +			     : "=r" (_val_old), "=r" (_val_new)); \
>> +		_timeout--; \
>> +	} while (_val_old != _val_new && _timeout); \
>> +	WARN_ON_ONCE(!_timeout); \
>> +	_val_old; \
>> +})
>>
>> Do you notice a pattern? You are expressing the same loop in various
>> ways (sometimes in a function, sometimes in a macro). I'm looking for a
>> loop template that encapsulates the read access. You can have a similar
>> macro for writes if you have more than a single instance.
> 
> One that covers arm and arm64?  Where would it go?

In the common code. Not anywhere else.

> If you mean one per arch, that's already the case on 64-bit (and you
> complained in response to the write macro, hence my inferring that you wanted
> read and write combined).  Two instances on 32-bit (of a fairly simple loop)
> didn't seem enough to warrant using ugly macros, but I can if you want (with
> the entire asm statement passed as a macro parameter).

One issue is that you insist on these loops being inlined. They shouldn't.
You're going to loop almost forever on this counter, so there is little
point in saving a branch. Once you've given up on that, you'll find that
you can write things in a much nicer way. Like this:

<untested>
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index fbe0ca3..55ff1d4 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -23,6 +23,7 @@
 
 #include <linux/bug.h>
 #include <linux/init.h>
+#include <linux/static_key.h>
 #include <linux/types.h>
 
 #include <clocksource/arm_arch_timer.h>
@@ -114,16 +115,27 @@ static inline u64 arch_counter_get_cntpct(void)
 	return 0;
 }
 
-static inline u64 arch_counter_get_cntvct(void)
+extern struct static_key_false arch_timer_cntvct_ool_enabled;
+extern u64 arch_counter_get_cntvct_ool(void);
+
+/* Do not call this from outside of the arch_timer driver */
+static inline u64 __arch_counter_get_cntvct(void)
 {
 	u64 cval;
-
-	isb();
 	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
-
 	return cval;
 }
 
+static inline u64 arch_counter_get_cntvct(void)
+{
+	if (static_branch_unlikely(&arch_timer_cntvct_ool_enabled)) {
+		return arch_counter_get_cntvct_ool();
+	} else {
+		isb();
+		return __arch_counter_get_cntvct();
+	}
+}
+
 static inline int arch_timer_arch_init(void)
 {
 	return 0;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b38..c8a386c 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -79,10 +79,30 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
 static bool arch_timer_c3stop;
 static bool arch_timer_mem_use_virtual;
 
+DEFINE_STATIC_KEY_FALSE(arch_timer_cntvct_ool_enabled);
+EXPORT_SYMBOL_GPL(arch_timer_cntvct_ool_enabled);
+
 /*
  * Architected system timer support.
  */
 
+u64 arch_counter_get_cntvct_ool(void)
+{
+	u64 cval_old, cval_new;
+	int timeout = 200;
+
+	do {
+		isb();
+		cval_old = __arch_counter_get_cntvct();
+		cval_new = __arch_counter_get_cntvct();
+		timeout--;
+	} while (cval_old != cval_new && timeout);
+
+	WARN_ON_ONCE(!timeout);
+	return cval_new;
+}
+EXPORT_SYMBOL_GPL(arch_counter_get_cntvct_ool);
+
 static __always_inline
 void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
 			  struct clock_event_device *clk)
</untested>

32bit and physical counter handling should be pretty obvious.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-04-18  9:28 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-11  2:22 [RFC PATCH 0/2] ARM/ARM64: arch_timer: QorIQ errata Scott Wood
2016-04-11  2:22 ` [RFC PATCH 1/2] ARM/ARM64: arch_timer: Work around QorIQ Erratum A-008585 Scott Wood
2016-04-11  9:19   ` Will Deacon
2016-04-11  9:52   ` Marc Zyngier
2016-04-12  5:48     ` Scott Wood
2016-04-12  9:07       ` Marc Zyngier
2016-04-13  5:41         ` Scott Wood
2016-04-13  7:36           ` Marc Zyngier
2016-04-13 13:22   ` Rob Herring
2016-04-17  0:58     ` Scott Wood
2016-04-11  2:22 ` [RFC PATCH 2/2] ARM64: arch_timer: Work around QorIQ Erratum A-009971 Scott Wood
2016-04-11  9:55   ` Marc Zyngier
2016-04-12  5:54     ` Scott Wood
2016-04-12  8:22       ` Marc Zyngier
2016-04-17  1:32         ` Scott Wood
2016-04-18  9:28           ` Marc Zyngier

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.