All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] powerpc/fsl-booke: Work around erratum A-006958
@ 2013-07-13  1:03 Scott Wood
  2013-07-15  6:03 ` Aneesh Kumar K.V
  2013-07-15  8:45 ` David Laight
  0 siblings, 2 replies; 8+ messages in thread
From: Scott Wood @ 2013-07-13  1:03 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev

Erratum A-006598 says that 64-bit mftb is not atomic -- it's subject
to the same race condition as doing mftbu/mftbl on 32-bit, thus we
need to use the same loop to safely read it.

This deals with kernel and vdso accesses, but other userspace accesses
will of course need to be fixed elsewhere.

This erratum exists on all current Freescale 64-bit cores (e5500 and
e6500).

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/exception-64s.h  |  2 +-
 arch/powerpc/include/asm/ppc_asm.h        | 25 +++++++++++++++++--------
 arch/powerpc/include/asm/reg.h            |  2 +-
 arch/powerpc/include/asm/time.h           |  4 ++--
 arch/powerpc/include/asm/timex.h          |  3 ++-
 arch/powerpc/kernel/entry_64.S            |  8 ++++----
 arch/powerpc/kernel/exceptions-64e.S      |  4 ++--
 arch/powerpc/kernel/time.c                |  2 +-
 arch/powerpc/kernel/vdso64/gettimeofday.S |  2 +-
 arch/powerpc/platforms/Kconfig.cputype    | 11 +++++++++++
 10 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 07ca627..575c72e 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -279,7 +279,7 @@ do_kvm_##n:								\
 	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
 	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
 	beq	4f;			/* if from kernel mode		*/ \
-	ACCOUNT_CPU_USER_ENTRY(r9, r10);				   \
+	ACCOUNT_CPU_USER_ENTRY(r9, r10, unused);			   \
 	SAVE_PPR(area, r9, r10);					   \
 4:	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
 	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 2f1b6c5..56a82e8 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -25,12 +25,13 @@
  */
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
-#define ACCOUNT_CPU_USER_EXIT(ra, rb)
+#define ACCOUNT_CPU_USER_ENTRY(ra, rb, scratch)
+#define ACCOUNT_CPU_USER_EXIT(ra, rb, scratch)
 #define ACCOUNT_STOLEN_TIME
 #else
-#define ACCOUNT_CPU_USER_ENTRY(ra, rb)					\
-	MFTB(ra);			/* get timebase */		\
+/* Note that MFTB can clobber CR0 as well as the scratch registers. */
+#define ACCOUNT_CPU_USER_ENTRY(ra, rb, scratch)				\
+	MFTB(ra, rb, scratch);		/* get timebase */		\
 	ld	rb,PACA_STARTTIME_USER(r13);				\
 	std	ra,PACA_STARTTIME(r13);					\
 	subf	rb,rb,ra;		/* subtract start value */	\
@@ -38,8 +39,8 @@
 	add	ra,ra,rb;		/* add on to user time */	\
 	std	ra,PACA_USER_TIME(r13);					\
 
-#define ACCOUNT_CPU_USER_EXIT(ra, rb)					\
-	MFTB(ra);			/* get timebase */		\
+#define ACCOUNT_CPU_USER_EXIT(ra, rb, scratch)				\
+	MFTB(ra, rb, scratch);		/* get timebase */		\
 	ld	rb,PACA_STARTTIME(r13);					\
 	std	ra,PACA_STARTTIME_USER(r13);				\
 	subf	rb,rb,ra;		/* subtract start value */	\
@@ -444,14 +445,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
 #endif
 
 #ifdef CONFIG_PPC_CELL
-#define MFTB(dest)			\
+#define MFTB(dest, scratch1, scratch2)	\
 90:	mftb  dest;			\
 BEGIN_FTR_SECTION_NESTED(96);		\
 	cmpwi dest,0;			\
 	beq-  90b;			\
 END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
+#elif defined(CONFIG_FSL_ERRATUM_A_006958)
+#define MFTB(dest, scratch1, scratch2)	\
+90:	mftbu	scratch1;		\
+	mftbl	dest;			\
+	mftbu	scratch2;		\
+	cmpw	scratch1,scratch2;	\
+	bne	90b;			\
+	rldimi	dest,scratch1,32,0;
 #else
-#define MFTB(dest)			mftb dest
+#define MFTB(dest, scratch1, scratch2)	mftb dest
 #endif
 
 #ifndef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 5d7d9c2..82aae0d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1115,7 +1115,7 @@
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
 
-#ifdef __powerpc64__
+#if defined(CONFIG_PPC64) && !defined(CONFIG_FSL_ERRATUM_A_006958)
 #ifdef CONFIG_PPC_CELL
 #define mftb()		({unsigned long rval;				\
 			asm volatile(					\
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index c1f2676..13340d2 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -52,7 +52,7 @@ struct div_result {
 #define __USE_RTC()	0
 #endif
 
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) && !defined(CONFIG_FSL_ERRATUM_A_006958)
 
 /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */
 #define get_tbl		get_tb
@@ -101,7 +101,7 @@ static inline u64 get_rtc(void)
 	return (u64)hi * 1000000000 + lo;
 }
 
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) && !defined(CONFIG_FSL_ERRATUM_A_006958)
 static inline u64 get_tb(void)
 {
 	return mftb();
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index c55e14f..101c03e 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -9,6 +9,7 @@
 
 #include <asm/cputable.h>
 #include <asm/reg.h>
+#include <asm/time.h>
 
 #define CLOCK_TICK_RATE	1024000 /* Underlying HZ */
 
@@ -17,7 +18,7 @@ typedef unsigned long cycles_t;
 static inline cycles_t get_cycles(void)
 {
 #ifdef __powerpc64__
-	return mftb();
+	return get_tb();
 #else
 	cycles_t ret;
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ab15b8d..ccfd894 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -64,7 +64,7 @@ system_call_common:
 	std	r0,GPR0(r1)
 	std	r10,GPR1(r1)
 	beq	2f			/* if from kernel mode */
-	ACCOUNT_CPU_USER_ENTRY(r10, r11)
+	ACCOUNT_CPU_USER_ENTRY(r10, r11, r12)
 2:	std	r2,GPR2(r1)
 	std	r3,GPR3(r1)
 	mfcr	r2
@@ -227,7 +227,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	ld	r4,_LINK(r1)
 
 	beq-	1f
-	ACCOUNT_CPU_USER_EXIT(r11, r12)
+	ACCOUNT_CPU_USER_EXIT(r11, r12, r2)
 	HMT_MEDIUM_LOW_HAS_PPR
 	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
 1:	ld	r2,GPR2(r1)
@@ -840,7 +840,7 @@ fast_exception_return:
 	 */
 	andi.	r0,r3,MSR_PR
 	beq	1f
-	ACCOUNT_CPU_USER_EXIT(r2, r4)
+	ACCOUNT_CPU_USER_EXIT(r2, r4, unused)
 	RESTORE_PPR(r2, r4)
 	REST_GPR(13, r1)
 1:
@@ -860,7 +860,7 @@ fast_exception_return:
 	rfid
 	b	.	/* prevent speculative execution */
 
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* !CONFIG_PPC_BOOK3E */
 
 	/*
 	 * We are returning to a context with interrupts soft disabled.
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 645170a..2b5c114 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -160,7 +160,7 @@ exc_##n##_common:							    \
 	std	r10,_NIP(r1);		/* save SRR0 to stackframe */	    \
 	std	r11,_MSR(r1);		/* save SRR1 to stackframe */	    \
 	beq	2f;			/* if from kernel mode */	    \
-	ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */	    \
+	ACCOUNT_CPU_USER_ENTRY(r10,r11,r3); /* accounting (uses cr0+eq) */  \
 2:	ld	r3,excf+EX_R10(r13);	/* get back r10 */		    \
 	ld	r4,excf+EX_R11(r13);	/* get back r11 */		    \
 	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */		    \
@@ -806,7 +806,7 @@ fast_exception_return:
 	andi.	r6,r10,MSR_PR
 	REST_2GPRS(6, r1)
 	beq	1f
-	ACCOUNT_CPU_USER_EXIT(r10, r11)
+	ACCOUNT_CPU_USER_EXIT(r10, r11, r8)
 	ld	r0,GPR13(r1)
 
 1:	stdcx.	r0,0,r1		/* to clear the reservation */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 65ab9e9..6255c85 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -299,7 +299,7 @@ static u64 vtime_delta(struct task_struct *tsk,
 
 	WARN_ON_ONCE(!irqs_disabled());
 
-	now = mftb();
+	now = get_tb();
 	nowscaled = read_spurr(now);
 	get_paca()->system_time += now - get_paca()->starttime;
 	get_paca()->starttime = now;
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index a76b4af..c6bf8bf 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -213,7 +213,7 @@ V_FUNCTION_BEGIN(__do_get_tspec)
 	/* Get TB & offset it. We use the MFTB macro which will generate
 	 * workaround code for Cell.
 	 */
-	MFTB(r6)
+	MFTB(r6,r9,r5)
 	ld	r9,CFG_TB_ORIG_STAMP(r3)
 	subf	r6,r9,r6
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index ae0aaea..7f0e2e5 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -165,6 +165,17 @@ config PPC_E500MC
 	  such as e5500/e6500), and must be disabled for running on
 	  e500v1 or e500v2.
 
+config FSL_ERRATUM_A_006958
+	bool
+	depends on PPC_E500MC && PPC64
+	default y
+	help
+	  Workaround for erratum A-006958, which says that 64-bit
+	  timebase reads are not atomic.  The workaround is to fall back
+	  to the 32-bit method of reading timebase.  Note that timebase
+	  is readable by userspace, so any non-vdso userspace accesses
+	  will need to have the workaround applied separately.
+
 config PPC_FPU
 	bool
 	default y if PPC64
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-13  1:03 [PATCH] powerpc/fsl-booke: Work around erratum A-006958 Scott Wood
@ 2013-07-15  6:03 ` Aneesh Kumar K.V
  2013-07-15 16:55   ` Scott Wood
  2013-07-15  8:45 ` David Laight
  1 sibling, 1 reply; 8+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-15  6:03 UTC (permalink / raw)
  To: Scott Wood, Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev

Scott Wood <scottwood@freescale.com> writes:



> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index ae0aaea..7f0e2e5 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -165,6 +165,17 @@ config PPC_E500MC
>  	  such as e5500/e6500), and must be disabled for running on
>  	  e500v1 or e500v2.
>
> +config FSL_ERRATUM_A_006958
> +	bool
> +	depends on PPC_E500MC && PPC64
> +	default y
> +	help
> +	  Workaround for erratum A-006958, which says that 64-bit
> +	  timebase reads are not atomic.  The workaround is to fall back
> +	  to the 32-bit method of reading timebase.  Note that timebase
> +	  is readable by userspace, so any non-vdso userspace accesses
> +	  will need to have the workaround applied separately.
> +
>  config PPC_FPU
>  	bool
>  	default y if PPC64


I am completely new to this area, so ignore if it is silly. But how do
we expect to select this config ? Should that happen via .config ?
That seems strange, because without this change some of the configs
will surely be broken/buggy right ?. I was expecting it not be a config
entry, primarily because I haven't seem something similar on other
archs. And if there is an erratum against a cpu release, we should by
default apply this when we know we are running on those cpus right ?
Which implies this should not be a config option ?

-aneesh

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-13  1:03 [PATCH] powerpc/fsl-booke: Work around erratum A-006958 Scott Wood
  2013-07-15  6:03 ` Aneesh Kumar K.V
@ 2013-07-15  8:45 ` David Laight
  2013-07-15 16:53   ` Scott Wood
  1 sibling, 1 reply; 8+ messages in thread
From: David Laight @ 2013-07-15  8:45 UTC (permalink / raw)
  To: Scott Wood, Benjamin Herrenschmidt; +Cc: linuxppc-dev

> +#define MFTB(dest, scratch1, scratch2)	\
> +90:	mftbu	scratch1;		\
> +	mftbl	dest;			\
> +	mftbu	scratch2;		\
> +	cmpw	scratch1,scratch2;	\
> +	bne	90b;			\
> +	rldimi	dest,scratch1,32,0;

Are the three mftbu/l instructions guaranteed to be executed
in order?

Also, if the high word changes, there is no need to loop.
Just return the second value with a low word of zero
(the returned count happened while the function was active).

If the mftbx instructions are synchronizing ones (or need
sequencing), then it might be possible to only have 2 of them
by reading the 64bit value and the high 32bits.
(The correct order depends on the exact definition of the errata!)

Another option is that the 64bit value returned by mftb is
(presumably) only likely to be wrong when the low 32bits is
very near zero (either side).
A second mftb could be done in this case only with some logic
to work out a valid result (I think):
  High changed - 2nd high, low zero.
  First low -ve - first value.
  First low +ve - second value.

	David

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-15  8:45 ` David Laight
@ 2013-07-15 16:53   ` Scott Wood
  2013-07-15 22:15     ` Scott Wood
  0 siblings, 1 reply; 8+ messages in thread
From: Scott Wood @ 2013-07-15 16:53 UTC (permalink / raw)
  To: David Laight; +Cc: linuxppc-dev

On 07/15/2013 03:45:36 AM, David Laight wrote:
> > +#define MFTB(dest, scratch1, scratch2)	\
> > +90:	mftbu	scratch1;		\
> > +	mftbl	dest;			\
> > +	mftbu	scratch2;		\
> > +	cmpw	scratch1,scratch2;	\
> > +	bne	90b;			\
> > +	rldimi	dest,scratch1,32,0;
>=20
> Are the three mftbu/l instructions guaranteed to be executed
> in order?

This is the architecturally defined instruction sequence for 32-bit, =20
and is what the erratum writeup says to use.

> Also, if the high word changes, there is no need to loop.
> Just return the second value with a low word of zero
> (the returned count happened while the function was active).

That would be more complicated than looping.

> If the mftbx instructions are synchronizing ones (or need
> sequencing), then it might be possible to only have 2 of them
> by reading the 64bit value and the high 32bits.
> (The correct order depends on the exact definition of the errata!)

Again, I'm not sure how helpful that would be, and it would be =20
deviating from the instruction sequence called for by the erratum text, =20
which says to use the instruction sequence used on 32-bit.

> Another option is that the 64bit value returned by mftb is
> (presumably) only likely to be wrong when the low 32bits is
> very near zero (either side).
> A second mftb could be done in this case only with some logic
> to work out a valid result (I think):
>   High changed - 2nd high, low zero.
>   First low -ve - first value.
>   First low +ve - second value.

Likewise.

-Scott=

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-15  6:03 ` Aneesh Kumar K.V
@ 2013-07-15 16:55   ` Scott Wood
  0 siblings, 0 replies; 8+ messages in thread
From: Scott Wood @ 2013-07-15 16:55 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linuxppc-dev

On 07/15/2013 01:03:22 AM, Aneesh Kumar K.V wrote:
> Scott Wood <scottwood@freescale.com> writes:
>=20
>=20
>=20
> > diff --git a/arch/powerpc/platforms/Kconfig.cputype =20
> b/arch/powerpc/platforms/Kconfig.cputype
> > index ae0aaea..7f0e2e5 100644
> > --- a/arch/powerpc/platforms/Kconfig.cputype
> > +++ b/arch/powerpc/platforms/Kconfig.cputype
> > @@ -165,6 +165,17 @@ config PPC_E500MC
> >  	  such as e5500/e6500), and must be disabled for running on
> >  	  e500v1 or e500v2.
> >
> > +config FSL_ERRATUM_A_006958
> > +	bool
> > +	depends on PPC_E500MC && PPC64
> > +	default y
> > +	help
> > +	  Workaround for erratum A-006958, which says that 64-bit
> > +	  timebase reads are not atomic.  The workaround is to fall back
> > +	  to the 32-bit method of reading timebase.  Note that timebase
> > +	  is readable by userspace, so any non-vdso userspace accesses
> > +	  will need to have the workaround applied separately.
> > +
> >  config PPC_FPU
> >  	bool
> >  	default y if PPC64
>=20
>=20
> I am completely new to this area, so ignore if it is silly. But how do
> we expect to select this config ? Should that happen via .config ?

It will automatically be selected if PPC_E500MC and PPC64 are both =20
selected.

-Scott=

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-15 16:53   ` Scott Wood
@ 2013-07-15 22:15     ` Scott Wood
  2013-07-16  8:28       ` David Laight
  0 siblings, 1 reply; 8+ messages in thread
From: Scott Wood @ 2013-07-15 22:15 UTC (permalink / raw)
  To: Scott Wood; +Cc: David Laight, linuxppc-dev

On 07/15/2013 11:53:54 AM, Scott Wood wrote:
> On 07/15/2013 03:45:36 AM, David Laight wrote:
>> Also, if the high word changes, there is no need to loop.
>> Just return the second value with a low word of zero
>> (the returned count happened while the function was active).
>=20
> That would be more complicated than looping.

That said, it's since been confirmed internally that the low word =20
should always be zero when this happens, so we could share the Cell =20
workaround code -- as long as we do something special in the timebase =20
sync code so that we don't get stuck if the timebase happens to be =20
frozen with TBL=3D=3D0.  This would avoid the need for scratch registers =20
(other than CR0).

-Scott=

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-15 22:15     ` Scott Wood
@ 2013-07-16  8:28       ` David Laight
  2013-07-16 18:16         ` Scott Wood
  0 siblings, 1 reply; 8+ messages in thread
From: David Laight @ 2013-07-16  8:28 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev

> On 07/15/2013 11:53:54 AM, Scott Wood wrote:
> > On 07/15/2013 03:45:36 AM, David Laight wrote:
> >> Also, if the high word changes, there is no need to loop.
> >> Just return the second value with a low word of zero
> >> (the returned count happened while the function was active).
> >
> > That would be more complicated than looping.

I'm not that familiar with the ppc instructions set, but on x86
the equivalent instructions are synchronising ones - so can have
performance penalties, so a few extra 'normal' instructions to
avoid re-reading the timebase counter may be worth while.

The branch for the loop might also be statically predicted 'taken'
leading to a branch misprediction penalty as well.

> That said, it's since been confirmed internally that the low word
> should always be zero when this happens, so we could share the Cell
> workaround code -- as long as we do something special in the timebase
> sync code so that we don't get stuck if the timebase happens to be
> frozen with TBL=3D=3D0.  This would avoid the need for scratch =
registers
> (other than CR0).

Yes - if the actual errata is that the low word is read one clock
later then the high word then the only illegal value is one where
the low word is zero.
In that case it is sufficient to reread the counter - it can't be
wrong again!
Indeed it is only necessary to read the high part (even if the
code pre-empted for over 2^32 counts, the returned value will
have happened during the designated period).

	David

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] powerpc/fsl-booke: Work around erratum A-006958
  2013-07-16  8:28       ` David Laight
@ 2013-07-16 18:16         ` Scott Wood
  0 siblings, 0 replies; 8+ messages in thread
From: Scott Wood @ 2013-07-16 18:16 UTC (permalink / raw)
  To: David Laight; +Cc: linuxppc-dev

On 07/16/2013 03:28:06 AM, David Laight wrote:
> > On 07/15/2013 11:53:54 AM, Scott Wood wrote:
> > > On 07/15/2013 03:45:36 AM, David Laight wrote:
> > >> Also, if the high word changes, there is no need to loop.
> > >> Just return the second value with a low word of zero
> > >> (the returned count happened while the function was active).
> > >
> > > That would be more complicated than looping.
>=20
> I'm not that familiar with the ppc instructions set, but on x86
> the equivalent instructions are synchronising ones - so can have
> performance penalties, so a few extra 'normal' instructions to
> avoid re-reading the timebase counter may be worth while.

The core manual doesn't list any special syncronization for it, though =20
it does take 4 cycles.

> The branch for the loop might also be statically predicted 'taken'
> leading to a branch misprediction penalty as well.

The branch predictor should work fine most of the time.

> > That said, it's since been confirmed internally that the low word
> > should always be zero when this happens, so we could share the Cell
> > workaround code -- as long as we do something special in the =20
> timebase
> > sync code so that we don't get stuck if the timebase happens to be
> > frozen with TBL=3D=3D0.  This would avoid the need for scratch register=
s
> > (other than CR0).
>=20
> Yes - if the actual errata is that the low word is read one clock
> later then the high word then the only illegal value is one where
> the low word is zero.

It's actually the timebase itself that is updated in the low word =20
first...

> In that case it is sufficient to reread the counter - it can't be
> wrong again!

...which means, while it *probably* won't be wrong again, I'd want a =20
statement from the hardware people that this is guaranteed if we were =20
to rely on it.

The workaround for a similar bug on Cell rereads until the lower half =20
is non-zero, which will be fine as long as we avoid the workaround in =20
timebase sync code (when the timebase is frozen).

-Scott=

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-07-16 18:17 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-13  1:03 [PATCH] powerpc/fsl-booke: Work around erratum A-006958 Scott Wood
2013-07-15  6:03 ` Aneesh Kumar K.V
2013-07-15 16:55   ` Scott Wood
2013-07-15  8:45 ` David Laight
2013-07-15 16:53   ` Scott Wood
2013-07-15 22:15     ` Scott Wood
2013-07-16  8:28       ` David Laight
2013-07-16 18:16         ` Scott Wood

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.