All of lore.kernel.org
 help / color / mirror / Atom feed
* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
@ 2016-04-04 18:31 Hans de Goede
  2016-04-04 18:31 ` [U-Boot] [PATCH 2/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL) " Hans de Goede
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Hans de Goede @ 2016-04-04 18:31 UTC (permalink / raw)
  To: u-boot

v7_maint_dcache_all() does not work reliable when build with gcc6,
see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788

While debugging this I learned that v7_maint_dcache_all() is unreliable
when build with gcc5 too when it is marked as noinline.

This commit fixes the reliability issues by replacing the C-code with
the ready to use asm implementation from the kernel.

Given that this code when written as C-code clearly is quite fragile
(also see the existing comments about the C-code being the way it is
 to get optimal assembly) and that we have a proven asm alternative,
I believe that this is the best solution.

Note that we actually already have a copy of the kernel's
v7_flush_dcache_all() in arch/arm/mach-uniphier/arm32/lowlevel_init.S.

We should replace that implementation with a call to this one, but I'm
leaving this up to people with access to actual unifier hw. With this
replacement in mind I've kept the original function as is, only renamed
it to __v7_flush_dcache_all and v7_flush_dcache_all is a wrapper
saving the registered clobbered by the core __v7_flush_dcache_all code

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 arch/arm/cpu/armv7/Makefile       |  2 +-
 arch/arm/cpu/armv7/cache_v7.c     | 41 +++----------------
 arch/arm/cpu/armv7/cache_v7_asm.S | 85 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 36 deletions(-)
 create mode 100644 arch/arm/cpu/armv7/cache_v7_asm.S

diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile
index 45f346c..328c4b1 100644
--- a/arch/arm/cpu/armv7/Makefile
+++ b/arch/arm/cpu/armv7/Makefile
@@ -7,7 +7,7 @@
 
 extra-y	:= start.o
 
-obj-y	+= cache_v7.o
+obj-y	+= cache_v7.o cache_v7_asm.o
 
 obj-y	+= cpu.o cp15.o
 obj-y	+= syslib.o
diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c
index 94ff488..dd07ba1 100644
--- a/arch/arm/cpu/armv7/cache_v7.c
+++ b/arch/arm/cpu/armv7/cache_v7.c
@@ -16,6 +16,10 @@
 #define ARMV7_DCACHE_CLEAN_INVAL_RANGE	4
 
 #ifndef CONFIG_SYS_DCACHE_OFF
+
+/* Asm functions from cache_v7_asm.S */
+void v7_flush_dcache_all(void);
+
 static int check_cache_range(unsigned long start, unsigned long stop)
 {
 	int ok = 1;
@@ -88,34 +92,6 @@ static void v7_inval_dcache_level_setway(u32 level, u32 num_sets,
 	DSB;
 }
 
-static void v7_clean_inval_dcache_level_setway(u32 level, u32 num_sets,
-					       u32 num_ways, u32 way_shift,
-					       u32 log2_line_len)
-{
-	int way, set;
-	u32 setway;
-
-	/*
-	 * For optimal assembly code:
-	 *	a. count down
-	 *	b. have bigger loop inside
-	 */
-	for (way = num_ways - 1; way >= 0 ; way--) {
-		for (set = num_sets - 1; set >= 0; set--) {
-			setway = (level << 1) | (set << log2_line_len) |
-				 (way << way_shift);
-			/*
-			 * Clean & Invalidate data/unified
-			 * cache line by set/way
-			 */
-			asm volatile ("	mcr p15, 0, %0, c7, c14, 2"
-					: : "r" (setway));
-		}
-	}
-	/* DSB to make sure the operation is complete */
-	DSB;
-}
-
 static void v7_maint_dcache_level_setway(u32 level, u32 operation)
 {
 	u32 ccsidr;
@@ -142,13 +118,8 @@ static void v7_maint_dcache_level_setway(u32 level, u32 operation)
 	log2_num_ways = log_2_n_round_up(num_ways);
 
 	way_shift = (32 - log2_num_ways);
-	if (operation == ARMV7_DCACHE_INVAL_ALL) {
-		v7_inval_dcache_level_setway(level, num_sets, num_ways,
+	v7_inval_dcache_level_setway(level, num_sets, num_ways,
 				      way_shift, log2_line_len);
-	} else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL) {
-		v7_clean_inval_dcache_level_setway(level, num_sets, num_ways,
-						   way_shift, log2_line_len);
-	}
 }
 
 static void v7_maint_dcache_all(u32 operation)
@@ -263,7 +234,7 @@ void invalidate_dcache_all(void)
  */
 void flush_dcache_all(void)
 {
-	v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL);
+	v7_flush_dcache_all();
 
 	v7_outer_cache_flush_all();
 }
diff --git a/arch/arm/cpu/armv7/cache_v7_asm.S b/arch/arm/cpu/armv7/cache_v7_asm.S
new file mode 100644
index 0000000..1b1daf8
--- /dev/null
+++ b/arch/arm/cpu/armv7/cache_v7_asm.S
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2012-2015 Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+#include <linux/sizes.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_SYS_THUMB_BUILD
+#define ARM(x...)
+#define THUMB(x...)	x
+#else
+#define ARM(x...)	x
+#define THUMB(x...)
+#endif
+
+/*
+ *	v7_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ *
+ *	Note: copied from arch/arm/mm/cache-v7.S of Linux 4.4
+ */
+ENTRY(__v7_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	finished			@ if loc is 0, then no need to clean
+	mov	r10, #0				@ start clean at cache level 0
+flush_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask of the bits for current cache only
+	cmp	r1, #2				@ see what cache we have@this level
+	blt	skip				@ skip if no cache, or just i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	isb					@ isb to sych the new cssr&csidr
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+loop1:
+	mov	r9, r7				@ create working copy of max index
+loop2:
+ ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r4, r5		)
+ THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
+ ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r9, r2		)
+ THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
+	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
+skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	flush_levels
+finished:
+	mov	r10, #0				@ swith back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb	st
+	isb
+	bx	lr
+ENDPROC(__v7_flush_dcache_all)
+
+ENTRY(v7_flush_dcache_all)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bl	__v7_flush_dcache_all
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bx	lr
+ENDPROC(v7_flush_dcache_all)
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 2/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL) with asm code
  2016-04-04 18:31 [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code Hans de Goede
@ 2016-04-04 18:31 ` Hans de Goede
  2016-04-04 23:59 ` [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) " Tom Rini
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Hans de Goede @ 2016-04-04 18:31 UTC (permalink / raw)
  To: u-boot

Lets be consistent and also replace v7_maint_dcache_all()
with asm code for the invalidate case.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 arch/arm/cpu/armv7/cache_v7.c     | 100 ++------------------------------------
 arch/arm/cpu/armv7/cache_v7_asm.S |  70 ++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 96 deletions(-)

diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c
index dd07ba1..dc309da 100644
--- a/arch/arm/cpu/armv7/cache_v7.c
+++ b/arch/arm/cpu/armv7/cache_v7.c
@@ -10,15 +10,14 @@
 #include <asm/armv7.h>
 #include <asm/utils.h>
 
-#define ARMV7_DCACHE_INVAL_ALL		1
-#define ARMV7_DCACHE_CLEAN_INVAL_ALL	2
-#define ARMV7_DCACHE_INVAL_RANGE	3
-#define ARMV7_DCACHE_CLEAN_INVAL_RANGE	4
+#define ARMV7_DCACHE_INVAL_RANGE	1
+#define ARMV7_DCACHE_CLEAN_INVAL_RANGE	2
 
 #ifndef CONFIG_SYS_DCACHE_OFF
 
 /* Asm functions from cache_v7_asm.S */
 void v7_flush_dcache_all(void);
+void v7_invalidate_dcache_all(void);
 
 static int check_cache_range(unsigned long start, unsigned long stop)
 {
@@ -37,18 +36,6 @@ static int check_cache_range(unsigned long start, unsigned long stop)
 	return ok;
 }
 
-/*
- * Write the level and type you want to Cache Size Selection Register(CSSELR)
- * to get size details from Current Cache Size ID Register(CCSIDR)
- */
-static void set_csselr(u32 level, u32 type)
-{
-	u32 csselr = level << 1 | type;
-
-	/* Write to Cache Size Selection Register(CSSELR) */
-	asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr));
-}
-
 static u32 get_ccsidr(void)
 {
 	u32 ccsidr;
@@ -58,85 +45,6 @@ static u32 get_ccsidr(void)
 	return ccsidr;
 }
 
-static u32 get_clidr(void)
-{
-	u32 clidr;
-
-	/* Read current CP15 Cache Level ID Register */
-	asm volatile ("mrc p15,1,%0,c0,c0,1" : "=r" (clidr));
-	return clidr;
-}
-
-static void v7_inval_dcache_level_setway(u32 level, u32 num_sets,
-					 u32 num_ways, u32 way_shift,
-					 u32 log2_line_len)
-{
-	int way, set;
-	u32 setway;
-
-	/*
-	 * For optimal assembly code:
-	 *	a. count down
-	 *	b. have bigger loop inside
-	 */
-	for (way = num_ways - 1; way >= 0 ; way--) {
-		for (set = num_sets - 1; set >= 0; set--) {
-			setway = (level << 1) | (set << log2_line_len) |
-				 (way << way_shift);
-			/* Invalidate data/unified cache line by set/way */
-			asm volatile ("	mcr p15, 0, %0, c7, c6, 2"
-					: : "r" (setway));
-		}
-	}
-	/* DSB to make sure the operation is complete */
-	DSB;
-}
-
-static void v7_maint_dcache_level_setway(u32 level, u32 operation)
-{
-	u32 ccsidr;
-	u32 num_sets, num_ways, log2_line_len, log2_num_ways;
-	u32 way_shift;
-
-	set_csselr(level, ARMV7_CSSELR_IND_DATA_UNIFIED);
-
-	ccsidr = get_ccsidr();
-
-	log2_line_len = ((ccsidr & CCSIDR_LINE_SIZE_MASK) >>
-				CCSIDR_LINE_SIZE_OFFSET) + 2;
-	/* Converting from words to bytes */
-	log2_line_len += 2;
-
-	num_ways  = ((ccsidr & CCSIDR_ASSOCIATIVITY_MASK) >>
-			CCSIDR_ASSOCIATIVITY_OFFSET) + 1;
-	num_sets  = ((ccsidr & CCSIDR_NUM_SETS_MASK) >>
-			CCSIDR_NUM_SETS_OFFSET) + 1;
-	/*
-	 * According to ARMv7 ARM number of sets and number of ways need
-	 * not be a power of 2
-	 */
-	log2_num_ways = log_2_n_round_up(num_ways);
-
-	way_shift = (32 - log2_num_ways);
-	v7_inval_dcache_level_setway(level, num_sets, num_ways,
-				      way_shift, log2_line_len);
-}
-
-static void v7_maint_dcache_all(u32 operation)
-{
-	u32 level, cache_type, level_start_bit = 0;
-	u32 clidr = get_clidr();
-
-	for (level = 0; level < 7; level++) {
-		cache_type = (clidr >> level_start_bit) & 0x7;
-		if ((cache_type == ARMV7_CLIDR_CTYPE_DATA_ONLY) ||
-		    (cache_type == ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA) ||
-		    (cache_type == ARMV7_CLIDR_CTYPE_UNIFIED))
-			v7_maint_dcache_level_setway(level, operation);
-		level_start_bit += 3;
-	}
-}
-
 static void v7_dcache_clean_inval_range(u32 start, u32 stop, u32 line_len)
 {
 	u32 mva;
@@ -223,7 +131,7 @@ static void v7_inval_tlb(void)
 
 void invalidate_dcache_all(void)
 {
-	v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL);
+	v7_invalidate_dcache_all();
 
 	v7_outer_cache_inval_all();
 }
diff --git a/arch/arm/cpu/armv7/cache_v7_asm.S b/arch/arm/cpu/armv7/cache_v7_asm.S
index 1b1daf8..e47571f 100644
--- a/arch/arm/cpu/armv7/cache_v7_asm.S
+++ b/arch/arm/cpu/armv7/cache_v7_asm.S
@@ -83,3 +83,73 @@ ENTRY(v7_flush_dcache_all)
  THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
 	bx	lr
 ENDPROC(v7_flush_dcache_all)
+
+/*
+ *	v7_invalidate_dcache_all()
+ *
+ *	Invalidate the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ *
+ *	Note: copied from __v7_flush_dcache_all above with
+ *	mcr     p15, 0, r11, c7, c14, 2
+ *	Replaced with:
+ *	mcr     p15, 0, r11, c7, c6, 2
+ */
+ENTRY(__v7_invalidate_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	inval_finished			@ if loc is 0, then no need to clean
+	mov	r10, #0				@ start clean at cache level 0
+inval_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask of the bits for current cache only
+	cmp	r1, #2				@ see what cache we have@this level
+	blt	inval_skip			@ skip if no cache, or just i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	isb					@ isb to sych the new cssr&csidr
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+inval_loop1:
+	mov	r9, r7				@ create working copy of max index
+inval_loop2:
+ ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r4, r5		)
+ THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
+ ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r9, r2		)
+ THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
+	mcr	p15, 0, r11, c7, c6, 2		@ invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	inval_loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	inval_loop1
+inval_skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	inval_levels
+inval_finished:
+	mov	r10, #0				@ swith back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb	st
+	isb
+	bx	lr
+ENDPROC(__v7_invalidate_dcache_all)
+
+ENTRY(v7_invalidate_dcache_all)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bl	__v7_invalidate_dcache_all
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bx	lr
+ENDPROC(v7_invalidate_dcache_all)
-- 
2.7.3

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-04 18:31 [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code Hans de Goede
  2016-04-04 18:31 ` [U-Boot] [PATCH 2/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL) " Hans de Goede
@ 2016-04-04 23:59 ` Tom Rini
  2016-04-05  8:33   ` Hans de Goede
  2016-04-06  2:07 ` Masahiro Yamada
  2016-04-06 14:51 ` Tom Rini
  3 siblings, 1 reply; 10+ messages in thread
From: Tom Rini @ 2016-04-04 23:59 UTC (permalink / raw)
  To: u-boot

On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:

> v7_maint_dcache_all() does not work reliable when build with gcc6,
> see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788

So, I see on the bug you want to know if U-Boot is OK with this patch
series being the fix.  At the high level, yes, borrowing code from the
Linux Kernel is a good clean-up and I'd like to see this series could
clean up things a little more and borrow from cache-v7.S when we could.

But I'd also push back on the toolchain team.  Are they happy saying
"that code is just too fragile, it's probably relying on undefined
behavior, investigation concluded" ?

-- 
Tom
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20160404/906221b5/attachment.sig>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-04 23:59 ` [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) " Tom Rini
@ 2016-04-05  8:33   ` Hans de Goede
  2016-04-05  8:44     ` Hans de Goede
  0 siblings, 1 reply; 10+ messages in thread
From: Hans de Goede @ 2016-04-05  8:33 UTC (permalink / raw)
  To: u-boot

Hi,

On 05-04-16 01:59, Tom Rini wrote:
> On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:
>
>> v7_maint_dcache_all() does not work reliable when build with gcc6,
>> see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788
>
> So, I see on the bug you want to know if U-Boot is OK with this patch
> series being the fix.  At the high level, yes, borrowing code from the
> Linux Kernel is a good clean-up and I'd like to see this series could
> clean up things a little more and borrow from cache-v7.S when we could.
>
> But I'd also push back on the toolchain team.  Are they happy saying
> "that code is just too fragile, it's probably relying on undefined
> behavior, investigation concluded" ?

You should be able to reproduce the problems we're seeing on sunxi
yourself. Add a "noinline" to "v7_clean_inval_dcache_level_setway"
and then boot on say a lime2 you should see a data abort
after "Starting kernel" instead of, well, the kernel starting.

Given that just adding a noinline already breaks the code with
gcc-5 the "too fragile" thing was my own conclusion really.

I'll ask for some more info from the toolchain team in the bug
(note you're welcome to join the discussion in bugzilla yourself
  creating an account if you don't have one only requires an email
  address).

Regards,

Hans

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-05  8:33   ` Hans de Goede
@ 2016-04-05  8:44     ` Hans de Goede
  2016-04-05 13:45       ` Tom Rini
  0 siblings, 1 reply; 10+ messages in thread
From: Hans de Goede @ 2016-04-05  8:44 UTC (permalink / raw)
  To: u-boot

Hi,

On 05-04-16 10:33, Hans de Goede wrote:
> Hi,
>
> On 05-04-16 01:59, Tom Rini wrote:
>> On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:
>>
>>> v7_maint_dcache_all() does not work reliable when build with gcc6,
>>> see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788
>>
>> So, I see on the bug you want to know if U-Boot is OK with this patch
>> series being the fix.  At the high level, yes, borrowing code from the
>> Linux Kernel is a good clean-up and I'd like to see this series could
>> clean up things a little more and borrow from cache-v7.S when we could.
>>
>> But I'd also push back on the toolchain team.  Are they happy saying
>> "that code is just too fragile, it's probably relying on undefined
>> behavior, investigation concluded" ?
>
> You should be able to reproduce the problems we're seeing on sunxi
> yourself. Add a "noinline" to "v7_clean_inval_dcache_level_setway"
> and then boot on say a lime2 you should see a data abort
> after "Starting kernel" instead of, well, the kernel starting.
>
> Given that just adding a noinline already breaks the code with
> gcc-5 the "too fragile" thing was my own conclusion really.
>
> I'll ask for some more info from the toolchain team in the bug
> (note you're welcome to join the discussion in bugzilla yourself
>   creating an account if you don't have one only requires an email
>   address).

Answer from Jakub Jelinek who has been helping me from the toolchain side
with this bug sofar:

"That would need to answer somebody familiar with the ARM cache flushing
instructions.  All I can say is that I haven't found any obvious errors on the
toolchain side when compiling the code.  Ask somebody from Linaro or ARM?"

Regards,

Hans

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-05  8:44     ` Hans de Goede
@ 2016-04-05 13:45       ` Tom Rini
  0 siblings, 0 replies; 10+ messages in thread
From: Tom Rini @ 2016-04-05 13:45 UTC (permalink / raw)
  To: u-boot

On Tue, Apr 05, 2016 at 10:44:42AM +0200, Hans de Goede wrote:
> Hi,
> 
> On 05-04-16 10:33, Hans de Goede wrote:
> >Hi,
> >
> >On 05-04-16 01:59, Tom Rini wrote:
> >>On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:
> >>
> >>>v7_maint_dcache_all() does not work reliable when build with gcc6,
> >>>see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788
> >>
> >>So, I see on the bug you want to know if U-Boot is OK with this patch
> >>series being the fix.  At the high level, yes, borrowing code from the
> >>Linux Kernel is a good clean-up and I'd like to see this series could
> >>clean up things a little more and borrow from cache-v7.S when we could.
> >>
> >>But I'd also push back on the toolchain team.  Are they happy saying
> >>"that code is just too fragile, it's probably relying on undefined
> >>behavior, investigation concluded" ?
> >
> >You should be able to reproduce the problems we're seeing on sunxi
> >yourself. Add a "noinline" to "v7_clean_inval_dcache_level_setway"
> >and then boot on say a lime2 you should see a data abort
> >after "Starting kernel" instead of, well, the kernel starting.
> >
> >Given that just adding a noinline already breaks the code with
> >gcc-5 the "too fragile" thing was my own conclusion really.
> >
> >I'll ask for some more info from the toolchain team in the bug
> >(note you're welcome to join the discussion in bugzilla yourself
> >  creating an account if you don't have one only requires an email
> >  address).

Yes, I added myself to the bug earlier :)

> Answer from Jakub Jelinek who has been helping me from the toolchain side
> with this bug sofar:
> 
> "That would need to answer somebody familiar with the ARM cache flushing
> instructions.  All I can say is that I haven't found any obvious errors on the
> toolchain side when compiling the code.  Ask somebody from Linaro or ARM?"

It's a good thing there's a bunch of Linaro and ARM people around here
right now.  I'll see if I can talk one of them into looking into the
bug.

-- 
Tom
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20160405/a503c5ae/attachment.sig>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-04 18:31 [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code Hans de Goede
  2016-04-04 18:31 ` [U-Boot] [PATCH 2/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL) " Hans de Goede
  2016-04-04 23:59 ` [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) " Tom Rini
@ 2016-04-06  2:07 ` Masahiro Yamada
  2016-04-06 14:51 ` Tom Rini
  3 siblings, 0 replies; 10+ messages in thread
From: Masahiro Yamada @ 2016-04-06  2:07 UTC (permalink / raw)
  To: u-boot

Hi Hans,


2016-04-05 3:31 GMT+09:00 Hans de Goede <hdegoede@redhat.com>:

> diff --git a/arch/arm/cpu/armv7/cache_v7_asm.S b/arch/arm/cpu/armv7/cache_v7_asm.S
> new file mode 100644
> index 0000000..1b1daf8
> --- /dev/null
> +++ b/arch/arm/cpu/armv7/cache_v7_asm.S
> @@ -0,0 +1,85 @@
> +/*
> + * Copyright (C) 2012-2015 Masahiro Yamada <yamada.masahiro@socionext.com>
> + *
> + * SPDX-License-Identifier:    GPL-2.0+
> + */


Please delete my Copyright that I do not deserve to have.
I just copied this asm routine from the kernel.




> +#include <config.h>
> +#include <linux/linkage.h>
> +#include <linux/sizes.h>
> +#include <asm/system.h>
> +
> +#ifdef CONFIG_SYS_THUMB_BUILD
> +#define ARM(x...)
> +#define THUMB(x...)    x
> +#else
> +#define ARM(x...)      x
> +#define THUMB(x...)
> +#endif
> +
> +/*
> + *     v7_flush_dcache_all()
> + *
> + *     Flush the whole D-cache.
> + *
> + *     Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
> + *
> + *     Note: copied from arch/arm/mm/cache-v7.S of Linux 4.4
> + */
> +ENTRY(__v7_flush_dcache_all)
> +       dmb                                     @ ensure ordering with previous memory accesses
> +       mrc     p15, 1, r0, c0, c0, 1           @ read clidr
> +       mov     r3, r0, lsr #23                 @ move LoC into position
> +       ands    r3, r3, #7 << 1                 @ extract LoC*2 from clidr
> +       beq     finished                        @ if loc is 0, then no need to clean
> +       mov     r10, #0                         @ start clean at cache level 0
> +flush_levels:
> +       add     r2, r10, r10, lsr #1            @ work out 3x current cache level
> +       mov     r1, r0, lsr r2                  @ extract cache type bits from clidr
> +       and     r1, r1, #7                      @ mask of the bits for current cache only
> +       cmp     r1, #2                          @ see what cache we have at this level
> +       blt     skip                            @ skip if no cache, or just i-cache
> +       mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in cssr
> +       isb                                     @ isb to sych the new cssr&csidr
> +       mrc     p15, 1, r1, c0, c0, 0           @ read the new csidr
> +       and     r2, r1, #7                      @ extract the length of the cache lines
> +       add     r2, r2, #4                      @ add 4 (line length offset)
> +       movw    r4, #0x3ff
> +       ands    r4, r4, r1, lsr #3              @ find maximum number on the way size
> +       clz     r5, r4                          @ find bit position of way size increment
> +       movw    r7, #0x7fff
> +       ands    r7, r7, r1, lsr #13             @ extract max number of the index size
> +loop1:
> +       mov     r9, r7                          @ create working copy of max index
> +loop2:
> + ARM(  orr     r11, r10, r4, lsl r5    )       @ factor way and cache number into r11
> + THUMB(        lsl     r6, r4, r5              )
> + THUMB(        orr     r11, r10, r6            )       @ factor way and cache number into r11
> + ARM(  orr     r11, r11, r9, lsl r2    )       @ factor index number into r11
> + THUMB(        lsl     r6, r9, r2              )
> + THUMB(        orr     r11, r11, r6            )       @ factor index number into r11
> +       mcr     p15, 0, r11, c7, c14, 2         @ clean & invalidate by set/way
> +       subs    r9, r9, #1                      @ decrement the index
> +       bge     loop2
> +       subs    r4, r4, #1                      @ decrement the way
> +       bge     loop1
> +skip:
> +       add     r10, r10, #2                    @ increment cache number
> +       cmp     r3, r10
> +       bgt     flush_levels
> +finished:
> +       mov     r10, #0                         @ swith back to cache level 0
> +       mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in cssr
> +       dsb     st
> +       isb
> +       bx      lr
> +ENDPROC(__v7_flush_dcache_all)
> +
> +ENTRY(v7_flush_dcache_all)
> + ARM(  stmfd   sp!, {r4-r5, r7, r9-r11, lr}    )
> + THUMB(        stmfd   sp!, {r4-r7, r9-r11, lr}        )
> +       bl      __v7_flush_dcache_all
> + ARM(  ldmfd   sp!, {r4-r5, r7, r9-r11, lr}    )
> + THUMB(        ldmfd   sp!, {r4-r7, r9-r11, lr}        )
> +       bx      lr
> +ENDPROC(v7_flush_dcache_all)


Please delete the define in arch/arm/mach-uniphier/arm32/lowlevel_init.S

Otherwise, my board would fail to build due to multi define error.


arch/arm/cpu/armv7/built-in.o: In function `v7_flush_dcache_all':
/home/yamada/workspace/u-boot/arch/arm/cpu/armv7/cache_v7_asm.S:79:
multiple definition of `v7_flush_dcache_all'
arch/arm/mach-uniphier/built-in.o:/home/yamada/workspace/u-boot/arch/arm/mach-uniphier/arm32/lowlevel_init.S:160:
first defined here
make[1]: *** [spl/u-boot-spl] Error 1
make: *** [spl/u-boot-spl] Error 2






-- 
Best Regards
Masahiro Yamada

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-04 18:31 [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code Hans de Goede
                   ` (2 preceding siblings ...)
  2016-04-06  2:07 ` Masahiro Yamada
@ 2016-04-06 14:51 ` Tom Rini
  2016-04-06 15:22   ` Hans de Goede
  3 siblings, 1 reply; 10+ messages in thread
From: Tom Rini @ 2016-04-06 14:51 UTC (permalink / raw)
  To: u-boot

On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:

> v7_maint_dcache_all() does not work reliable when build with gcc6,
> see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788
> 
> While debugging this I learned that v7_maint_dcache_all() is unreliable
> when build with gcc5 too when it is marked as noinline.
> 
> This commit fixes the reliability issues by replacing the C-code with
> the ready to use asm implementation from the kernel.
> 
> Given that this code when written as C-code clearly is quite fragile
> (also see the existing comments about the C-code being the way it is
>  to get optimal assembly) and that we have a proven asm alternative,
> I believe that this is the best solution.
> 
> Note that we actually already have a copy of the kernel's
> v7_flush_dcache_all() in arch/arm/mach-uniphier/arm32/lowlevel_init.S.
> 
> We should replace that implementation with a call to this one, but I'm
> leaving this up to people with access to actual unifier hw. With this
> replacement in mind I've kept the original function as is, only renamed
> it to __v7_flush_dcache_all and v7_flush_dcache_all is a wrapper
> saving the registered clobbered by the core __v7_flush_dcache_all code
> 
> Signed-off-by: Hans de Goede <hdegoede@redhat.com>

So I was able to talk with a few people and this is the right approach.
The cache routines we're doing here in C must not in fact be done in C.
That things work today with some compilers is not by design.  This is at
least a minimally correct thing to do and a more correct thing to do
would be to leverage more of the code from the kernel for cache
functions (and not just for v7).

-- 
Tom
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20160406/958c481b/attachment.sig>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-06 14:51 ` Tom Rini
@ 2016-04-06 15:22   ` Hans de Goede
  2016-04-06 15:27     ` Tom Rini
  0 siblings, 1 reply; 10+ messages in thread
From: Hans de Goede @ 2016-04-06 15:22 UTC (permalink / raw)
  To: u-boot

Hi,

On 06-04-16 16:51, Tom Rini wrote:
> On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:
>
>> v7_maint_dcache_all() does not work reliable when build with gcc6,
>> see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788
>>
>> While debugging this I learned that v7_maint_dcache_all() is unreliable
>> when build with gcc5 too when it is marked as noinline.
>>
>> This commit fixes the reliability issues by replacing the C-code with
>> the ready to use asm implementation from the kernel.
>>
>> Given that this code when written as C-code clearly is quite fragile
>> (also see the existing comments about the C-code being the way it is
>>   to get optimal assembly) and that we have a proven asm alternative,
>> I believe that this is the best solution.
>>
>> Note that we actually already have a copy of the kernel's
>> v7_flush_dcache_all() in arch/arm/mach-uniphier/arm32/lowlevel_init.S.
>>
>> We should replace that implementation with a call to this one, but I'm
>> leaving this up to people with access to actual unifier hw. With this
>> replacement in mind I've kept the original function as is, only renamed
>> it to __v7_flush_dcache_all and v7_flush_dcache_all is a wrapper
>> saving the registered clobbered by the core __v7_flush_dcache_all code
>>
>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
>
> So I was able to talk with a few people and this is the right approach.
> The cache routines we're doing here in C must not in fact be done in C.
> That things work today with some compilers is not by design.  This is at
> least a minimally correct thing to do and a more correct thing to do
> would be to leverage more of the code from the kernel for cache
> functions (and not just for v7).

Thanks! I guess that means that we can consider this issue resolved?
Which is good news as this was a nasty bug to track down.

So are you planning on merging this patch / these 2 patches to
master then ?

Note that in my testing only the first patch is necessary to fix
various sunxi boards no longer booting.

Regards,

Hans

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code
  2016-04-06 15:22   ` Hans de Goede
@ 2016-04-06 15:27     ` Tom Rini
  0 siblings, 0 replies; 10+ messages in thread
From: Tom Rini @ 2016-04-06 15:27 UTC (permalink / raw)
  To: u-boot

On Wed, Apr 06, 2016 at 05:22:25PM +0200, Hans de Goede wrote:
> Hi,
> 
> On 06-04-16 16:51, Tom Rini wrote:
> >On Mon, Apr 04, 2016 at 08:31:48PM +0200, Hans de Goede wrote:
> >
> >>v7_maint_dcache_all() does not work reliable when build with gcc6,
> >>see: https://bugzilla.redhat.com/show_bug.cgi?id=1318788
> >>
> >>While debugging this I learned that v7_maint_dcache_all() is unreliable
> >>when build with gcc5 too when it is marked as noinline.
> >>
> >>This commit fixes the reliability issues by replacing the C-code with
> >>the ready to use asm implementation from the kernel.
> >>
> >>Given that this code when written as C-code clearly is quite fragile
> >>(also see the existing comments about the C-code being the way it is
> >>  to get optimal assembly) and that we have a proven asm alternative,
> >>I believe that this is the best solution.
> >>
> >>Note that we actually already have a copy of the kernel's
> >>v7_flush_dcache_all() in arch/arm/mach-uniphier/arm32/lowlevel_init.S.
> >>
> >>We should replace that implementation with a call to this one, but I'm
> >>leaving this up to people with access to actual unifier hw. With this
> >>replacement in mind I've kept the original function as is, only renamed
> >>it to __v7_flush_dcache_all and v7_flush_dcache_all is a wrapper
> >>saving the registered clobbered by the core __v7_flush_dcache_all code
> >>
> >>Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> >
> >So I was able to talk with a few people and this is the right approach.
> >The cache routines we're doing here in C must not in fact be done in C.
> >That things work today with some compilers is not by design.  This is at
> >least a minimally correct thing to do and a more correct thing to do
> >would be to leverage more of the code from the kernel for cache
> >functions (and not just for v7).
> 
> Thanks! I guess that means that we can consider this issue resolved?
> Which is good news as this was a nasty bug to track down.

Yes it was, good job btw.

> So are you planning on merging this patch / these 2 patches to
> master then ?
> 
> Note that in my testing only the first patch is necessary to fix
> various sunxi boards no longer booting.

We need at least a v2 to fix the build issue Masahiro pointed out.  But
yes, I'll tkae these two and put the rest on my TODO list.  Thanks!

-- 
Tom
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20160406/0909ae32/attachment.sig>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-04-06 15:27 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-04 18:31 [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) with asm code Hans de Goede
2016-04-04 18:31 ` [U-Boot] [PATCH 2/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL) " Hans de Goede
2016-04-04 23:59 ` [U-Boot] [PATCH 1/2] arm: Replace v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL) " Tom Rini
2016-04-05  8:33   ` Hans de Goede
2016-04-05  8:44     ` Hans de Goede
2016-04-05 13:45       ` Tom Rini
2016-04-06  2:07 ` Masahiro Yamada
2016-04-06 14:51 ` Tom Rini
2016-04-06 15:22   ` Hans de Goede
2016-04-06 15:27     ` Tom Rini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.