All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion
@ 2011-11-15  2:06 Junxiao Bi
  2011-11-15  2:06 ` [PATCH 2/6] ARM: gic: fix big endian support Junxiao Bi
                   ` (6 more replies)
  0 siblings, 7 replies; 23+ messages in thread
From: Junxiao Bi @ 2011-11-15  2:06 UTC (permalink / raw)
  To: linux-arm-kernel

phys_to_virt() and virt_to_phys() is implemented by one assembler instruction,
add/sub an offset between the physical and virtual address. For some arm machines
with CONFIG_ARM_PATCH_PHYS_VIRT defined, this offset is unknown at compile time,
it is calculated dynamically at run time and then patched to the add/sub instruction.
To support BE8 mode, firstly we need to convert the instruntion to big-endian after
loading it from the memory, and then modify its offset field, at last convert it to
little-endian and write it back to the memory.

Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/kernel/head.S |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 566c54c..838fb0c 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -522,8 +522,14 @@ __fixup_a_pv_table:
 	b	2f
 1:	add     r7, r3
 	ldrh	ip, [r7, #2]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev16	ip, ip
+#endif
 	and	ip, 0x8f00
 	orr	ip, r6	@ mask in offset bits 31-24
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev16	ip, ip
+#endif
 	strh	ip, [r7, #2]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
@@ -532,8 +538,14 @@ __fixup_a_pv_table:
 #else
 	b	2f
 1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	ip, ip
+#endif
 	bic	ip, ip, #0x000000ff
 	orr	ip, ip, r6	@ mask in offset bits 31-24
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	ip, ip
+#endif
 	str	ip, [r7, r3]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
@ 2011-11-15  2:06 ` Junxiao Bi
  2011-11-21 19:11   ` Nicolas Pitre
  2011-11-15  2:06 ` [PATCH 3/6] ARM: early_printk: pl01x: " Junxiao Bi
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Junxiao Bi @ 2011-11-15  2:06 UTC (permalink / raw)
  To: linux-arm-kernel

The irq state from gic is in little-endian mode. We need do endian
conversion for it in big-endian machine. Usually byte swapping of
32-bits word needs 4 assembler instructions for arm machine which
arch < 6. But since the high order half word of the irq state is
zero, there is a way that can use lesser instrunctions to improve
the performance since we only need a bytes swaping of half word.
That's what swab16_low_half and swab16_high_half do.

Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/include/asm/assembler.h                |   27 +++++++++++++++++++++++
 arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
 2 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 29035e8..3ddee22 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -302,4 +302,31 @@
 	.size \name , . - \name
 	.endm
 
+/*
+ * swab16_low_half is used to swap the bytes order of the low order
+ * half word, it assumes that the high order half word is zero.
+ * swab16_high_half is used to swap the bytes order of the high order
+ * half word, it assumes that the low order half word is zero.
+ */
+#if __LINUX_ARM_ARCH__ >= 6
+	.macro swab16_low_half, reg
+	rev \reg, \reg
+	.endm
+
+	.macro swab16_high_half, reg
+	rev \reg, \reg
+	.endm
+#else
+	.macro swab16_low_half, reg
+	mov \reg, \reg, ror #8
+	orr \reg, \reg, \reg, lsr #16
+	bic \reg, \reg, \reg, lsl #16
+	.endm
+
+	.macro swab16_high_half, reg
+	swab16_low_half \reg
+	mov \reg, \reg, lsl #16
+	.endm
+#endif
+
 #endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/hardware/entry-macro-gic.S b/arch/arm/include/asm/hardware/entry-macro-gic.S
index 74ebc80..3453273 100644
--- a/arch/arm/include/asm/hardware/entry-macro-gic.S
+++ b/arch/arm/include/asm/hardware/entry-macro-gic.S
@@ -9,6 +9,7 @@
  */
 
 #include <asm/hardware/gic.h>
+#include <asm/assembler.h>
 
 #ifndef HAVE_GET_IRQNR_PREAMBLE
 	.macro	get_irqnr_preamble, base, tmp
@@ -35,6 +36,9 @@
 	.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
 
 	ldr     \irqstat, [\base, #GIC_CPU_INTACK]
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	swab16_low_half \irqstat
+#endif
 	/* bits 12-10 = src CPU, 9-0 = int # */
 
 	ldr	\tmp, =1021
@@ -55,6 +59,9 @@
 	.macro test_for_ipi, irqnr, irqstat, base, tmp
 	bic	\irqnr, \irqstat, #0x1c00
 	cmp	\irqnr, #16
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	swab16_high_half \irqstat
+#endif
 	strcc	\irqstat, [\base, #GIC_CPU_EOI]
 	cmpcs	\irqnr, \irqnr
 	.endm
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/6] ARM: early_printk: pl01x: fix big endian support
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
  2011-11-15  2:06 ` [PATCH 2/6] ARM: gic: fix big endian support Junxiao Bi
@ 2011-11-15  2:06 ` Junxiao Bi
  2011-11-15  2:06 ` [PATCH 4/6] ARM: add big endian support for peripheral access Junxiao Bi
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Junxiao Bi @ 2011-11-15  2:06 UTC (permalink / raw)
  To: linux-arm-kernel

The waituart and busyuart need to get one bit flag from UART flag
register. For this, we don't need do endian conversion for the entire
flag register, providing that flag definition in big-endian mode is
enough.

Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/include/asm/assembler.h            |    6 ++++++
 arch/arm/include/asm/hardware/debug-pl01x.S |   12 ++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 3ddee22..2c7e8fa 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -329,4 +329,10 @@
 	.endm
 #endif
 
+#define asm_constant_swab32(x) \
+	((((x) & 0xff000000) >> 24) | \
+	(((x) & 0x00ff0000) >> 8) | \
+	(((x) & 0x0000ff00) << 8) | \
+	(((x) & 0x000000ff) << 24))
+
 #endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/hardware/debug-pl01x.S b/arch/arm/include/asm/hardware/debug-pl01x.S
index f9fd083..25fa0ec 100644
--- a/arch/arm/include/asm/hardware/debug-pl01x.S
+++ b/arch/arm/include/asm/hardware/debug-pl01x.S
@@ -11,19 +11,27 @@
  *
 */
 #include <linux/amba/serial.h>
+#include <asm/assembler.h>
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define _UART01x_FR_TXFF asm_constant_swab32(UART01x_FR_TXFF)
+#define _UART01x_FR_BUSY asm_constant_swab32(UART01x_FR_BUSY)
+#else
+#define _UART01x_FR_TXFF UART01x_FR_TXFF
+#define _UART01x_FR_BUSY UART01x_FR_BUSY
+#endif
 		.macro	senduart,rd,rx
 		strb	\rd, [\rx, #UART01x_DR]
 		.endm
 
 		.macro	waituart,rd,rx
 1001:		ldr	\rd, [\rx, #UART01x_FR]
-		tst	\rd, #UART01x_FR_TXFF
+		tst	\rd, #_UART01x_FR_TXFF
 		bne	1001b
 		.endm
 
 		.macro	busyuart,rd,rx
 1001:		ldr	\rd, [\rx, #UART01x_FR]
-		tst	\rd, #UART01x_FR_BUSY
+		tst	\rd, #_UART01x_FR_BUSY
 		bne	1001b
 		.endm
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/6] ARM: add big endian support for peripheral access
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
  2011-11-15  2:06 ` [PATCH 2/6] ARM: gic: fix big endian support Junxiao Bi
  2011-11-15  2:06 ` [PATCH 3/6] ARM: early_printk: pl01x: " Junxiao Bi
@ 2011-11-15  2:06 ` Junxiao Bi
  2011-11-15  2:06 ` [PATCH 5/6] ARM: Atomic64: fix 64bit ops in BE mode Junxiao Bi
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Junxiao Bi @ 2011-11-15  2:06 UTC (permalink / raw)
  To: linux-arm-kernel

From: Xue Ying <ying.xue@windriver.com>

Since data from arm peripheral are little-endian and __raw_read*/__raw_write*
don't do any endian conversion, we should use read*/write*_relaxed to replace
them as the _relaxed version only adds endian conversion based on __raw version
which isn't like read*/write* that adds IO barriers which will affect performance.

Signed-off-by: Xue Ying <ying.xue@windriver.com>
Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/kernel/smp_scu.c |   10 +++++-----
 arch/arm/kernel/smp_twd.c |   20 ++++++++++----------
 include/linux/mtd/map.h   |   16 ++++++++--------
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 8f5dd79..0324435 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -27,7 +27,7 @@
  */
 unsigned int __init scu_get_core_count(void __iomem *scu_base)
 {
-	unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG);
+	unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG);
 	return (ncores & 0x03) + 1;
 }
 
@@ -41,19 +41,19 @@ void scu_enable(void __iomem *scu_base)
 #ifdef CONFIG_ARM_ERRATA_764369
 	/* Cortex-A9 only */
 	if ((read_cpuid(CPUID_ID) & 0xff0ffff0) == 0x410fc090) {
-		scu_ctrl = __raw_readl(scu_base + 0x30);
+		scu_ctrl = readl_relaxed(scu_base + 0x30);
 		if (!(scu_ctrl & 1))
-			__raw_writel(scu_ctrl | 0x1, scu_base + 0x30);
+			writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30);
 	}
 #endif
 
-	scu_ctrl = __raw_readl(scu_base + SCU_CTRL);
+	scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
 	/* already enabled? */
 	if (scu_ctrl & 1)
 		return;
 
 	scu_ctrl |= 1;
-	__raw_writel(scu_ctrl, scu_base + SCU_CTRL);
+	writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
 
 	/*
 	 * Ensure that the data accessed by CPU0 before the SCU was
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index a8a6682..0864d92 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -39,7 +39,7 @@ static void twd_set_mode(enum clock_event_mode mode,
 		/* timer load already set up */
 		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
 			| TWD_TIMER_CONTROL_PERIODIC;
-		__raw_writel(twd_timer_rate / HZ, twd_base + TWD_TIMER_LOAD);
+		writel_relaxed(twd_timer_rate / HZ, twd_base + TWD_TIMER_LOAD);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* period set, and timer enabled in 'next_event' hook */
@@ -51,18 +51,18 @@ static void twd_set_mode(enum clock_event_mode mode,
 		ctrl = 0;
 	}
 
-	__raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
+	writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL);
 }
 
 static int twd_set_next_event(unsigned long evt,
 			struct clock_event_device *unused)
 {
-	unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
+	unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL);
 
 	ctrl |= TWD_TIMER_CONTROL_ENABLE;
 
-	__raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
-	__raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
+	writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER);
+	writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL);
 
 	return 0;
 }
@@ -75,8 +75,8 @@ static int twd_set_next_event(unsigned long evt,
  */
 int twd_timer_ack(void)
 {
-	if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
-		__raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
+	if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) {
+		writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT);
 		return 1;
 	}
 
@@ -111,15 +111,15 @@ static void __cpuinit twd_calibrate_rate(void)
 		waitjiffies += 5;
 
 				 /* enable, no interrupt or reload */
-		__raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
+		writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL);
 
 				 /* maximum value */
-		__raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
+		writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
 
 		while (get_jiffies_64() < waitjiffies)
 			udelay(10);
 
-		count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
+		count = readl_relaxed(twd_base + TWD_TIMER_COUNTER);
 
 		twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
 
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index a9e6ba4..74b266d 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -393,14 +393,14 @@ static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 	map_word r;
 
 	if (map_bankwidth_is_1(map))
-		r.x[0] = __raw_readb(map->virt + ofs);
+		r.x[0] = readb_relaxed(map->virt + ofs);
 	else if (map_bankwidth_is_2(map))
-		r.x[0] = __raw_readw(map->virt + ofs);
+		r.x[0] = readw_relaxed(map->virt + ofs);
 	else if (map_bankwidth_is_4(map))
-		r.x[0] = __raw_readl(map->virt + ofs);
+		r.x[0] = readl_relaxed(map->virt + ofs);
 #if BITS_PER_LONG >= 64
 	else if (map_bankwidth_is_8(map))
-		r.x[0] = __raw_readq(map->virt + ofs);
+		r.x[0] = readq_relaxed(map->virt + ofs);
 #endif
 	else if (map_bankwidth_is_large(map))
 		memcpy_fromio(r.x, map->virt+ofs, map->bankwidth);
@@ -413,14 +413,14 @@ static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 static inline void inline_map_write(struct map_info *map, const map_word datum, unsigned long ofs)
 {
 	if (map_bankwidth_is_1(map))
-		__raw_writeb(datum.x[0], map->virt + ofs);
+		writeb_relaxed(datum.x[0], map->virt + ofs);
 	else if (map_bankwidth_is_2(map))
-		__raw_writew(datum.x[0], map->virt + ofs);
+		writew_relaxed(datum.x[0], map->virt + ofs);
 	else if (map_bankwidth_is_4(map))
-		__raw_writel(datum.x[0], map->virt + ofs);
+		writel_relaxed(datum.x[0], map->virt + ofs);
 #if BITS_PER_LONG >= 64
 	else if (map_bankwidth_is_8(map))
-		__raw_writeq(datum.x[0], map->virt + ofs);
+		writeq_relaxed(datum.x[0], map->virt + ofs);
 #endif
 	else if (map_bankwidth_is_large(map))
 		memcpy_toio(map->virt+ofs, datum.x, map->bankwidth);
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 5/6] ARM: Atomic64: fix 64bit ops in BE mode
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
                   ` (2 preceding siblings ...)
  2011-11-15  2:06 ` [PATCH 4/6] ARM: add big endian support for peripheral access Junxiao Bi
@ 2011-11-15  2:06 ` Junxiao Bi
  2011-11-21 19:24   ` Nicolas Pitre
  2011-11-15  2:06 ` [PATCH 6/6] ARM: support kernel modules in BE8 mode Junxiao Bi
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Junxiao Bi @ 2011-11-15  2:06 UTC (permalink / raw)
  To: linux-arm-kernel

The add/sub ops to 64 bits double word are made by adds/subs to the least
significant 32 bits first and then adc/sbc to the most significant 32 bits.
Using %0 and %H0 can only work in LE mode. If we change them to %Q0 and %R0,
it will work in both endian modes since %Q0 always represents the least
significant 32 bits and %R0 represents the most.

Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/include/asm/atomic.h |   26 +++++++++++++-------------
 1 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 86976d0..00fcc4f 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -275,8 +275,8 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 
 	__asm__ __volatile__("@ atomic64_add\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
-"	adds	%0, %0, %4\n"
-"	adc	%H0, %H0, %H4\n"
+"	adds	%Q0, %Q0, %Q4\n"
+"	adc	%R0, %R0, %R4\n"
 "	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
@@ -294,8 +294,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
-"	adds	%0, %0, %4\n"
-"	adc	%H0, %H0, %H4\n"
+"	adds	%Q0, %Q0, %Q4\n"
+"	adc	%R0, %R0, %R4\n"
 "	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
@@ -315,8 +315,8 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 
 	__asm__ __volatile__("@ atomic64_sub\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
-"	subs	%0, %0, %4\n"
-"	sbc	%H0, %H0, %H4\n"
+"	subs	%Q0, %Q0, %Q4\n"
+"	sbc	%R0, %R0, %R4\n"
 "	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
@@ -334,8 +334,8 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v)
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
-"	subs	%0, %0, %4\n"
-"	sbc	%H0, %H0, %H4\n"
+"	subs	%Q0, %Q0, %Q4\n"
+"	sbc	%R0, %R0, %R4\n"
 "	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
@@ -402,9 +402,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v)
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
-"	subs	%0, %0, #1\n"
-"	sbc	%H0, %H0, #0\n"
-"	teq	%H0, #0\n"
+"	subs	%Q0, %Q0, #1\n"
+"	sbc	%R0, %R0, #0\n"
+"	teq	%R0, #0\n"
 "	bmi	2f\n"
 "	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
@@ -433,8 +433,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
 "	teqeq	%H0, %H5\n"
 "	moveq	%1, #0\n"
 "	beq	2f\n"
-"	adds	%0, %0, %6\n"
-"	adc	%H0, %H0, %H6\n"
+"	adds	%Q0, %Q0, %Q6\n"
+"	adc	%R0, %R0, %R6\n"
 "	strexd	%2, %0, %H0, [%4]\n"
 "	teq	%2, #0\n"
 "	bne	1b\n"
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
                   ` (3 preceding siblings ...)
  2011-11-15  2:06 ` [PATCH 5/6] ARM: Atomic64: fix 64bit ops in BE mode Junxiao Bi
@ 2011-11-15  2:06 ` Junxiao Bi
  2011-11-21 19:29   ` Nicolas Pitre
  2011-11-21  5:44 ` [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Bi Junxiao
  2011-11-21 18:32 ` Nicolas Pitre
  6 siblings, 1 reply; 23+ messages in thread
From: Junxiao Bi @ 2011-11-15  2:06 UTC (permalink / raw)
  To: linux-arm-kernel

From: Stanley.Miao <stanley.miao@windriver.com>

In BE8 mode, data must be manipulated in big endian format while
text must be little endian. Therefore, when relocating the text
section of module in BE8 mode, we must convert the location offset
of the text to big endian from the native little endian. After
the relocation is complete, the location offset value is re-written
as little endian.

Since only BE8 mode has such special requirement while other big endian
mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
text. We introduce write_instr* and read_instr* to do it.

Signed-off-by: Stanley.Miao <stanley.miao@windriver.com>
Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/Makefile         |    1 +
 arch/arm/include/asm/io.h |   12 ++++++++++
 arch/arm/kernel/module.c  |   51 +++++++++++++++++++++++----------------------
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index dfcf3b0..c858184 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -13,6 +13,7 @@
 LDFLAGS_vmlinux	:=-p --no-undefined -X
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
+LDFLAGS_MODULE	+= --be8
 endif
 
 OBJCOPYFLAGS	:=-O binary -R .comment -S
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 065d100..4b6c7de 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -210,6 +210,18 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  * Again, this are defined to perform little endian accesses.  See the
  * IO port primitives for more information.
  */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define read_instr32(c)		__swab32(*(u32 *)(c))
+#define read_instr16(c)		__swab16(*(u16 *)(c))
+#define write_instr32(v, a)	(*(u32 *)(a) = __swab32((__force __u32)(v)))
+#define write_instr16(v, a)	(*(u16 *)(a) = __swab16((__force __u16)(v)))
+#else
+#define read_instr32(c)		(*(u32 *)(c))
+#define read_instr16(c)		(*(u16 *)(c))
+#define write_instr32(v, a)	(*(u32 *)(a) = (v))
+#define write_instr16(v, a)	(*(u16 *)(a) = (v))
+#endif
+
 #ifdef __mem_pci
 #define readb_relaxed(c) ({ u8  __r = __raw_readb(__mem_pci(c)); __r; })
 #define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16) \
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 1e9be5d..b6a0f50 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -14,6 +14,7 @@
 #include <linux/moduleloader.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/io.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
@@ -95,7 +96,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 		case R_ARM_PC24:
 		case R_ARM_CALL:
 		case R_ARM_JUMP24:
-			offset = (*(u32 *)loc & 0x00ffffff) << 2;
+			offset = (read_instr32(loc) & 0x00ffffff) << 2;
 			if (offset & 0x02000000)
 				offset -= 0x04000000;
 
@@ -112,8 +113,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 
 			offset >>= 2;
 
-			*(u32 *)loc &= 0xff000000;
-			*(u32 *)loc |= offset & 0x00ffffff;
+			write_instr32((read_instr32(loc) & 0xff000000) |
+					(offset & 0x00ffffff), loc);
 			break;
 
 	       case R_ARM_V4BX:
@@ -121,9 +122,9 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			* other bits to re-code instruction as
 			* MOV PC,Rm.
 			*/
-		       *(u32 *)loc &= 0xf000000f;
-		       *(u32 *)loc |= 0x01a0f000;
-		       break;
+			write_instr32((read_instr32(loc) & 0xf000000f) |
+					0x01a0f000, loc);
+			break;
 
 		case R_ARM_PREL31:
 			offset = *(u32 *)loc + sym->st_value - loc;
@@ -132,7 +133,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 
 		case R_ARM_MOVW_ABS_NC:
 		case R_ARM_MOVT_ABS:
-			offset = *(u32 *)loc;
+			offset = read_instr32(loc);
 			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
 			offset = (offset ^ 0x8000) - 0x8000;
 
@@ -140,16 +141,16 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
 				offset >>= 16;
 
-			*(u32 *)loc &= 0xfff0f000;
-			*(u32 *)loc |= ((offset & 0xf000) << 4) |
-					(offset & 0x0fff);
+			write_instr32((read_instr32(loc) & 0xfff0f000) |
+					((offset & 0xf000) << 4) |
+					(offset & 0x0fff), loc);
 			break;
 
 #ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
-			upper = *(u16 *)loc;
-			lower = *(u16 *)(loc + 2);
+			upper = read_instr16(loc);
+			lower = read_instr16(loc + 2);
 
 			/*
 			 * 25 bit signed address range (Thumb-2 BL and B.W
@@ -198,17 +199,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			sign = (offset >> 24) & 1;
 			j1 = sign ^ (~(offset >> 23) & 1);
 			j2 = sign ^ (~(offset >> 22) & 1);
-			*(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) |
-					    ((offset >> 12) & 0x03ff));
-			*(u16 *)(loc + 2) = (u16)((lower & 0xd000) |
-						  (j1 << 13) | (j2 << 11) |
-						  ((offset >> 1) & 0x07ff));
+			write_instr16((u16)((upper & 0xf800) | (sign << 10) |
+					((offset >> 12) & 0x03ff)), loc);
+			write_instr16((u16)((lower & 0xd000) | (j1 << 13) |
+					(j2 << 11) | ((offset >> 1) & 0x07ff)),
+					loc + 2);
 			break;
 
 		case R_ARM_THM_MOVW_ABS_NC:
 		case R_ARM_THM_MOVT_ABS:
-			upper = *(u16 *)loc;
-			lower = *(u16 *)(loc + 2);
+			upper = read_instr16(loc);
+			lower = read_instr16(loc + 2);
 
 			/*
 			 * MOVT/MOVW instructions encoding in Thumb-2:
@@ -229,12 +230,12 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
 				offset >>= 16;
 
-			*(u16 *)loc = (u16)((upper & 0xfbf0) |
-					    ((offset & 0xf000) >> 12) |
-					    ((offset & 0x0800) >> 1));
-			*(u16 *)(loc + 2) = (u16)((lower & 0x8f00) |
-						  ((offset & 0x0700) << 4) |
-						  (offset & 0x00ff));
+			write_instr16((u16)((upper & 0xfbf0) |
+					((offset & 0xf000) >> 12) |
+					((offset & 0x0800) >> 1)), loc);
+			write_instr16((u16)((lower & 0x8f00) |
+					((offset & 0x0700) << 4) |
+					(offset & 0x00ff)), loc + 2);
 			break;
 #endif
 
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
                   ` (4 preceding siblings ...)
  2011-11-15  2:06 ` [PATCH 6/6] ARM: support kernel modules in BE8 mode Junxiao Bi
@ 2011-11-21  5:44 ` Bi Junxiao
  2011-11-21 18:32 ` Nicolas Pitre
  6 siblings, 0 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-21  5:44 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Russell,

Would you please help to review this patch series?

Great thanks.

on 11/15/2011 10:06 AM Junxiao Bi wrote:
> phys_to_virt() and virt_to_phys() is implemented by one assembler instruction,
> add/sub an offset between the physical and virtual address. For some arm machines
> with CONFIG_ARM_PATCH_PHYS_VIRT defined, this offset is unknown at compile time,
> it is calculated dynamically at run time and then patched to the add/sub instruction.
> To support BE8 mode, firstly we need to convert the instruntion to big-endian after
> loading it from the memory, and then modify its offset field, at last convert it to
> little-endian and write it back to the memory.
>
> Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
> ---
>   arch/arm/kernel/head.S |   12 ++++++++++++
>   1 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 566c54c..838fb0c 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -522,8 +522,14 @@ __fixup_a_pv_table:
>   	b	2f
>   1:	add     r7, r3
>   	ldrh	ip, [r7, #2]
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev16	ip, ip
> +#endif
>   	and	ip, 0x8f00
>   	orr	ip, r6	@ mask in offset bits 31-24
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev16	ip, ip
> +#endif
>   	strh	ip, [r7, #2]
>   2:	cmp	r4, r5
>   	ldrcc	r7, [r4], #4	@ use branch for delay slot
> @@ -532,8 +538,14 @@ __fixup_a_pv_table:
>   #else
>   	b	2f
>   1:	ldr	ip, [r7, r3]
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev	ip, ip
> +#endif
>   	bic	ip, ip, #0x000000ff
>   	orr	ip, ip, r6	@ mask in offset bits 31-24
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev	ip, ip
> +#endif
>   	str	ip, [r7, r3]
>   2:	cmp	r4, r5
>   	ldrcc	r7, [r4], #4	@ use branch for delay slot
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion
  2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
                   ` (5 preceding siblings ...)
  2011-11-21  5:44 ` [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Bi Junxiao
@ 2011-11-21 18:32 ` Nicolas Pitre
  6 siblings, 0 replies; 23+ messages in thread
From: Nicolas Pitre @ 2011-11-21 18:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 15 Nov 2011, Junxiao Bi wrote:

> phys_to_virt() and virt_to_phys() is implemented by one assembler instruction,
> add/sub an offset between the physical and virtual address. For some arm machines
> with CONFIG_ARM_PATCH_PHYS_VIRT defined, this offset is unknown at compile time,
> it is calculated dynamically at run time and then patched to the add/sub instruction.
> To support BE8 mode, firstly we need to convert the instruntion to big-endian after
> loading it from the memory, and then modify its offset field, at last convert it to
> little-endian and write it back to the memory.
> 
> Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>

Reviewed-by: Nicolas Pitre <nico@linaro.org>

> ---
>  arch/arm/kernel/head.S |   12 ++++++++++++
>  1 files changed, 12 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 566c54c..838fb0c 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -522,8 +522,14 @@ __fixup_a_pv_table:
>  	b	2f
>  1:	add     r7, r3
>  	ldrh	ip, [r7, #2]
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev16	ip, ip
> +#endif
>  	and	ip, 0x8f00
>  	orr	ip, r6	@ mask in offset bits 31-24
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev16	ip, ip
> +#endif
>  	strh	ip, [r7, #2]
>  2:	cmp	r4, r5
>  	ldrcc	r7, [r4], #4	@ use branch for delay slot
> @@ -532,8 +538,14 @@ __fixup_a_pv_table:
>  #else
>  	b	2f
>  1:	ldr	ip, [r7, r3]
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev	ip, ip
> +#endif
>  	bic	ip, ip, #0x000000ff
>  	orr	ip, ip, r6	@ mask in offset bits 31-24
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +	rev	ip, ip
> +#endif
>  	str	ip, [r7, r3]
>  2:	cmp	r4, r5
>  	ldrcc	r7, [r4], #4	@ use branch for delay slot
> -- 
> 1.7.0.4
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-15  2:06 ` [PATCH 2/6] ARM: gic: fix big endian support Junxiao Bi
@ 2011-11-21 19:11   ` Nicolas Pitre
  2011-11-22  1:47     ` Bi Junxiao
  0 siblings, 1 reply; 23+ messages in thread
From: Nicolas Pitre @ 2011-11-21 19:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 15 Nov 2011, Junxiao Bi wrote:

> The irq state from gic is in little-endian mode. We need do endian
> conversion for it in big-endian machine. Usually byte swapping of
> 32-bits word needs 4 assembler instructions for arm machine which
> arch < 6. But since the high order half word of the irq state is
> zero, there is a way that can use lesser instrunctions to improve
> the performance since we only need a bytes swaping of half word.
> That's what swab16_low_half and swab16_high_half do.
> 
> Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>

Comments below.

> ---
>  arch/arm/include/asm/assembler.h                |   27 +++++++++++++++++++++++
>  arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
>  2 files changed, 34 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index 29035e8..3ddee22 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -302,4 +302,31 @@
>  	.size \name , . - \name
>  	.endm
>  
> +/*
> + * swab16_low_half is used to swap the bytes order of the low order
> + * half word, it assumes that the high order half word is zero.
> + * swab16_high_half is used to swap the bytes order of the high order
> + * half word, it assumes that the low order half word is zero.
> + */
> +#if __LINUX_ARM_ARCH__ >= 6
> +	.macro swab16_low_half, reg
> +	rev \reg, \reg
> +	.endm
> +
> +	.macro swab16_high_half, reg
> +	rev \reg, \reg
> +	.endm
> +#else
> +	.macro swab16_low_half, reg
> +	mov \reg, \reg, ror #8
> +	orr \reg, \reg, \reg, lsr #16
> +	bic \reg, \reg, \reg, lsl #16
> +	.endm

I fail to see how this macro would be generally useful.  Presuming that 
the top or bottom half has to be zero is not particularly practical.  

> +	.macro swab16_high_half, reg
> +	swab16_low_half \reg
> +	mov \reg, \reg, lsl #16
> +	.endm

And this doesn't end up cheaper than a plain swab32.

Furthermore, you're using those macros 1) in the gic code, and 2) onli 
in the BE8 case.  Both of those conditions are only possible on ARMv6 
and above anyway, so I'd simply open code a rev instruction inline.


Nicolas

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 5/6] ARM: Atomic64: fix 64bit ops in BE mode
  2011-11-15  2:06 ` [PATCH 5/6] ARM: Atomic64: fix 64bit ops in BE mode Junxiao Bi
@ 2011-11-21 19:24   ` Nicolas Pitre
  0 siblings, 0 replies; 23+ messages in thread
From: Nicolas Pitre @ 2011-11-21 19:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 15 Nov 2011, Junxiao Bi wrote:

> The add/sub ops to 64 bits double word are made by adds/subs to the least
> significant 32 bits first and then adc/sbc to the most significant 32 bits.
> Using %0 and %H0 can only work in LE mode. If we change them to %Q0 and %R0,
> it will work in both endian modes since %Q0 always represents the least
> significant 32 bits and %R0 represents the most.
> 
> Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>

Acked-by: Nicolas Pitre <nico@linaro.org>


> ---
>  arch/arm/include/asm/atomic.h |   26 +++++++++++++-------------
>  1 files changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
> index 86976d0..00fcc4f 100644
> --- a/arch/arm/include/asm/atomic.h
> +++ b/arch/arm/include/asm/atomic.h
> @@ -275,8 +275,8 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
>  
>  	__asm__ __volatile__("@ atomic64_add\n"
>  "1:	ldrexd	%0, %H0, [%3]\n"
> -"	adds	%0, %0, %4\n"
> -"	adc	%H0, %H0, %H4\n"
> +"	adds	%Q0, %Q0, %Q4\n"
> +"	adc	%R0, %R0, %R4\n"
>  "	strexd	%1, %0, %H0, [%3]\n"
>  "	teq	%1, #0\n"
>  "	bne	1b"
> @@ -294,8 +294,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
>  
>  	__asm__ __volatile__("@ atomic64_add_return\n"
>  "1:	ldrexd	%0, %H0, [%3]\n"
> -"	adds	%0, %0, %4\n"
> -"	adc	%H0, %H0, %H4\n"
> +"	adds	%Q0, %Q0, %Q4\n"
> +"	adc	%R0, %R0, %R4\n"
>  "	strexd	%1, %0, %H0, [%3]\n"
>  "	teq	%1, #0\n"
>  "	bne	1b"
> @@ -315,8 +315,8 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
>  
>  	__asm__ __volatile__("@ atomic64_sub\n"
>  "1:	ldrexd	%0, %H0, [%3]\n"
> -"	subs	%0, %0, %4\n"
> -"	sbc	%H0, %H0, %H4\n"
> +"	subs	%Q0, %Q0, %Q4\n"
> +"	sbc	%R0, %R0, %R4\n"
>  "	strexd	%1, %0, %H0, [%3]\n"
>  "	teq	%1, #0\n"
>  "	bne	1b"
> @@ -334,8 +334,8 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v)
>  
>  	__asm__ __volatile__("@ atomic64_sub_return\n"
>  "1:	ldrexd	%0, %H0, [%3]\n"
> -"	subs	%0, %0, %4\n"
> -"	sbc	%H0, %H0, %H4\n"
> +"	subs	%Q0, %Q0, %Q4\n"
> +"	sbc	%R0, %R0, %R4\n"
>  "	strexd	%1, %0, %H0, [%3]\n"
>  "	teq	%1, #0\n"
>  "	bne	1b"
> @@ -402,9 +402,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v)
>  
>  	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
>  "1:	ldrexd	%0, %H0, [%3]\n"
> -"	subs	%0, %0, #1\n"
> -"	sbc	%H0, %H0, #0\n"
> -"	teq	%H0, #0\n"
> +"	subs	%Q0, %Q0, #1\n"
> +"	sbc	%R0, %R0, #0\n"
> +"	teq	%R0, #0\n"
>  "	bmi	2f\n"
>  "	strexd	%1, %0, %H0, [%3]\n"
>  "	teq	%1, #0\n"
> @@ -433,8 +433,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
>  "	teqeq	%H0, %H5\n"
>  "	moveq	%1, #0\n"
>  "	beq	2f\n"
> -"	adds	%0, %0, %6\n"
> -"	adc	%H0, %H0, %H6\n"
> +"	adds	%Q0, %Q0, %Q6\n"
> +"	adc	%R0, %R0, %R6\n"
>  "	strexd	%2, %0, %H0, [%4]\n"
>  "	teq	%2, #0\n"
>  "	bne	1b\n"
> -- 
> 1.7.0.4
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-15  2:06 ` [PATCH 6/6] ARM: support kernel modules in BE8 mode Junxiao Bi
@ 2011-11-21 19:29   ` Nicolas Pitre
  2011-11-22  1:32     ` Bi Junxiao
  0 siblings, 1 reply; 23+ messages in thread
From: Nicolas Pitre @ 2011-11-21 19:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 15 Nov 2011, Junxiao Bi wrote:

> From: Stanley.Miao <stanley.miao@windriver.com>
> 
> In BE8 mode, data must be manipulated in big endian format while
> text must be little endian. Therefore, when relocating the text
> section of module in BE8 mode, we must convert the location offset
> of the text to big endian from the native little endian. After
> the relocation is complete, the location offset value is re-written
> as little endian.
> 
> Since only BE8 mode has such special requirement while other big endian
> mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
> text. We introduce write_instr* and read_instr* to do it.
> 
> Signed-off-by: Stanley.Miao <stanley.miao@windriver.com>
> Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
> ---
>  arch/arm/Makefile         |    1 +
>  arch/arm/include/asm/io.h |   12 ++++++++++
>  arch/arm/kernel/module.c  |   51 +++++++++++++++++++++++----------------------
>  3 files changed, 39 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/arm/Makefile b/arch/arm/Makefile
> index dfcf3b0..c858184 100644
> --- a/arch/arm/Makefile
> +++ b/arch/arm/Makefile
> @@ -13,6 +13,7 @@
>  LDFLAGS_vmlinux	:=-p --no-undefined -X
>  ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
>  LDFLAGS_vmlinux	+= --be8
> +LDFLAGS_MODULE	+= --be8
>  endif
>  
>  OBJCOPYFLAGS	:=-O binary -R .comment -S
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index 065d100..4b6c7de 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -210,6 +210,18 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
>   * Again, this are defined to perform little endian accesses.  See the
>   * IO port primitives for more information.
>   */
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +#define read_instr32(c)		__swab32(*(u32 *)(c))
> +#define read_instr16(c)		__swab16(*(u16 *)(c))
> +#define write_instr32(v, a)	(*(u32 *)(a) = __swab32((__force __u32)(v)))
> +#define write_instr16(v, a)	(*(u16 *)(a) = __swab16((__force __u16)(v)))
> +#else
> +#define read_instr32(c)		(*(u32 *)(c))
> +#define read_instr16(c)		(*(u16 *)(c))
> +#define write_instr32(v, a)	(*(u32 *)(a) = (v))
> +#define write_instr16(v, a)	(*(u16 *)(a) = (v))
> +#endif

NAK.  This has nothing to do with IO.

If only module.c requires this, please move those definitions there.


Nicolas

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-21 19:29   ` Nicolas Pitre
@ 2011-11-22  1:32     ` Bi Junxiao
  2011-11-22  4:17       ` Nicolas Pitre
  2011-11-22 10:47       ` Dave Martin
  0 siblings, 2 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-22  1:32 UTC (permalink / raw)
  To: linux-arm-kernel

on 11/22/2011 03:29 AM Nicolas Pitre wrote:
> On Tue, 15 Nov 2011, Junxiao Bi wrote:
>
>    
>> From: Stanley.Miao<stanley.miao@windriver.com>
>>
>> In BE8 mode, data must be manipulated in big endian format while
>> text must be little endian. Therefore, when relocating the text
>> section of module in BE8 mode, we must convert the location offset
>> of the text to big endian from the native little endian. After
>> the relocation is complete, the location offset value is re-written
>> as little endian.
>>
>> Since only BE8 mode has such special requirement while other big endian
>> mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
>> text. We introduce write_instr* and read_instr* to do it.
>>
>> Signed-off-by: Stanley.Miao<stanley.miao@windriver.com>
>> Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
>> ---
>>   arch/arm/Makefile         |    1 +
>>   arch/arm/include/asm/io.h |   12 ++++++++++
>>   arch/arm/kernel/module.c  |   51 +++++++++++++++++++++++----------------------
>>   3 files changed, 39 insertions(+), 25 deletions(-)
>>
>> diff --git a/arch/arm/Makefile b/arch/arm/Makefile
>> index dfcf3b0..c858184 100644
>> --- a/arch/arm/Makefile
>> +++ b/arch/arm/Makefile
>> @@ -13,6 +13,7 @@
>>   LDFLAGS_vmlinux	:=-p --no-undefined -X
>>   ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
>>   LDFLAGS_vmlinux	+= --be8
>> +LDFLAGS_MODULE	+= --be8
>>   endif
>>
>>   OBJCOPYFLAGS	:=-O binary -R .comment -S
>> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
>> index 065d100..4b6c7de 100644
>> --- a/arch/arm/include/asm/io.h
>> +++ b/arch/arm/include/asm/io.h
>> @@ -210,6 +210,18 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
>>    * Again, this are defined to perform little endian accesses.  See the
>>    * IO port primitives for more information.
>>    */
>> +#ifdef CONFIG_CPU_ENDIAN_BE8
>> +#define read_instr32(c)		__swab32(*(u32 *)(c))
>> +#define read_instr16(c)		__swab16(*(u16 *)(c))
>> +#define write_instr32(v, a)	(*(u32 *)(a) = __swab32((__force __u32)(v)))
>> +#define write_instr16(v, a)	(*(u16 *)(a) = __swab16((__force __u16)(v)))
>> +#else
>> +#define read_instr32(c)		(*(u32 *)(c))
>> +#define read_instr16(c)		(*(u16 *)(c))
>> +#define write_instr32(v, a)	(*(u32 *)(a) = (v))
>> +#define write_instr16(v, a)	(*(u16 *)(a) = (v))
>> +#endif
>>      
> NAK.  This has nothing to do with IO.
>
> If only module.c requires this, please move those definitions there.
>    
Not only modules, all components that needs to read and write text 
segment like kprobes also needs this. As it is special to arm be8, how 
about define it in arch/arm/include/asm/swab.h?
>
> Nicolas
>
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-21 19:11   ` Nicolas Pitre
@ 2011-11-22  1:47     ` Bi Junxiao
  2011-11-22  4:10       ` Nicolas Pitre
  2011-11-22 10:33       ` Dave Martin
  0 siblings, 2 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-22  1:47 UTC (permalink / raw)
  To: linux-arm-kernel

on 11/22/2011 03:11 AM Nicolas Pitre wrote:
> On Tue, 15 Nov 2011, Junxiao Bi wrote:
>
>    
>> The irq state from gic is in little-endian mode. We need do endian
>> conversion for it in big-endian machine. Usually byte swapping of
>> 32-bits word needs 4 assembler instructions for arm machine which
>> arch<  6. But since the high order half word of the irq state is
>> zero, there is a way that can use lesser instrunctions to improve
>> the performance since we only need a bytes swaping of half word.
>> That's what swab16_low_half and swab16_high_half do.
>>
>> Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
>>      
> Comments below.
>
>    
>> ---
>>   arch/arm/include/asm/assembler.h                |   27 +++++++++++++++++++++++
>>   arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
>>   2 files changed, 34 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> index 29035e8..3ddee22 100644
>> --- a/arch/arm/include/asm/assembler.h
>> +++ b/arch/arm/include/asm/assembler.h
>> @@ -302,4 +302,31 @@
>>   	.size \name , . - \name
>>   	.endm
>>
>> +/*
>> + * swab16_low_half is used to swap the bytes order of the low order
>> + * half word, it assumes that the high order half word is zero.
>> + * swab16_high_half is used to swap the bytes order of the high order
>> + * half word, it assumes that the low order half word is zero.
>> + */
>> +#if __LINUX_ARM_ARCH__>= 6
>> +	.macro swab16_low_half, reg
>> +	rev \reg, \reg
>> +	.endm
>> +
>> +	.macro swab16_high_half, reg
>> +	rev \reg, \reg
>> +	.endm
>> +#else
>> +	.macro swab16_low_half, reg
>> +	mov \reg, \reg, ror #8
>> +	orr \reg, \reg, \reg, lsr #16
>> +	bic \reg, \reg, \reg, lsl #16
>> +	.endm
>>      
> I fail to see how this macro would be generally useful.  Presuming that
> the top or bottom half has to be zero is not particularly practical.
>    
We can use it to swap bytes order of a 16-bits number in 32-bits 
register in which case the top or bottom half will be zero. For the 
\irqstat register, as the original implementation assumes its top half 
is zero, so I make the same assumption.
>    
>> +	.macro swab16_high_half, reg
>> +	swab16_low_half \reg
>> +	mov \reg, \reg, lsl #16
>> +	.endm
>>      
> And this doesn't end up cheaper than a plain swab32.
>    
This macro has an advantage that it only needs one register. For swab32, 
it will need another temporary register besides the target register.
> Furthermore, you're using those macros 1) in the gic code, and 2) onli
> in the BE8 case.  Both of those conditions are only possible on ARMv6
> and above anyway, so I'd simply open code a rev instruction inline.
>    
This is not be8 specific. It will be needed for both be8 and be32. So 
the machine with arch < 6 also needs to be supported.
>
> Nicolas
>
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-22  1:47     ` Bi Junxiao
@ 2011-11-22  4:10       ` Nicolas Pitre
  2011-11-22  5:22         ` Bi Junxiao
  2011-11-22 10:33       ` Dave Martin
  1 sibling, 1 reply; 23+ messages in thread
From: Nicolas Pitre @ 2011-11-22  4:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 22 Nov 2011, Bi Junxiao wrote:

> on 11/22/2011 03:11 AM Nicolas Pitre wrote:
> > On Tue, 15 Nov 2011, Junxiao Bi wrote:
> > 
> >    
> > > The irq state from gic is in little-endian mode. We need do endian
> > > conversion for it in big-endian machine. Usually byte swapping of
> > > 32-bits word needs 4 assembler instructions for arm machine which
> > > arch<  6. But since the high order half word of the irq state is
> > > zero, there is a way that can use lesser instrunctions to improve
> > > the performance since we only need a bytes swaping of half word.
> > > That's what swab16_low_half and swab16_high_half do.
> > > 
> > > Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
> > >      
> > Comments below.
> > 
> >    
> > > ---
> > >   arch/arm/include/asm/assembler.h                |   27
> > > +++++++++++++++++++++++
> > >   arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
> > >   2 files changed, 34 insertions(+), 0 deletions(-)
> > > 
> > > diff --git a/arch/arm/include/asm/assembler.h
> > > b/arch/arm/include/asm/assembler.h
> > > index 29035e8..3ddee22 100644
> > > --- a/arch/arm/include/asm/assembler.h
> > > +++ b/arch/arm/include/asm/assembler.h
> > > @@ -302,4 +302,31 @@
> > >   	.size \name , . - \name
> > >   	.endm
> > > 
> > > +/*
> > > + * swab16_low_half is used to swap the bytes order of the low order
> > > + * half word, it assumes that the high order half word is zero.
> > > + * swab16_high_half is used to swap the bytes order of the high order
> > > + * half word, it assumes that the low order half word is zero.
> > > + */
> > > +#if __LINUX_ARM_ARCH__>= 6
> > > +	.macro swab16_low_half, reg
> > > +	rev \reg, \reg
> > > +	.endm
> > > +
> > > +	.macro swab16_high_half, reg
> > > +	rev \reg, \reg
> > > +	.endm
> > > +#else
> > > +	.macro swab16_low_half, reg
> > > +	mov \reg, \reg, ror #8
> > > +	orr \reg, \reg, \reg, lsr #16
> > > +	bic \reg, \reg, \reg, lsl #16
> > > +	.endm
> > >      
> > I fail to see how this macro would be generally useful.  Presuming that
> > the top or bottom half has to be zero is not particularly practical.
> >    
> We can use it to swap bytes order of a 16-bits number in 32-bits register in
> which case the top or bottom half will be zero. For the \irqstat register, as
> the original implementation assumes its top half is zero, so I make the same
> assumption.

The problem is that you're loading some assumption about a particular 
register onto a generic macro.  Unless this is a very common pattern, I 
don't think this needs to be made generic.

> > > +	.macro swab16_high_half, reg
> > > +	swab16_low_half \reg
> > > +	mov \reg, \reg, lsl #16
> > > +	.endm
> > >      
> > And this doesn't end up cheaper than a plain swab32.
> >    
> This macro has an advantage that it only needs one register. For swab32, it
> will need another temporary register besides the target register.

Sure, but I don't think we're so short of registers.  And BE on 
pre-ARMv6 is not that common either.  So, unless this particular pattern 
is heavily used on pre-ARMv6 BE systems, I don't think it is reasonable 
to create a generic macro with such specific assumptions.

> > Furthermore, you're using those macros 1) in the gic code, and 2) onli
> > in the BE8 case.  Both of those conditions are only possible on ARMv6
> > and above anyway, so I'd simply open code a rev instruction inline.
> >    
> This is not be8 specific. It will be needed for both be8 and be32. So the
> machine with arch < 6 also needs to be supported.

There is no gic on machines with arch < 6, no?

If you meant for those macros to be used for other purposes then they 
deserve a patch of their own.

And Russell just merged in his devel branch a patch from Marc Zyngier 
completely removing entry-macro-gic.S anyway, so this patch as it is 
will be moot soon.


Nicolas

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-22  1:32     ` Bi Junxiao
@ 2011-11-22  4:17       ` Nicolas Pitre
  2011-11-22  5:27         ` Bi Junxiao
  2011-11-22 10:47       ` Dave Martin
  1 sibling, 1 reply; 23+ messages in thread
From: Nicolas Pitre @ 2011-11-22  4:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 22 Nov 2011, Bi Junxiao wrote:

> on 11/22/2011 03:29 AM Nicolas Pitre wrote:
> > On Tue, 15 Nov 2011, Junxiao Bi wrote:
> > 
> >    
> > > From: Stanley.Miao<stanley.miao@windriver.com>
> > > 
> > > In BE8 mode, data must be manipulated in big endian format while
> > > text must be little endian. Therefore, when relocating the text
> > > section of module in BE8 mode, we must convert the location offset
> > > of the text to big endian from the native little endian. After
> > > the relocation is complete, the location offset value is re-written
> > > as little endian.
> > > 
> > > Since only BE8 mode has such special requirement while other big endian
> > > mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
> > > text. We introduce write_instr* and read_instr* to do it.
[...]
> > If only module.c requires this, please move those definitions there.
> >    
> Not only modules, all components that needs to read and write text segment
> like kprobes also needs this. As it is special to arm be8, how about define it
> in arch/arm/include/asm/swab.h?

There is a patch series proposed by Leif Lindholm adding generic ARM 
instruction condition code check that creates an opcode.h file.  That 
would be an ideal location for opcode accessor definitions.


Nicolas

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-22  4:10       ` Nicolas Pitre
@ 2011-11-22  5:22         ` Bi Junxiao
  0 siblings, 0 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-22  5:22 UTC (permalink / raw)
  To: linux-arm-kernel

on 11/22/2011 12:10 PM Nicolas Pitre wrote:
> On Tue, 22 Nov 2011, Bi Junxiao wrote:
>
>    
>> on 11/22/2011 03:11 AM Nicolas Pitre wrote:
>>      
>>> On Tue, 15 Nov 2011, Junxiao Bi wrote:
>>>
>>>
>>>        
>>>> The irq state from gic is in little-endian mode. We need do endian
>>>> conversion for it in big-endian machine. Usually byte swapping of
>>>> 32-bits word needs 4 assembler instructions for arm machine which
>>>> arch<   6. But since the high order half word of the irq state is
>>>> zero, there is a way that can use lesser instrunctions to improve
>>>> the performance since we only need a bytes swaping of half word.
>>>> That's what swab16_low_half and swab16_high_half do.
>>>>
>>>> Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
>>>>
>>>>          
>>> Comments below.
>>>
>>>
>>>        
>>>> ---
>>>>    arch/arm/include/asm/assembler.h                |   27
>>>> +++++++++++++++++++++++
>>>>    arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
>>>>    2 files changed, 34 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/arch/arm/include/asm/assembler.h
>>>> b/arch/arm/include/asm/assembler.h
>>>> index 29035e8..3ddee22 100644
>>>> --- a/arch/arm/include/asm/assembler.h
>>>> +++ b/arch/arm/include/asm/assembler.h
>>>> @@ -302,4 +302,31 @@
>>>>    	.size \name , . - \name
>>>>    	.endm
>>>>
>>>> +/*
>>>> + * swab16_low_half is used to swap the bytes order of the low order
>>>> + * half word, it assumes that the high order half word is zero.
>>>> + * swab16_high_half is used to swap the bytes order of the high order
>>>> + * half word, it assumes that the low order half word is zero.
>>>> + */
>>>> +#if __LINUX_ARM_ARCH__>= 6
>>>> +	.macro swab16_low_half, reg
>>>> +	rev \reg, \reg
>>>> +	.endm
>>>> +
>>>> +	.macro swab16_high_half, reg
>>>> +	rev \reg, \reg
>>>> +	.endm
>>>> +#else
>>>> +	.macro swab16_low_half, reg
>>>> +	mov \reg, \reg, ror #8
>>>> +	orr \reg, \reg, \reg, lsr #16
>>>> +	bic \reg, \reg, \reg, lsl #16
>>>> +	.endm
>>>>
>>>>          
>>> I fail to see how this macro would be generally useful.  Presuming that
>>> the top or bottom half has to be zero is not particularly practical.
>>>
>>>        
>> We can use it to swap bytes order of a 16-bits number in 32-bits register in
>> which case the top or bottom half will be zero. For the \irqstat register, as
>> the original implementation assumes its top half is zero, so I make the same
>> assumption.
>>      
> The problem is that you're loading some assumption about a particular
> register onto a generic macro.  Unless this is a very common pattern, I
> don't think this needs to be made generic.
>
>    
>>>> +	.macro swab16_high_half, reg
>>>> +	swab16_low_half \reg
>>>> +	mov \reg, \reg, lsl #16
>>>> +	.endm
>>>>
>>>>          
>>> And this doesn't end up cheaper than a plain swab32.
>>>
>>>        
>> This macro has an advantage that it only needs one register. For swab32, it
>> will need another temporary register besides the target register.
>>      
> Sure, but I don't think we're so short of registers.  And BE on
> pre-ARMv6 is not that common either.  So, unless this particular pattern
> is heavily used on pre-ARMv6 BE systems, I don't think it is reasonable
> to create a generic macro with such specific assumptions.
>
>    
>>> Furthermore, you're using those macros 1) in the gic code, and 2) onli
>>> in the BE8 case.  Both of those conditions are only possible on ARMv6
>>> and above anyway, so I'd simply open code a rev instruction inline.
>>>
>>>        
>> This is not be8 specific. It will be needed for both be8 and be32. So the
>> machine with arch<  6 also needs to be supported.
>>      
> There is no gic on machines with arch<  6, no?
>
> If you meant for those macros to be used for other purposes then they
> deserve a patch of their own.
>    
I didn't find other need of the macro yet. I will leave it aside.
> And Russell just merged in his devel branch a patch from Marc Zyngier
> completely removing entry-macro-gic.S anyway, so this patch as it is
> will be moot soon.
>
>    
OK, thank you for reviewing it. I will drop this patch.
> Nicolas
>
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-22  4:17       ` Nicolas Pitre
@ 2011-11-22  5:27         ` Bi Junxiao
  0 siblings, 0 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-22  5:27 UTC (permalink / raw)
  To: linux-arm-kernel

on 11/22/2011 12:17 PM Nicolas Pitre wrote:
> On Tue, 22 Nov 2011, Bi Junxiao wrote:
>
>    
>> on 11/22/2011 03:29 AM Nicolas Pitre wrote:
>>      
>>> On Tue, 15 Nov 2011, Junxiao Bi wrote:
>>>
>>>
>>>        
>>>> From: Stanley.Miao<stanley.miao@windriver.com>
>>>>
>>>> In BE8 mode, data must be manipulated in big endian format while
>>>> text must be little endian. Therefore, when relocating the text
>>>> section of module in BE8 mode, we must convert the location offset
>>>> of the text to big endian from the native little endian. After
>>>> the relocation is complete, the location offset value is re-written
>>>> as little endian.
>>>>
>>>> Since only BE8 mode has such special requirement while other big endian
>>>> mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
>>>> text. We introduce write_instr* and read_instr* to do it.
>>>>          
> [...]
>    
>>> If only module.c requires this, please move those definitions there.
>>>
>>>        
>> Not only modules, all components that needs to read and write text segment
>> like kprobes also needs this. As it is special to arm be8, how about define it
>> in arch/arm/include/asm/swab.h?
>>      
> There is a patch series proposed by Leif Lindholm adding generic ARM
> instruction condition code check that creates an opcode.h file.  That
> would be an ideal location for opcode accessor definitions.
>    
Thank you for the guide. I will put there in V2.
>
> Nicolas
>
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-22  1:47     ` Bi Junxiao
  2011-11-22  4:10       ` Nicolas Pitre
@ 2011-11-22 10:33       ` Dave Martin
  2011-11-23  2:32         ` Bi Junxiao
  1 sibling, 1 reply; 23+ messages in thread
From: Dave Martin @ 2011-11-22 10:33 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Nov 22, 2011 at 09:47:24AM +0800, Bi Junxiao wrote:
> on 11/22/2011 03:11 AM Nicolas Pitre wrote:
> >On Tue, 15 Nov 2011, Junxiao Bi wrote:
> >
> >>The irq state from gic is in little-endian mode. We need do endian
> >>conversion for it in big-endian machine. Usually byte swapping of
> >>32-bits word needs 4 assembler instructions for arm machine which
> >>arch<  6. But since the high order half word of the irq state is
> >>zero, there is a way that can use lesser instrunctions to improve
> >>the performance since we only need a bytes swaping of half word.
> >>That's what swab16_low_half and swab16_high_half do.
> >>
> >>Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
> >Comments below.
> >
> >>---
> >>  arch/arm/include/asm/assembler.h                |   27 +++++++++++++++++++++++
> >>  arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
> >>  2 files changed, 34 insertions(+), 0 deletions(-)
> >>
> >>diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> >>index 29035e8..3ddee22 100644
> >>--- a/arch/arm/include/asm/assembler.h
> >>+++ b/arch/arm/include/asm/assembler.h
> >>@@ -302,4 +302,31 @@
> >>  	.size \name , . - \name
> >>  	.endm
> >>
> >>+/*
> >>+ * swab16_low_half is used to swap the bytes order of the low order
> >>+ * half word, it assumes that the high order half word is zero.
> >>+ * swab16_high_half is used to swap the bytes order of the high order
> >>+ * half word, it assumes that the low order half word is zero.
> >>+ */
> >>+#if __LINUX_ARM_ARCH__>= 6
> >>+	.macro swab16_low_half, reg
> >>+	rev \reg, \reg
> >>+	.endm
> >>+
> >>+	.macro swab16_high_half, reg
> >>+	rev \reg, \reg
> >>+	.endm
> >>+#else
> >>+	.macro swab16_low_half, reg
> >>+	mov \reg, \reg, ror #8
> >>+	orr \reg, \reg, \reg, lsr #16
> >>+	bic \reg, \reg, \reg, lsl #16
> >>+	.endm

Your v6 and <v6 implementations don't seem to produce the same result
for swab16_low_half.

If reg=0x0000AABB  on >=v6 you transform it to 0xBBAA0000, but on <v6
you translate it to 0x0000BBAA.

Perhaps you meant to use the rev16 instruction instead of rev?

> >I fail to see how this macro would be generally useful.  Presuming that
> >the top or bottom half has to be zero is not particularly practical.
> We can use it to swap bytes order of a 16-bits number in 32-bits
> register in which case the top or bottom half will be zero. For the
> \irqstat register, as the original implementation assumes its top
> half is zero, so I make the same assumption.
> >>+	.macro swab16_high_half, reg
> >>+	swab16_low_half \reg
> >>+	mov \reg, \reg, lsl #16
> >>+	.endm
> >And this doesn't end up cheaper than a plain swab32.
> This macro has an advantage that it only needs one register. For
> swab32, it will need another temporary register besides the target
> register.
> >Furthermore, you're using those macros 1) in the gic code, and 2) onli
> >in the BE8 case.  Both of those conditions are only possible on ARMv6
> >and above anyway, so I'd simply open code a rev instruction inline.
> This is not be8 specific. It will be needed for both be8 and be32.
> So the machine with arch < 6 also needs to be supported.

I also thought the GIC doesn't get used with <v6?  I thought the GIC was
too new for that, but I'm not an expert on it.

Cheers
---Dave

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-22  1:32     ` Bi Junxiao
  2011-11-22  4:17       ` Nicolas Pitre
@ 2011-11-22 10:47       ` Dave Martin
  2011-11-23  3:30         ` Bi Junxiao
  1 sibling, 1 reply; 23+ messages in thread
From: Dave Martin @ 2011-11-22 10:47 UTC (permalink / raw)
  To: linux-arm-kernel


On Tue, Nov 22, 2011 at 09:32:16AM +0800, Bi Junxiao wrote:
> on 11/22/2011 03:29 AM Nicolas Pitre wrote:
> >On Tue, 15 Nov 2011, Junxiao Bi wrote:
> >
> >>From: Stanley.Miao<stanley.miao@windriver.com>
> >>
> >>In BE8 mode, data must be manipulated in big endian format while
> >>text must be little endian. Therefore, when relocating the text
> >>section of module in BE8 mode, we must convert the location offset
> >>of the text to big endian from the native little endian. After
> >>the relocation is complete, the location offset value is re-written
> >>as little endian.
> >>
> >>Since only BE8 mode has such special requirement while other big endian
> >>mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
> >>text. We introduce write_instr* and read_instr* to do it.
> >>
> >>Signed-off-by: Stanley.Miao<stanley.miao@windriver.com>
> >>Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
> >>---
> >>  arch/arm/Makefile         |    1 +
> >>  arch/arm/include/asm/io.h |   12 ++++++++++
> >>  arch/arm/kernel/module.c  |   51 +++++++++++++++++++++++----------------------
> >>  3 files changed, 39 insertions(+), 25 deletions(-)
> >>
> >>diff --git a/arch/arm/Makefile b/arch/arm/Makefile
> >>index dfcf3b0..c858184 100644
> >>--- a/arch/arm/Makefile
> >>+++ b/arch/arm/Makefile
> >>@@ -13,6 +13,7 @@
> >>  LDFLAGS_vmlinux	:=-p --no-undefined -X
> >>  ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
> >>  LDFLAGS_vmlinux	+= --be8
> >>+LDFLAGS_MODULE	+= --be8
> >>  endif
> >>
> >>  OBJCOPYFLAGS	:=-O binary -R .comment -S
> >>diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> >>index 065d100..4b6c7de 100644
> >>--- a/arch/arm/include/asm/io.h
> >>+++ b/arch/arm/include/asm/io.h
> >>@@ -210,6 +210,18 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
> >>   * Again, this are defined to perform little endian accesses.  See the
> >>   * IO port primitives for more information.
> >>   */
> >>+#ifdef CONFIG_CPU_ENDIAN_BE8
> >>+#define read_instr32(c)		__swab32(*(u32 *)(c))
> >>+#define read_instr16(c)		__swab16(*(u16 *)(c))
> >>+#define write_instr32(v, a)	(*(u32 *)(a) = __swab32((__force __u32)(v)))
> >>+#define write_instr16(v, a)	(*(u16 *)(a) = __swab16((__force __u16)(v)))
> >>+#else
> >>+#define read_instr32(c)		(*(u32 *)(c))
> >>+#define read_instr16(c)		(*(u16 *)(c))
> >>+#define write_instr32(v, a)	(*(u32 *)(a) = (v))
> >>+#define write_instr16(v, a)	(*(u16 *)(a) = (v))
> >>+#endif
> >NAK.  This has nothing to do with IO.
> >
> >If only module.c requires this, please move those definitions there.
> Not only modules, all components that needs to read and write text
> segment like kprobes also needs this. As it is special to arm be8,
> how about define it in arch/arm/include/asm/swab.h?

I'm not sure where the correct place is, but I think having macros
doing something along these lines would be useful.  The module loader
is one relevant place, but there is also kprobes, and Rabin's recent
ftrace series highlights the potential usefulness too.

There are a lot of #ifdefs and swab hacks spread around various places
wherever we need to load/store/relocate instructions.  Abstracting this
properly so we don't need a load of fragile #ifdefs for the big-endian
case is clearly a good idea.

Because the Thumb 32-bit case is different from the ARM case, I would
also recommend having three macro variants instead of two:

*_instr_arm()
*_instr_thumb16()
*_instr_thumb32()

Cheers
---Dave

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] ARM: gic: fix big endian support
  2011-11-22 10:33       ` Dave Martin
@ 2011-11-23  2:32         ` Bi Junxiao
  0 siblings, 0 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-23  2:32 UTC (permalink / raw)
  To: linux-arm-kernel

on 11/22/2011 06:33 PM Dave Martin wrote:
> On Tue, Nov 22, 2011 at 09:47:24AM +0800, Bi Junxiao wrote:
>    
>> on 11/22/2011 03:11 AM Nicolas Pitre wrote:
>>      
>>> On Tue, 15 Nov 2011, Junxiao Bi wrote:
>>>
>>>        
>>>> The irq state from gic is in little-endian mode. We need do endian
>>>> conversion for it in big-endian machine. Usually byte swapping of
>>>> 32-bits word needs 4 assembler instructions for arm machine which
>>>> arch<   6. But since the high order half word of the irq state is
>>>> zero, there is a way that can use lesser instrunctions to improve
>>>> the performance since we only need a bytes swaping of half word.
>>>> That's what swab16_low_half and swab16_high_half do.
>>>>
>>>> Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
>>>>          
>>> Comments below.
>>>
>>>        
>>>> ---
>>>>   arch/arm/include/asm/assembler.h                |   27 +++++++++++++++++++++++
>>>>   arch/arm/include/asm/hardware/entry-macro-gic.S |    7 ++++++
>>>>   2 files changed, 34 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>>>> index 29035e8..3ddee22 100644
>>>> --- a/arch/arm/include/asm/assembler.h
>>>> +++ b/arch/arm/include/asm/assembler.h
>>>> @@ -302,4 +302,31 @@
>>>>   	.size \name , . - \name
>>>>   	.endm
>>>>
>>>> +/*
>>>> + * swab16_low_half is used to swap the bytes order of the low order
>>>> + * half word, it assumes that the high order half word is zero.
>>>> + * swab16_high_half is used to swap the bytes order of the high order
>>>> + * half word, it assumes that the low order half word is zero.
>>>> + */
>>>> +#if __LINUX_ARM_ARCH__>= 6
>>>> +	.macro swab16_low_half, reg
>>>> +	rev \reg, \reg
>>>> +	.endm
>>>> +
>>>> +	.macro swab16_high_half, reg
>>>> +	rev \reg, \reg
>>>> +	.endm
>>>> +#else
>>>> +	.macro swab16_low_half, reg
>>>> +	mov \reg, \reg, ror #8
>>>> +	orr \reg, \reg, \reg, lsr #16
>>>> +	bic \reg, \reg, \reg, lsl #16
>>>> +	.endm
>>>>          
> Your v6 and<v6 implementations don't seem to produce the same result
> for swab16_low_half.
>
> If reg=0x0000AABB  on>=v6 you transform it to 0xBBAA0000, but on<v6
> you translate it to 0x0000BBAA.
>    
For swab16_low_half, it assumes that the high order half of the word is 
zero. This mean 0x0000AABB is not a qualified number, but 0xAABB0000 is, 
it can translate to 0x0000BBAA with this macro which is the same result 
with "rev".
> Perhaps you meant to use the rev16 instruction instead of rev?
>    
This macro is trying to swap bytes order of a 32-bit word. It make the 
assumption about the high half for a more cheaper and less register 
usage implementation than swab32. When I first develop this code, 
test_for_ltirq is still in entry-macro-gic.S(now it is deleted), I can 
not find a temporary register for swab32 which needs two registers in 
it. So I introduced this macro. I am not sure whether there are other 
cases like test_for_ltirq. Is this marco useful? If so I can make a 
separate patch for it.
>    
>>> I fail to see how this macro would be generally useful.  Presuming that
>>> the top or bottom half has to be zero is not particularly practical.
>>>        
>> We can use it to swap bytes order of a 16-bits number in 32-bits
>> register in which case the top or bottom half will be zero. For the
>> \irqstat register, as the original implementation assumes its top
>> half is zero, so I make the same assumption.
>>      
>>>> +	.macro swab16_high_half, reg
>>>> +	swab16_low_half \reg
>>>> +	mov \reg, \reg, lsl #16
>>>> +	.endm
>>>>          
>>> And this doesn't end up cheaper than a plain swab32.
>>>        
>> This macro has an advantage that it only needs one register. For
>> swab32, it will need another temporary register besides the target
>> register.
>>      
>>> Furthermore, you're using those macros 1) in the gic code, and 2) onli
>>> in the BE8 case.  Both of those conditions are only possible on ARMv6
>>> and above anyway, so I'd simply open code a rev instruction inline.
>>>        
>> This is not be8 specific. It will be needed for both be8 and be32.
>> So the machine with arch<  6 also needs to be supported.
>>      
> I also thought the GIC doesn't get used with<v6?  I thought the GIC was
> too new for that, but I'm not an expert on it.
>    
I am also not sure about it. I didn't find a official document describe 
this.
> Cheers
> ---Dave
>
>
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 6/6] ARM: support kernel modules in BE8 mode
  2011-11-22 10:47       ` Dave Martin
@ 2011-11-23  3:30         ` Bi Junxiao
  0 siblings, 0 replies; 23+ messages in thread
From: Bi Junxiao @ 2011-11-23  3:30 UTC (permalink / raw)
  To: linux-arm-kernel

on 11/22/2011 06:47 PM Dave Martin wrote:
> On Tue, Nov 22, 2011 at 09:32:16AM +0800, Bi Junxiao wrote:
>    
>> on 11/22/2011 03:29 AM Nicolas Pitre wrote:
>>      
>>> On Tue, 15 Nov 2011, Junxiao Bi wrote:
>>>
>>>        
>>>> From: Stanley.Miao<stanley.miao@windriver.com>
>>>>
>>>> In BE8 mode, data must be manipulated in big endian format while
>>>> text must be little endian. Therefore, when relocating the text
>>>> section of module in BE8 mode, we must convert the location offset
>>>> of the text to big endian from the native little endian. After
>>>> the relocation is complete, the location offset value is re-written
>>>> as little endian.
>>>>
>>>> Since only BE8 mode has such special requirement while other big endian
>>>> mode not, cpu_to_le32 and le32_to_cpu can not be used to relocate the
>>>> text. We introduce write_instr* and read_instr* to do it.
>>>>
>>>> Signed-off-by: Stanley.Miao<stanley.miao@windriver.com>
>>>> Signed-off-by: Junxiao Bi<junxiao.bi@windriver.com>
>>>> ---
>>>>   arch/arm/Makefile         |    1 +
>>>>   arch/arm/include/asm/io.h |   12 ++++++++++
>>>>   arch/arm/kernel/module.c  |   51 +++++++++++++++++++++++----------------------
>>>>   3 files changed, 39 insertions(+), 25 deletions(-)
>>>>
>>>> diff --git a/arch/arm/Makefile b/arch/arm/Makefile
>>>> index dfcf3b0..c858184 100644
>>>> --- a/arch/arm/Makefile
>>>> +++ b/arch/arm/Makefile
>>>> @@ -13,6 +13,7 @@
>>>>   LDFLAGS_vmlinux	:=-p --no-undefined -X
>>>>   ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
>>>>   LDFLAGS_vmlinux	+= --be8
>>>> +LDFLAGS_MODULE	+= --be8
>>>>   endif
>>>>
>>>>   OBJCOPYFLAGS	:=-O binary -R .comment -S
>>>> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
>>>> index 065d100..4b6c7de 100644
>>>> --- a/arch/arm/include/asm/io.h
>>>> +++ b/arch/arm/include/asm/io.h
>>>> @@ -210,6 +210,18 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
>>>>    * Again, this are defined to perform little endian accesses.  See the
>>>>    * IO port primitives for more information.
>>>>    */
>>>> +#ifdef CONFIG_CPU_ENDIAN_BE8
>>>> +#define read_instr32(c)		__swab32(*(u32 *)(c))
>>>> +#define read_instr16(c)		__swab16(*(u16 *)(c))
>>>> +#define write_instr32(v, a)	(*(u32 *)(a) = __swab32((__force __u32)(v)))
>>>> +#define write_instr16(v, a)	(*(u16 *)(a) = __swab16((__force __u16)(v)))
>>>> +#else
>>>> +#define read_instr32(c)		(*(u32 *)(c))
>>>> +#define read_instr16(c)		(*(u16 *)(c))
>>>> +#define write_instr32(v, a)	(*(u32 *)(a) = (v))
>>>> +#define write_instr16(v, a)	(*(u16 *)(a) = (v))
>>>> +#endif
>>>>          
>>> NAK.  This has nothing to do with IO.
>>>
>>> If only module.c requires this, please move those definitions there.
>>>        
>> Not only modules, all components that needs to read and write text
>> segment like kprobes also needs this. As it is special to arm be8,
>> how about define it in arch/arm/include/asm/swab.h?
>>      
> I'm not sure where the correct place is, but I think having macros
> doing something along these lines would be useful.  The module loader
> is one relevant place, but there is also kprobes, and Rabin's recent
> ftrace series highlights the potential usefulness too.
>
> There are a lot of #ifdefs and swab hacks spread around various places
> wherever we need to load/store/relocate instructions.  Abstracting this
> properly so we don't need a load of fragile #ifdefs for the big-endian
> case is clearly a good idea.
>
> Because the Thumb 32-bit case is different from the ARM case, I would
> also recommend having three macro variants instead of two:
>
> *_instr_arm()
> *_instr_thumb16()
> *_instr_thumb32()
>    
These name makes more sense than mine.  I will use them. But it seems 
that we don't need all the three. It is enough with the first two.
For reading and writing thumb code, there is no difference as 32-bit 
thumb code is organized as two 16-bit parts. To read a 32-bit thumb 
code, we need using read_instr16 to read the first part, then check if 
it's a 32-bit thumb, if so then use read_instr16 to read the second part.
> Cheers
> ---Dave
>
>    

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion
  2011-12-08 10:07 [V2][PATCH 0/6]ARM: fix BE8 mode support Junxiao Bi
@ 2011-12-08 10:07   ` Junxiao Bi
  0 siblings, 0 replies; 23+ messages in thread
From: Junxiao Bi @ 2011-12-08 10:07 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: nicolas.pitre, dwmw2, dave.martin, linux, linux-mtd

phys_to_virt() and virt_to_phys() is implemented by one assembler instruction,
add/sub an offset between the physical and virtual address. For some arm machines
with CONFIG_ARM_PATCH_PHYS_VIRT defined, this offset is unknown at compile time,
it is calculated dynamically at run time and then patched to the add/sub instruction.
To support BE8 mode, firstly we need to convert the instruntion to big-endian after
loading it from the memory, and then modify its offset field, at last convert it to
little-endian and write it back to the memory.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/kernel/head.S |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 54fa641..a52aec7 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -565,8 +565,14 @@ __fixup_a_pv_table:
 	b	2f
 1:	add     r7, r3
 	ldrh	ip, [r7, #2]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev16	ip, ip
+#endif
 	and	ip, 0x8f00
 	orr	ip, r6	@ mask in offset bits 31-24
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev16	ip, ip
+#endif
 	strh	ip, [r7, #2]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
@@ -575,8 +581,14 @@ __fixup_a_pv_table:
 #else
 	b	2f
 1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	ip, ip
+#endif
 	bic	ip, ip, #0x000000ff
 	orr	ip, ip, r6	@ mask in offset bits 31-24
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	ip, ip
+#endif
 	str	ip, [r7, r3]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion
@ 2011-12-08 10:07   ` Junxiao Bi
  0 siblings, 0 replies; 23+ messages in thread
From: Junxiao Bi @ 2011-12-08 10:07 UTC (permalink / raw)
  To: linux-arm-kernel

phys_to_virt() and virt_to_phys() is implemented by one assembler instruction,
add/sub an offset between the physical and virtual address. For some arm machines
with CONFIG_ARM_PATCH_PHYS_VIRT defined, this offset is unknown at compile time,
it is calculated dynamically at run time and then patched to the add/sub instruction.
To support BE8 mode, firstly we need to convert the instruntion to big-endian after
loading it from the memory, and then modify its offset field, at last convert it to
little-endian and write it back to the memory.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Junxiao Bi <junxiao.bi@windriver.com>
---
 arch/arm/kernel/head.S |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 54fa641..a52aec7 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -565,8 +565,14 @@ __fixup_a_pv_table:
 	b	2f
 1:	add     r7, r3
 	ldrh	ip, [r7, #2]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev16	ip, ip
+#endif
 	and	ip, 0x8f00
 	orr	ip, r6	@ mask in offset bits 31-24
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev16	ip, ip
+#endif
 	strh	ip, [r7, #2]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
@@ -575,8 +581,14 @@ __fixup_a_pv_table:
 #else
 	b	2f
 1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	ip, ip
+#endif
 	bic	ip, ip, #0x000000ff
 	orr	ip, ip, r6	@ mask in offset bits 31-24
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	ip, ip
+#endif
 	str	ip, [r7, r3]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2011-12-08 10:07 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-15  2:06 [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
2011-11-15  2:06 ` [PATCH 2/6] ARM: gic: fix big endian support Junxiao Bi
2011-11-21 19:11   ` Nicolas Pitre
2011-11-22  1:47     ` Bi Junxiao
2011-11-22  4:10       ` Nicolas Pitre
2011-11-22  5:22         ` Bi Junxiao
2011-11-22 10:33       ` Dave Martin
2011-11-23  2:32         ` Bi Junxiao
2011-11-15  2:06 ` [PATCH 3/6] ARM: early_printk: pl01x: " Junxiao Bi
2011-11-15  2:06 ` [PATCH 4/6] ARM: add big endian support for peripheral access Junxiao Bi
2011-11-15  2:06 ` [PATCH 5/6] ARM: Atomic64: fix 64bit ops in BE mode Junxiao Bi
2011-11-21 19:24   ` Nicolas Pitre
2011-11-15  2:06 ` [PATCH 6/6] ARM: support kernel modules in BE8 mode Junxiao Bi
2011-11-21 19:29   ` Nicolas Pitre
2011-11-22  1:32     ` Bi Junxiao
2011-11-22  4:17       ` Nicolas Pitre
2011-11-22  5:27         ` Bi Junxiao
2011-11-22 10:47       ` Dave Martin
2011-11-23  3:30         ` Bi Junxiao
2011-11-21  5:44 ` [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Bi Junxiao
2011-11-21 18:32 ` Nicolas Pitre
2011-12-08 10:07 [V2][PATCH 0/6]ARM: fix BE8 mode support Junxiao Bi
2011-12-08 10:07 ` [PATCH 1/6] ARM: fix be8 support for phys/virt address conversion Junxiao Bi
2011-12-08 10:07   ` Junxiao Bi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.