* [PATCH 0/5] Various patches for 2.6.35-rc1
From: Catalin Marinas @ 2010-03-22 15:19 UTC
  To: linux-arm-kernel

Hi,

These are some patches I plan to push to -next unless there are
objections. My intention is to push them into mainline during the next
merge window.

The biggest one is the removal of domain switching in Linux.

Thanks.


Catalin Marinas (5):
      ARM: Use lazy cache flushing on ARMv7 SMP systems
      ARM: Implement read/write for ownership in the ARMv6 DMA cache ops
      ARM: Correct the VFPv3 detection
      ARM: Align machine_desc.phys_io to a 1MB section
      ARM: Remove the domain switching on ARMv6k/v7 CPUs


 arch/arm/include/asm/assembler.h  |   13 +++---
 arch/arm/include/asm/cacheflush.h |    4 ++
 arch/arm/include/asm/domain.h     |   31 +++++++++++++-
 arch/arm/include/asm/futex.h      |    9 ++--
 arch/arm/include/asm/smp_plat.h   |    4 ++
 arch/arm/include/asm/tlbflush.h   |   29 ++++++++++++-
 arch/arm/include/asm/uaccess.h    |   16 ++++---
 arch/arm/kernel/entry-armv.S      |    4 +-
 arch/arm/kernel/head.S            |    2 +
 arch/arm/kernel/setup.c           |    2 -
 arch/arm/kernel/traps.c           |   17 ++++++++
 arch/arm/lib/getuser.S            |   13 +++---
 arch/arm/lib/putuser.S            |   29 +++++++------
 arch/arm/lib/uaccess.S            |   83 +++++++++++++++++++------------------
 arch/arm/mm/Kconfig               |    8 ++++
 arch/arm/mm/cache-v6.S            |   10 ++++
 arch/arm/mm/cache-v7.S            |    4 ++
 arch/arm/mm/fault-armv.c          |    2 -
 arch/arm/mm/flush.c               |    9 ++--
 arch/arm/mm/proc-macros.S         |    7 +++
 arch/arm/mm/proc-v7.S             |    5 +-
 arch/arm/mm/tlb-v7.S              |    8 ++++
 arch/arm/vfp/vfpmodule.c          |    2 -
 23 files changed, 216 insertions(+), 95 deletions(-)

-- 
Catalin


* [PATCH 1/5] ARM: Use lazy cache flushing on ARMv7 SMP systems
From: Catalin Marinas @ 2010-03-22 15:19 UTC
  To: linux-arm-kernel

ARMv7 processors like Cortex-A9 broadcast the cache maintenance
operations in hardware. This patch allows the
flush_dcache_page/update_mmu_cache pair to work in a lazy flushing mode
similar to the UP case.

The standard ICIALLU and BPIALL operations are not automatically
broadcast to the other CPUs in an MP system. The patch uses the Inner
Shareable variants, ICIALLUIS and BPIALLIS, when building for ARMv7 SMP.
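
For illustration, the Inner Shareable variant boils down to an MCR on
CP15 c7, c1 rather than c7, c5 (a minimal sketch of what the
cacheflush.h hunk below open-codes; the dsb()/isb() barriers are the
usual asm/system.h macros):

static inline void __flush_icache_all_is(void)
{
	/* ICIALLUIS: invalidate the I-cache on all CPUs in the Inner
	 * Shareable domain; plain ICIALLU only affects this CPU */
	asm volatile("mcr	p15, 0, %0, c7, c1, 0" : : "r" (0));
	dsb();
	isb();
}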

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/cacheflush.h |    4 ++++
 arch/arm/include/asm/smp_plat.h   |    4 ++++
 arch/arm/include/asm/tlbflush.h   |   29 ++++++++++++++++++++++++++++-
 arch/arm/mm/cache-v7.S            |    4 ++++
 arch/arm/mm/fault-armv.c          |    2 --
 arch/arm/mm/flush.c               |    9 ++++-----
 arch/arm/mm/tlb-v7.S              |    8 ++++++++
 7 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 0d08d41..4656a24 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -371,6 +371,10 @@ static inline void __flush_icache_all(void)
 #ifdef CONFIG_ARM_ERRATA_411920
 	extern void v6_icache_inval_all(void);
 	v6_icache_inval_all();
+#elif defined(CONFIG_SMP) && __LINUX_ARM_ARCH__ >= 7
+	asm("mcr	p15, 0, %0, c7, c1, 0	@ invalidate I-cache inner shareable\n"
+	    :
+	    : "r" (0));
 #else
 	asm("mcr	p15, 0, %0, c7, c5, 0	@ invalidate I-cache\n"
 	    :
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index e621530..963a338 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -13,9 +13,13 @@ static inline int tlb_ops_need_broadcast(void)
 	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
 }
 
+#if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7
+#define cache_ops_need_broadcast()	0
+#else
 static inline int cache_ops_need_broadcast(void)
 {
 	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 1;
 }
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index e085e2c..bd863d8 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -46,6 +46,9 @@
 #define TLB_V7_UIS_FULL (1 << 20)
 #define TLB_V7_UIS_ASID (1 << 21)
 
+/* Inner Shareable BTB operation (ARMv7 MP extensions) */
+#define TLB_V7_IS_BTB	(1 << 22)
+
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
@@ -183,7 +186,7 @@
 #endif
 
 #ifdef CONFIG_SMP
-#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
 			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
 #else
 #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
@@ -339,6 +342,12 @@ static inline void local_flush_tlb_all(void)
 		dsb();
 		isb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
@@ -376,6 +385,12 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
 		dsb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 static inline void
@@ -416,6 +431,12 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
 		dsb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
@@ -454,6 +475,12 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 		dsb();
 		isb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 /*
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index bcd64f2..06a90dc 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -167,7 +167,11 @@ ENTRY(v7_coherent_user_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r0, c7, c1, 6		@ invalidate BTB Inner Shareable
+#else
 	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+#endif
 	dsb
 	isb
 	mov	pc, lr
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index c9b97e9..0866ffd 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -169,10 +169,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 		return;
 
 	mapping = page_mapping(page);
-#ifndef CONFIG_SMP
 	if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
 		__flush_dcache_page(mapping, page);
-#endif
 	if (mapping) {
 		if (cache_is_vivt())
 			make_coherent(mapping, vma, addr, ptep, pfn);
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index e34f095..c2cea53 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -16,6 +16,7 @@
 #include <asm/smp_plat.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/smp_plat.h>
 
 #include "mm.h"
 
@@ -241,12 +242,10 @@ void flush_dcache_page(struct page *page)
 
 	mapping = page_mapping(page);
 
-#ifndef CONFIG_SMP
-	if (!PageHighMem(page) && mapping && !mapping_mapped(mapping))
+	if (!cache_ops_need_broadcast() &&
+	    !PageHighMem(page) && mapping && !mapping_mapped(mapping))
 		set_bit(PG_dcache_dirty, &page->flags);
-	else
-#endif
-	{
+	else {
 		__flush_dcache_page(mapping, page);
 		if (mapping && cache_is_vivt())
 			__flush_dcache_aliases(mapping, page);
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 0cb1848..f3f288a 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -50,7 +50,11 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	mov	ip, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, ip, c7, c1, 6		@ flush BTAC/BTB Inner Shareable
+#else
 	mcr	p15, 0, ip, c7, c5, 6		@ flush BTAC/BTB
+#endif
 	dsb
 	mov	pc, lr
 ENDPROC(v7wbi_flush_user_tlb_range)
@@ -79,7 +83,11 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r2, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r2, c7, c1, 6		@ flush BTAC/BTB Inner Shareable
+#else
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
+#endif
 	dsb
 	isb
 	mov	pc, lr


* [PATCH 2/5] ARM: Implement read/write for ownership in the ARMv6 DMA cache ops
From: Catalin Marinas @ 2010-03-22 15:19 UTC
  To: linux-arm-kernel

The Snoop Control Unit on the ARM11MPCore hardware does not detect the
cache operations, so the dma_cache_maint*() functions may leave stale
cache entries on other CPUs. The solution implemented in this patch
performs a Read or Write For Ownership in the ARMv6 DMA cache
maintenance functions. These LDR/STR instructions change the cache line
state to shared or exclusive so that the cache maintenance operation has
the desired effect.
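
Conceptually (a C sketch of the idea only - the real code is the
cache-v6.S hunk below): touching the line first pulls it into this
CPU's cache in the right coherence state, so the subsequent local
maintenance operation acts on the only valid copy:

static void v6_dma_clean_line_for_ownership(unsigned long addr)
{
	(void)*(volatile unsigned long *)addr;		/* read for ownership */
	asm volatile("mcr	p15, 0, %0, c7, c10, 1"	/* clean D line */
		     : : "r" (addr) : "memory");
}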

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mm/cache-v6.S |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 9d89c67..b9f2cbd 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -211,6 +211,9 @@ v6_dma_inv_range:
 	mcrne	p15, 0, r1, c7, c15, 1		@ clean & invalidate unified line
 #endif
 1:
+#ifdef CONFIG_SMP
+	str	r0, [r0]			@ write for ownership
+#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D line
 #else
@@ -231,6 +234,9 @@ v6_dma_inv_range:
 v6_dma_clean_range:
 	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
 1:
+#ifdef CONFIG_SMP
+	ldr	r2, [r0]			@ read for ownership
+#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D line
 #else
@@ -251,6 +257,10 @@ v6_dma_clean_range:
 ENTRY(v6_dma_flush_range)
 	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
 1:
+#ifdef CONFIG_SMP
+	ldr	r2, [r0]			@ read for ownership
+	str	r2, [r0]			@ write for ownership
+#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
 #else


* [PATCH 3/5] ARM: Correct the VFPv3 detection
From: Catalin Marinas @ 2010-03-22 15:19 UTC
  To: linux-arm-kernel

A CPU has VFPv3 hardware if the FPSID[19:16] bits are 2 or more.
Linux currently checks only for 3 or more.
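
For reference, the check boils down to something like this (a sketch;
fmrx(), FPSID_ARCH_* and HWCAP_VFPv3 are the definitions vfpmodule.c
already uses, while the helper name is made up):

static void vfp_detect_v3(void)
{
	u32 vfpsid = fmrx(FPSID);
	u32 VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;

	if (VFP_arch >= 2)	/* VFPv3 is subarchitecture 2 or more */
		elf_hwcap |= HWCAP_VFPv3;
}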

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/vfp/vfpmodule.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 7f3f59f..a420cb9 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -545,7 +545,7 @@ static int __init vfp_init(void)
 		 */
 		elf_hwcap |= HWCAP_VFP;
 #ifdef CONFIG_VFPv3
-		if (VFP_arch >= 3) {
+		if (VFP_arch >= 2) {
 			elf_hwcap |= HWCAP_VFPv3;
 
 			/*


* [PATCH 4/5] ARM: Align machine_desc.phys_io to a 1MB section
From: Catalin Marinas @ 2010-03-22 15:19 UTC
  To: linux-arm-kernel

Platforms like RealView don't pass a section-aligned pointer via the
machine_desc structure. This patch aligns the pointer in the
__create_page_tables function. Reported by Tony Thompson.
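
The two added shifts are just a section-align; the C equivalent would be
(illustration only, helper name made up):

static inline unsigned long section_align_down(unsigned long phys_io)
{
	return (phys_io >> 20) << 20;	/* clear the low 20 bits (1MB) */
}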

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/kernel/head.S |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index eb62bf9..82ea924 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -302,6 +302,8 @@ __create_page_tables:
 	movhi	r3, #0x0800
 	add	r6, r0, r3
 	ldr	r3, [r8, #MACHINFO_PHYSIO]
+	mov	r3, r3, lsr #20			@ 1MB-aligned address
+	mov	r3, r3, lsl #20
 	orr	r3, r3, r7
 1:	str	r3, [r0], #4
 	add	r3, r3, #1 << 20


* [PATCH 5/5] ARM: Remove the domain switching on ARMv6k/v7 CPUs
From: Catalin Marinas @ 2010-03-22 15:20 UTC
  To: linux-arm-kernel

This patch removes the domain switching functionality via the set_fs and
__switch_to functions on cores that have a TLS register.

Currently, the ioremap and vmalloc areas share the same level 1 page
tables and therefore have the same domain (DOMAIN_KERNEL). When the
kernel domain is modified from Client to Manager (via set_fs() or in
the __switch_to function), the XN (eXecute Never) bit is overridden and
newer CPUs can speculatively prefetch the ioremap'ed memory.

Linux performs the kernel domain switching to allow user-specific
functions (copy_to/from_user, get/put_user etc.) to access kernel
memory. In order for these functions to work with the kernel domain set
to Client, the patch replaces the LDRT/STRT and related instructions
with their plain LDR/STR counterparts.
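
To illustrate, with the T() helper introduced below a uaccess macro ends
up looking roughly like this (a sketch - the .fixup and __ex_table
sections of the real macros, which update the err operand, are omitted):

#define __get_user_word(x, p, err)			\
	__asm__ __volatile__(				\
	"1:	" T(ldr) "	%1, [%2]\n"		\
	: "+r" (err), "=&r" (x)				\
	: "r" (p)					\
	: "cc")

With CONFIG_CPU_USE_DOMAINS this expands to "ldrt" (an unprivileged
access relying on the domain setup); without it, to a plain "ldr" that
relies on the page access permissions alone.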

The access rights of user pages are also modified to kernel read-only
rather than read/write so that the copy-on-write mechanism still works.
CPU_USE_DOMAINS is disabled only if HAS_TLS_REG is defined, since
otherwise the TLS value would have to be written to the high vectors
page, which isn't possible with the kernel domain set to Client.

The user addresses passed to the kernel are checked by the access_ok()
function so that they do not point into kernel space.
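
I.e. the usual pattern (a sketch; the helper name is made up for
illustration):

#include <linux/uaccess.h>	/* access_ok(), __get_user() */

static long copy_word_from_user(u32 *dst, const u32 __user *src)
{
	if (!access_ok(VERIFY_READ, src, sizeof(*src)))
		return -EFAULT;		/* pointer is out of range */
	return __get_user(*dst, src);	/* may still fault -> -EFAULT */
}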

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/assembler.h |   13 +++---
 arch/arm/include/asm/domain.h    |   31 +++++++++++++-
 arch/arm/include/asm/futex.h     |    9 ++--
 arch/arm/include/asm/uaccess.h   |   16 ++++---
 arch/arm/kernel/entry-armv.S     |    4 +-
 arch/arm/kernel/traps.c          |   17 ++++++++
 arch/arm/lib/getuser.S           |   13 +++---
 arch/arm/lib/putuser.S           |   29 +++++++------
 arch/arm/lib/uaccess.S           |   83 +++++++++++++++++++-------------------
 arch/arm/mm/Kconfig              |    8 ++++
 arch/arm/mm/proc-macros.S        |    7 +++
 arch/arm/mm/proc-v7.S            |    5 +-
 12 files changed, 150 insertions(+), 85 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 00f46d9..cc84083 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -18,6 +18,7 @@
 #endif
 
 #include <asm/ptrace.h>
+#include <asm/domain.h>
 
 /*
  * Endian independent macros for shifting bytes within registers.
@@ -183,12 +184,12 @@
  */
 #ifdef CONFIG_THUMB2_KERNEL
 
-	.macro	usraccoff, instr, reg, ptr, inc, off, cond, abort
+	.macro	usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T()
 9999:
 	.if	\inc == 1
-	\instr\cond\()bt \reg, [\ptr, #\off]
+	\instr\cond\()b\()\t\().w \reg, [\ptr, #\off]
 	.elseif	\inc == 4
-	\instr\cond\()t \reg, [\ptr, #\off]
+	\instr\cond\()\t\().w \reg, [\ptr, #\off]
 	.else
 	.error	"Unsupported inc macro argument"
 	.endif
@@ -223,13 +224,13 @@
 
 #else	/* !CONFIG_THUMB2_KERNEL */
 
-	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort
+	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort, t=T()
 	.rept	\rept
 9999:
 	.if	\inc == 1
-	\instr\cond\()bt \reg, [\ptr], #\inc
+	\instr\cond\()b\()\t \reg, [\ptr], #\inc
 	.elseif	\inc == 4
-	\instr\cond\()t \reg, [\ptr], #\inc
+	\instr\cond\()\t \reg, [\ptr], #\inc
 	.else
 	.error	"Unsupported inc macro argument"
 	.endif
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index cc7ef40..af18cea 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -45,13 +45,17 @@
  */
 #define DOMAIN_NOACCESS	0
 #define DOMAIN_CLIENT	1
+#ifdef CONFIG_CPU_USE_DOMAINS
 #define DOMAIN_MANAGER	3
+#else
+#define DOMAIN_MANAGER	1
+#endif
 
 #define domain_val(dom,type)	((type) << (2*(dom)))
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 #define set_domain(x)					\
 	do {						\
 	__asm__ __volatile__(				\
@@ -74,5 +78,28 @@
 #define modify_domain(dom,type)	do { } while (0)
 #endif
 
+/*
+ * Generate the T (user) versions of the LDR/STR and related
+ * instructions (inline assembly)
+ */
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define T(instr)	#instr "t"
+#else
+#define T(instr)	#instr
 #endif
-#endif /* !__ASSEMBLY__ */
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * Generate the T (user) versions of the LDR/STR and related
+ * instructions
+ */
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define T(instr)	instr ## t
+#else
+#define T(instr)	instr
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* !__ASM_PROC_DOMAIN_H */
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index bfcc159..8d868bd 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -13,12 +13,13 @@
 #include <linux/preempt.h>
 #include <linux/uaccess.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
 	__asm__ __volatile__(					\
-	"1:	ldrt	%1, [%2]\n"				\
+	"1:	" T(ldr) "	%1, [%2]\n"			\
 	"	" insn "\n"					\
-	"2:	strt	%0, [%2]\n"				\
+	"2:	" T(str) "	%0, [%2]\n"			\
 	"	mov	%0, #0\n"				\
 	"3:\n"							\
 	"	.section __ex_table,\"a\"\n"			\
@@ -97,10 +98,10 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 	pagefault_disable();	/* implies preempt_disable() */
 
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
-	"1:	ldrt	%0, [%3]\n"
+	"1:	" T(ldr) "	%0, [%3]\n"
 	"	teq	%0, %1\n"
 	"	it	eq	@ explicit IT needed for the 2b label\n"
-	"2:	streqt	%2, [%3]\n"
+	"2:	" T(streq) "	%2, [%3]\n"
 	"3:\n"
 	"	.section __ex_table,\"a\"\n"
 	"	.align	3\n"
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 1d6bd40..e4d0905 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -227,7 +227,7 @@ do {									\
 
 #define __get_user_asm_byte(x,addr,err)				\
 	__asm__ __volatile__(					\
-	"1:	ldrbt	%1,[%2]\n"				\
+	"1:	" T(ldrb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.section .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -263,7 +263,7 @@ do {									\
 
 #define __get_user_asm_word(x,addr,err)				\
 	__asm__ __volatile__(					\
-	"1:	ldrt	%1,[%2]\n"				\
+	"1:	" T(ldr) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.section .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -308,7 +308,7 @@ do {									\
 
 #define __put_user_asm_byte(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
-	"1:	strbt	%1,[%2]\n"				\
+	"1:	" T(strb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.section .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -341,7 +341,7 @@ do {									\
 
 #define __put_user_asm_word(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
-	"1:	strt	%1,[%2]\n"				\
+	"1:	" T(str) "	%1,[%2],#0\n"			\
 	"2:\n"							\
 	"	.section .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
@@ -366,10 +366,10 @@ do {									\
 
 #define __put_user_asm_dword(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
- ARM(	"1:	strt	" __reg_oper1 ", [%1], #4\n"	)	\
- ARM(	"2:	strt	" __reg_oper0 ", [%1]\n"	)	\
- THUMB(	"1:	strt	" __reg_oper1 ", [%1]\n"	)	\
- THUMB(	"2:	strt	" __reg_oper0 ", [%1, #4]\n"	)	\
+ ARM(	"1:	" T(str) "	" __reg_oper1 ", [%1], #4\n"	)	\
+ ARM(	"2:	" T(str) "	" __reg_oper0 ", [%1]\n"	)	\
+ THUMB(	"1:	" T(str) "	" __reg_oper1 ", [%1]\n"	)	\
+ THUMB(	"2:	" T(str) "	" __reg_oper0 ", [%1, #4]\n"	)	\
 	"3:\n"							\
 	"	.section .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 6c5cf36..694f7ab 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -736,7 +736,7 @@ ENTRY(__switch_to)
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
 #if defined(CONFIG_HAS_TLS_REG)
@@ -745,7 +745,7 @@ ENTRY(__switch_to)
 	mov	r4, #0xffff0fff
 	str	r3, [r4, #-15]			@ TLS val at 0xffff0ff0
 #endif
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1621e53..6571e19 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -30,6 +30,7 @@
 #include <asm/unistd.h>
 #include <asm/traps.h>
 #include <asm/unwind.h>
+#include <asm/tlbflush.h>
 
 #include "ptrace.h"
 #include "signal.h"
@@ -750,6 +751,16 @@ void __init early_trap_init(void)
 	extern char __vectors_start[], __vectors_end[];
 	extern char __kuser_helper_start[], __kuser_helper_end[];
 	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+#if !defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_MMU)
+	pgd_t *pgd = pgd_offset_k(vectors);
+	pmd_t *pmd = pmd_offset(pgd, vectors);
+	pte_t *pte = pte_offset_kernel(pmd, vectors);
+	pte_t entry = *pte;
+
+	/* allow writing to the vectors page */
+	set_pte_ext(pte, pte_mkwrite(entry), 0);
+	local_flush_tlb_kernel_page(vectors);
+#endif
 
 	/*
 	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
@@ -769,6 +780,12 @@ void __init early_trap_init(void)
 	memcpy((void *)KERN_RESTART_CODE, syscall_restart_code,
 	       sizeof(syscall_restart_code));
 
+#if !defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_MMU)
+	/* restore the vectors page permissions */
+	set_pte_ext(pte, entry, 0);
+	local_flush_tlb_kernel_page(vectors);
+#endif
+
 	flush_icache_range(vectors, vectors + PAGE_SIZE);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 }
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index a1814d9..acc966b 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -28,20 +28,21 @@
  */
 #include <linux/linkage.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 ENTRY(__get_user_1)
-1:	ldrbt	r2, [r0]
+1:	T(ldrb)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
 #ifdef CONFIG_THUMB2_KERNEL
-2:	ldrbt	r2, [r0]
-3:	ldrbt	r3, [r0, #1]
+2:	T(ldrb)	r2, [r0]
+3:	T(ldrb)	r3, [r0, #1]
 #else
-2:	ldrbt	r2, [r0], #1
-3:	ldrbt	r3, [r0]
+2:	T(ldrb)	r2, [r0], #1
+3:	T(ldrb)	r3, [r0]
 #endif
 #ifndef __ARMEB__
 	orr	r2, r2, r3, lsl #8
@@ -53,7 +54,7 @@ ENTRY(__get_user_2)
 ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
-4:	ldrt	r2, [r0]
+4:	T(ldr)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__get_user_4)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 02fedbf..95b3fe8 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -28,9 +28,10 @@
  */
 #include <linux/linkage.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 ENTRY(__put_user_1)
-1:	strbt	r2, [r0]
+1:	T(strb)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__put_user_1)
@@ -39,19 +40,19 @@ ENTRY(__put_user_2)
 	mov	ip, r2, lsr #8
 #ifdef CONFIG_THUMB2_KERNEL
 #ifndef __ARMEB__
-2:	strbt	r2, [r0]
-3:	strbt	ip, [r0, #1]
+2:	T(strb)	r2, [r0]
+3:	T(strb)	ip, [r0, #1]
 #else
-2:	strbt	ip, [r0]
-3:	strbt	r2, [r0, #1]
+2:	T(strb)	ip, [r0]
+3:	T(strb)	r2, [r0, #1]
 #endif
 #else	/* !CONFIG_THUMB2_KERNEL */
 #ifndef __ARMEB__
-2:	strbt	r2, [r0], #1
-3:	strbt	ip, [r0]
+2:	T(strb)	r2, [r0], #1
+3:	T(strb)	ip, [r0]
 #else
-2:	strbt	ip, [r0], #1
-3:	strbt	r2, [r0]
+2:	T(strb)	ip, [r0], #1
+3:	T(strb)	r2, [r0]
 #endif
 #endif	/* CONFIG_THUMB2_KERNEL */
 	mov	r0, #0
@@ -59,18 +60,18 @@ ENTRY(__put_user_2)
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
-4:	strt	r2, [r0]
+4:	T(str)	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 #ifdef CONFIG_THUMB2_KERNEL
-5:	strt	r2, [r0]
-6:	strt	r3, [r0, #4]
+5:	T(str)	r2, [r0]
+6:	T(str)	r3, [r0, #4]
 #else
-5:	strt	r2, [r0], #4
-6:	strt	r3, [r0]
+5:	T(str)	r2, [r0], #4
+6:	T(str)	r3, [r0]
 #endif
 	mov	r0, #0
 	mov	pc, lr
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index ffdd274..e47cdfd 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -14,6 +14,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/errno.h>
+#include <asm/domain.h>
 
 		.text
 
@@ -31,11 +32,11 @@
 		rsb	ip, ip, #4
 		cmp	ip, #2
 		ldrb	r3, [r1], #1
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		ldrgeb	r3, [r1], #1
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #1
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		sub	r2, r2, ip
 		b	.Lc2u_dest_aligned
 
@@ -58,7 +59,7 @@ ENTRY(__copy_to_user)
 		addmi	ip, r2, #4
 		bmi	.Lc2u_0nowords
 		ldr	r3, [r1], #4
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -87,18 +88,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
 		ldrne	r3, [r1], #4
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_0fupi
 .Lc2u_0nowords:	teq	ip, #0
 		beq	.Lc2u_finished
 .Lc2u_nowords:	cmp	ip, #2
 		ldrb	r3, [r1], #1
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		ldrgeb	r3, [r1], #1
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #1
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 
 .Lc2u_not_enough:
@@ -119,7 +120,7 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault
 		mov	r3, r7, pull #8
 		ldr	r7, [r1], #4
 		orr	r3, r3, r7, push #24
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -154,18 +155,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		movne	r3, r7, pull #8
 		ldrne	r7, [r1], #4
 		orrne	r3, r3, r7, push #24
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
 .Lc2u_1nowords:	mov	r3, r7, get_byte_1
 		teq	ip, #0
 		beq	.Lc2u_finished
 		cmp	ip, #2
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		movge	r3, r7, get_byte_2
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		movgt	r3, r7, get_byte_3
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 
 .Lc2u_2fupi:	subs	r2, r2, #4
@@ -174,7 +175,7 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault
 		mov	r3, r7, pull #16
 		ldr	r7, [r1], #4
 		orr	r3, r3, r7, push #16
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -209,18 +210,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		movne	r3, r7, pull #16
 		ldrne	r7, [r1], #4
 		orrne	r3, r3, r7, push #16
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
 .Lc2u_2nowords:	mov	r3, r7, get_byte_2
 		teq	ip, #0
 		beq	.Lc2u_finished
 		cmp	ip, #2
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		movge	r3, r7, get_byte_3
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #0
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 
 .Lc2u_3fupi:	subs	r2, r2, #4
@@ -229,7 +230,7 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault
 		mov	r3, r7, pull #24
 		ldr	r7, [r1], #4
 		orr	r3, r3, r7, push #8
-USER(		strt	r3, [r0], #4)			@ May fault
+USER(		T(str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
 		movs	ip, ip, lsr #32 - PAGE_SHIFT
@@ -264,18 +265,18 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		movne	r3, r7, pull #24
 		ldrne	r7, [r1], #4
 		orrne	r3, r3, r7, push #8
-		strnet	r3, [r0], #4			@ Shouldnt fault
+		T(strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
 .Lc2u_3nowords:	mov	r3, r7, get_byte_3
 		teq	ip, #0
 		beq	.Lc2u_finished
 		cmp	ip, #2
-USER(		strbt	r3, [r0], #1)			@ May fault
+USER(		T(strb)	r3, [r0], #1)			@ May fault
 		ldrgeb	r3, [r1], #1
-USER(		strgebt	r3, [r0], #1)			@ May fault
+USER(		T(strgeb) r3, [r0], #1)			@ May fault
 		ldrgtb	r3, [r1], #0
-USER(		strgtbt	r3, [r0], #1)			@ May fault
+USER(		T(strgtb) r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
 ENDPROC(__copy_to_user)
 
@@ -294,11 +295,11 @@ ENDPROC(__copy_to_user)
 .Lcfu_dest_not_aligned:
 		rsb	ip, ip, #4
 		cmp	ip, #2
-USER(		ldrbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrb)	r3, [r1], #1)			@ May fault
 		strb	r3, [r0], #1
-USER(		ldrgebt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgeb) r3, [r1], #1)			@ May fault
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		sub	r2, r2, ip
 		b	.Lcfu_dest_aligned
@@ -321,7 +322,7 @@ ENTRY(__copy_from_user)
 .Lcfu_0fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_0nowords
-USER(		ldrt	r3, [r1], #4)
+USER(		T(ldr)	r3, [r1], #4)
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
 		rsb	ip, ip, #0
@@ -350,18 +351,18 @@ USER(		ldrt	r3, [r1], #4)
 		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		ldrnet	r3, [r1], #4			@ Shouldnt fault
+		T(ldrne) r3, [r1], #4			@ Shouldnt fault
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_0fupi
 .Lcfu_0nowords:	teq	ip, #0
 		beq	.Lcfu_finished
 .Lcfu_nowords:	cmp	ip, #2
-USER(		ldrbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrb)	r3, [r1], #1)			@ May fault
 		strb	r3, [r0], #1
-USER(		ldrgebt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgeb) r3, [r1], #1)			@ May fault
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
 
@@ -374,7 +375,7 @@ USER(		ldrgtbt	r3, [r1], #1)			@ May fault
 
 .Lcfu_src_not_aligned:
 		bic	r1, r1, #3
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		cmp	ip, #2
 		bgt	.Lcfu_3fupi
 		beq	.Lcfu_2fupi
@@ -382,7 +383,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
 		mov	r3, r7, pull #8
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		orr	r3, r3, r7, push #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
@@ -417,7 +418,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
 		movne	r3, r7, pull #8
-USER(		ldrnet	r7, [r1], #4)			@ May fault
+USER(		T(ldrne) r7, [r1], #4)			@ May fault
 		orrne	r3, r3, r7, push #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
@@ -437,7 +438,7 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
 		mov	r3, r7, pull #16
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		orr	r3, r3, r7, push #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
@@ -473,7 +474,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
 		movne	r3, r7, pull #16
-USER(		ldrnet	r7, [r1], #4)			@ May fault
+USER(		T(ldrne) r7, [r1], #4)			@ May fault
 		orrne	r3, r3, r7, push #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
@@ -485,7 +486,7 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault
 		strb	r3, [r0], #1
 		movge	r3, r7, get_byte_3
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #0)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #0)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
 
@@ -493,7 +494,7 @@ USER(		ldrgtbt	r3, [r1], #0)			@ May fault
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
 		mov	r3, r7, pull #24
-USER(		ldrt	r7, [r1], #4)			@ May fault
+USER(		T(ldr)	r7, [r1], #4)			@ May fault
 		orr	r3, r3, r7, push #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
@@ -528,7 +529,7 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
 		movne	r3, r7, pull #24
-USER(		ldrnet	r7, [r1], #4)			@ May fault
+USER(		T(ldrne) r7, [r1], #4)			@ May fault
 		orrne	r3, r3, r7, push #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
@@ -538,9 +539,9 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault
 		beq	.Lcfu_finished
 		cmp	ip, #2
 		strb	r3, [r0], #1
-USER(		ldrgebt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgeb) r3, [r1], #1)			@ May fault
 		strgeb	r3, [r0], #1
-USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+USER(		T(ldrgtb) r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
 ENDPROC(__copy_from_user)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5df74c1..755e081 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -597,6 +597,14 @@ config CPU_CP15_MPU
 	help
 	  Processor has the CP15 register, which has MPU related registers.
 
+config CPU_USE_DOMAINS
+	bool
+	depends on MMU
+	default y if !HAS_TLS_REG
+	help
+	  This option enables or disables the use of domain switching
+	  via the set_fs() function.
+
 #
 # CPU supports 36-bit I/O
 #
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 7d63bea..337f102 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -99,6 +99,10 @@
  *  110x   0   1   0	r/w	r/o
  *  11x0   0   1   0	r/w	r/o
  *  1111   0   1   1	r/w	r/w
+ *
+ * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed:
+ *  110x   1   1   1	r/o	r/o
+ *  11x0   1   1   1	r/o	r/o
  */
 	.macro	armv6_mt_table pfx
 \pfx\()_mt_table:
@@ -138,8 +142,11 @@
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
+#ifdef CONFIG_CPU_USE_DOMAINS
+	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
 
 	tst	r1, #L_PTE_EXEC
 	orreq	r3, r3, #PTE_EXT_XN
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7aaf88a..c1c3fe0 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -152,8 +152,11 @@ ENTRY(cpu_v7_set_pte_ext)
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
+#ifdef CONFIG_CPU_USE_DOMAINS
+	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
 
 	tst	r1, #L_PTE_EXEC
 	orreq	r3, r3, #PTE_EXT_XN
@@ -240,8 +243,6 @@ __v7_setup:
 	mcr	p15, 0, r10, c2, c0, 2		@ TTB control register
 	orr	r4, r4, #TTB_FLAGS
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
-	mov	r10, #0x1f			@ domains 0, 1 = manager
-	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register
 	/*
 	 * Memory region attributes with SCTLR.TRE=1
 	 *


* [PATCH 1/5] ARM: Use lazy cache flushing on ARMv7 SMP systems
From: Russell King - ARM Linux @ 2010-03-23 21:33 UTC
  To: linux-arm-kernel

On Mon, Mar 22, 2010 at 03:19:39PM +0000, Catalin Marinas wrote:
> ARMv7 processors like Cortex-A9 broadcast the cache maintenance
> operations in hardware. This patch allows the
> flush_dcache_page/update_mmu_cache pair to work in lazy flushing mode
> similar to the UP case.

Didn't Ben point out that there's a race with the lazy dcache flushing
on SMP platforms?

> The standard ICIALLU and BPIALL operations are not automatically
> broadcast to the other CPUs in an MP system. The patch adds the Inner
> Shareable variants, ICIALLUIS and BPIALLIS, if ARMv7 and SMP.

Jargon overload.  (Just because it's in a specification does not mean
the names are a good idea.)

In any case, this should probably be two separate patches.


* [PATCH 2/5] ARM: Implement read/write for ownership in the ARMv6 DMA cache ops
From: Russell King - ARM Linux @ 2010-03-23 21:38 UTC
  To: linux-arm-kernel

On Mon, Mar 22, 2010 at 03:19:45PM +0000, Catalin Marinas wrote:
> diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
> index 9d89c67..b9f2cbd 100644
> --- a/arch/arm/mm/cache-v6.S
> +++ b/arch/arm/mm/cache-v6.S
> @@ -211,6 +211,9 @@ v6_dma_inv_range:
>  	mcrne	p15, 0, r1, c7, c15, 1		@ clean & invalidate unified line
>  #endif
>  1:
> +#ifdef CONFIG_SMP
> +	str	r0, [r0]			@ write for ownership
> +#endif
>  #ifdef HARVARD_CACHE
>  	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D line
>  #else
> @@ -231,6 +234,9 @@ v6_dma_inv_range:
>  v6_dma_clean_range:
>  	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
>  1:
> +#ifdef CONFIG_SMP
> +	ldr	r2, [r0]			@ read for ownership
> +#endif
>  #ifdef HARVARD_CACHE
>  	mcr	p15, 0, r0, c7, c10, 1		@ clean D line
>  #else
> @@ -251,6 +257,10 @@ v6_dma_clean_range:
>  ENTRY(v6_dma_flush_range)
>  	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
>  1:
> +#ifdef CONFIG_SMP
> +	ldr	r2, [r0]			@ read for ownership
> +	str	r2, [r0]			@ write for ownership

What is the effect of using the register just loaded on ARMv6?  Does it
stall like previous architectures?  If so, this str should use a different
register.

In any case, does reading then writing actually achieve anything over just
a plain write?  A read surely brings the cache line into shared mode, and
a write into exclusive mode - so won't just a write do?

The converse argument is that with read allocate caches, this technique
can result in faster code, so why don't we use it in dma_inv_range?

Maybe this needs to be benchmarked to discover which method is optimal.


* [PATCH 3/5] ARM: Correct the VFPv3 detection
From: Russell King - ARM Linux @ 2010-03-23 21:38 UTC
  To: linux-arm-kernel

On Mon, Mar 22, 2010 at 03:19:50PM +0000, Catalin Marinas wrote:
> A CPU has VFPv3 hardware if the FPSID[19:16] bits are 2 or more.
> Currently Linux was only checking for 3 or more.

Ok.


* [PATCH 4/5] ARM: Align machine_desc.phys_io to a 1MB section
From: Russell King - ARM Linux @ 2010-03-23 21:39 UTC
  To: linux-arm-kernel

On Mon, Mar 22, 2010 at 03:19:56PM +0000, Catalin Marinas wrote:
> Platforms like RealView don't pass a section-aligned pointer via the
> machine_desc structure. This patch aligns the pointer in the
> __create_page_tables function. Reported by Tony Thompson.

I'd much rather the pointer in the structure was properly aligned,
rather than adding code to make it so.


* [PATCH 1/5] ARM: Use lazy cache flushing on ARMv7 SMP systems
From: Catalin Marinas @ 2010-03-24  9:51 UTC
  To: linux-arm-kernel

On Tue, 2010-03-23 at 21:33 +0000, Russell King - ARM Linux wrote:
> On Mon, Mar 22, 2010 at 03:19:39PM +0000, Catalin Marinas wrote:
> > ARMv7 processors like Cortex-A9 broadcast the cache maintenance
> > operations in hardware. This patch allows the
> > flush_dcache_page/update_mmu_cache pair to work in lazy flushing mode
> > similar to the UP case.
> 
> Didn't Ben point out that there's a race with the lazy dcache flushing
> on SMP platforms?

Yes, there is a slight risk of hitting this race. I started working on a
patch to move the cache flushing to set_pte_at() (together with a
PG_dcache_clean bit) but I don't think I'll finish it in time (I'm on
holiday between 1-19 April). That's if we decide to go this route; the
other thread (USB mass storage ...) seems to have slowed down.

So I think it's better to wait with this patch for now and maybe post a
full series addressing the issues above sometime next month.

-- 
Catalin


* [PATCH 2/5] ARM: Implement read/write for ownership in the ARMv6 DMA cache ops
From: Catalin Marinas @ 2010-03-26 14:08 UTC
  To: linux-arm-kernel

On Tue, 2010-03-23 at 21:38 +0000, Russell King - ARM Linux wrote:
> On Mon, Mar 22, 2010 at 03:19:45PM +0000, Catalin Marinas wrote:
> > diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
> > index 9d89c67..b9f2cbd 100644
> > --- a/arch/arm/mm/cache-v6.S
> > +++ b/arch/arm/mm/cache-v6.S
> > @@ -211,6 +211,9 @@ v6_dma_inv_range:
> >       mcrne   p15, 0, r1, c7, c15, 1          @ clean & invalidate unified line
> >  #endif
> >  1:
> > +#ifdef CONFIG_SMP
> > +     str     r0, [r0]                        @ write for ownership
> > +#endif
> >  #ifdef HARVARD_CACHE
> >       mcr     p15, 0, r0, c7, c6, 1           @ invalidate D line
> >  #else
> > @@ -231,6 +234,9 @@ v6_dma_inv_range:
> >  v6_dma_clean_range:
> >       bic     r0, r0, #D_CACHE_LINE_SIZE - 1
> >  1:
> > +#ifdef CONFIG_SMP
> > +     ldr     r2, [r0]                        @ read for ownership
> > +#endif
> >  #ifdef HARVARD_CACHE
> >       mcr     p15, 0, r0, c7, c10, 1          @ clean D line
> >  #else
> > @@ -251,6 +257,10 @@ v6_dma_clean_range:
> >  ENTRY(v6_dma_flush_range)
> >       bic     r0, r0, #D_CACHE_LINE_SIZE - 1
> >  1:
> > +#ifdef CONFIG_SMP
> > +     ldr     r2, [r0]                        @ read for ownership
> > +     str     r2, [r0]                        @ write for ownership
> 
> What is the effect of using the register just loaded on ARMv6?  Does it
> stall like previous architectures?  If so, this str should use a different
> register.

Using the same register here is on purpose so that we do not overwrite
the data already in the buffer (it's a flush operation). Yes, we have
interlocking, but the STR has to be executed before we issue the cache
cleaning operation.

The invalidate case uses a dummy STR, but we just found that with the
latest DMA API, invalidating in the unmap functions would overwrite the
transferred data. I need to revisit this.

> In any case, does reading then writing actually achieve anything over just
> a plain write?  read surely brings the cache line into shared mode, and
> a write to exclusive mode - so won't just a write do?

Yes, we need to preserve the data already in the buffer in some
situations.

> The converse argument is that with read allocate caches, this technique
> can result in faster code, so why don't we use it in dma_inv_range?

Only reading the data may put the cache line in shared mode, and a local
cache invalidation only affects the current CPU, leaving the data in the
caches of the other CPUs. It's true that we won't have dirty cache lines
that may be evicted during the DMA transfer, but if a different CPU uses
the data at a later time, it may get stale cache entries.

Issuing a write would invalidate the cache lines on the other CPUs so
they'll need to be read from L2 again when accessed.

-- 
Catalin


* [PATCH 1/5] ARM: Use lazy cache flushing on ARMv7 SMP systems
From: Catalin Marinas @ 2010-03-26 14:49 UTC
  To: linux-arm-kernel

On Tue, 2010-03-23 at 21:33 +0000, Russell King - ARM Linux wrote:
> On Mon, Mar 22, 2010 at 03:19:39PM +0000, Catalin Marinas wrote:
> > ARMv7 processors like Cortex-A9 broadcast the cache maintenance
> > operations in hardware. This patch allows the
> > flush_dcache_page/update_mmu_cache pair to work in lazy flushing mode
> > similar to the UP case.
> 
> Didn't Ben point out that there's a race with the lazy dcache flushing
> on SMP platforms?

BTW, even with the current code we still have a similar race with the
I-cache. set_pte_at() creates the entry that another CPU may execute
from before we invalidate the I-cache in update_mmu_cache().

A solution would be that set_pte_at() sets the NX bit which is cleared
later in update_mmu_cache() after the I-cache invalidation. Some care
needs to be taken with possible prefetch aborts in the small window when
the page isn't executable.

Note that even if we flush the caches in set_pte_at(), we still have the
I-cache race unless we write the pte twice, first with NX and then
without. So I'm more in favour of the solution in the previous
paragraph.

-- 
Catalin


* [PATCH 1/5] ARM: Use lazy cache flushing on ARMv7 SMP systems
From: Catalin Marinas @ 2010-03-26 17:36 UTC
  To: linux-arm-kernel

On Fri, 2010-03-26 at 14:49 +0000, Catalin Marinas wrote:
> On Tue, 2010-03-23 at 21:33 +0000, Russell King - ARM Linux wrote:
> > On Mon, Mar 22, 2010 at 03:19:39PM +0000, Catalin Marinas wrote:
> > > ARMv7 processors like Cortex-A9 broadcast the cache maintenance
> > > operations in hardware. This patch allows the
> > > flush_dcache_page/update_mmu_cache pair to work in lazy flushing mode
> > > similar to the UP case.
> >
> > Didn't Ben point out that there's a race with the lazy dcache flushing
> > on SMP platforms?
> 
> BTW, even with the current code we still have a similar race with the
> I-cache. set_pte_at() creates the entry that another CPU may execute
> from before we invalidate the I-cache in update_mmu_cache().
> 
> A solution would be that set_pte_at() sets the NX bit which is cleared
> later in update_mmu_cache() after the I-cache invalidation. Some care
> needs to be taken with possible prefetch aborts in the small window when
> the page isn't executable.
> 
> Note that even if we flush the caches in set_pte_at(), we still have the
> I-cache race unless we write the pte twice, first with NX and then
> without. So I'm more in favour of the solution in the previous
> paragraph.

And here's an attempt at this (not fully tested, and it could be
optimised for the UP case). I can add more comments later if you find
this the right approach.


ARM: Do not set the exec flag in set_pte_at()

From: Catalin Marinas <catalin.marinas@arm.com>

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/pgtable.h |    3 ++-
 arch/arm/mm/fault-armv.c       |   15 ++++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 6fae619..d6d06e7 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -279,7 +279,8 @@ extern struct page *empty_zero_page;
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
 
 #define set_pte_at(mm,addr,ptep,pteval) do { \
-	set_pte_ext(ptep, pteval, (addr) >= TASK_SIZE ? 0 : PTE_EXT_NG); \
+	set_pte_ext(ptep, __pte(pte_val(pteval) & ~L_PTE_EXEC), \
+		    (addr) >= TASK_SIZE ? 0 : PTE_EXT_NG); \
  } while (0)
 
 /*
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index c5ef5a7..ced1c70 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -167,11 +167,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 	if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
 		__flush_dcache_page(mapping, page);
 #endif
-	if (mapping) {
-		if (cache_is_vivt())
-			make_coherent(mapping, vma, addr, pfn);
-		else if (vma->vm_flags & VM_EXEC)
-			__flush_icache_all();
+	if (!mapping)
+		return;
+
+	if (cache_is_vivt())
+		make_coherent(mapping, vma, addr, pfn);
+	else if (vma->vm_flags & VM_EXEC) {
+		__flush_icache_all();
+		set_pte_ext(ptep, __pte(pte_val(*ptep) | L_PTE_EXEC),
+			    addr >= TASK_SIZE ? 0 : PTE_EXT_NG);
+		flush_tlb_page(vma, addr);
 	}
 }
 

-- 
Catalin

