All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-19 15:54 ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-19 15:54 UTC (permalink / raw)
  To: linux-arch, will.deacon; +Cc: linux, linux-arm-kernel, linux-kernel, gregkh

From: André Hentschel <nerv@dawncrow.de>

There are more and more applications coming to WinRT, Wine could support them,
but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
This register must be preserved per thread instead of being cleared.

Signed-off-by: André Hentschel <nerv@dawncrow.de>

---
This patch is against a86d52667d8eda5de39393ce737794403bdce1eb

I could only test it with kernel 3.4.6

 arch/arm/include/asm/thread_info.h |    2 +-
 arch/arm/include/asm/tls.h         |   32 +++++++++++++++++++-------------
 arch/arm/kernel/entry-armv.S       |    9 ++++-----
 arch/arm/kernel/process.c          |    2 +-
 arch/arm/kernel/ptrace.c           |    2 +-
 arch/arm/kernel/traps.c            |    4 ++--
 6 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..ea0189e 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,29 +2,35 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+	.macro set_tls_none, ntp, ptp, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	.macro set_tls_v6k, ntp, ptp, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp2, [\ptp, #4]
+	ldrd	\tmp1, \tmp2, [\ntp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro set_tls_v6, ntp, ptp, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp2, [\ptp, #4]
+	ldrdne	\tmp1, \tmp2, [\ntp]
+	ldreq	\tmp1, [\ntp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	.macro set_tls_software, ntp, ptp, tmp1, tmp2
+	ldr	\tmp1, [\ntp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..78ce1c6 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,21 +728,20 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
-#ifdef CONFIG_CPU_USE_DOMAINS
-	ldr	r6, [r2, #TI_CPU_DOMAIN]
-#endif
-	set_tls	r3, r4, r5
+	add	r3, r2, #TI_TP_VALUE
+	add	r4, r1, #TI_TP_VALUE
+	set_tls	r3, r4, r6, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
 	ldr	r7, [r7, #TSK_STACK_CANARY]
 #endif
 #ifdef CONFIG_CPU_USE_DOMAINS
+	ldr	r6, [r2, #TI_CPU_DOMAIN]
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..b3171c4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -395,7 +395,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
1.7.10.4



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-19 15:54 ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-19 15:54 UTC (permalink / raw)
  To: linux-arch, will.deacon; +Cc: gregkh, linux, linux-kernel, linux-arm-kernel

From: André Hentschel <nerv@dawncrow.de>

There are more and more applications coming to WinRT, Wine could support them,
but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
This register must be preserved per thread instead of being cleared.

Signed-off-by: André Hentschel <nerv@dawncrow.de>

---
This patch is against a86d52667d8eda5de39393ce737794403bdce1eb

I could only test it with kernel 3.4.6

 arch/arm/include/asm/thread_info.h |    2 +-
 arch/arm/include/asm/tls.h         |   32 +++++++++++++++++++-------------
 arch/arm/kernel/entry-armv.S       |    9 ++++-----
 arch/arm/kernel/process.c          |    2 +-
 arch/arm/kernel/ptrace.c           |    2 +-
 arch/arm/kernel/traps.c            |    4 ++--
 6 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..ea0189e 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,29 +2,35 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+	.macro set_tls_none, ntp, ptp, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	.macro set_tls_v6k, ntp, ptp, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp2, [\ptp, #4]
+	ldrd	\tmp1, \tmp2, [\ntp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro set_tls_v6, ntp, ptp, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp2, [\ptp, #4]
+	ldrdne	\tmp1, \tmp2, [\ntp]
+	ldreq	\tmp1, [\ntp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	.macro set_tls_software, ntp, ptp, tmp1, tmp2
+	ldr	\tmp1, [\ntp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..78ce1c6 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,21 +728,20 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
-#ifdef CONFIG_CPU_USE_DOMAINS
-	ldr	r6, [r2, #TI_CPU_DOMAIN]
-#endif
-	set_tls	r3, r4, r5
+	add	r3, r2, #TI_TP_VALUE
+	add	r4, r1, #TI_TP_VALUE
+	set_tls	r3, r4, r6, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
 	ldr	r7, [r7, #TSK_STACK_CANARY]
 #endif
 #ifdef CONFIG_CPU_USE_DOMAINS
+	ldr	r6, [r2, #TI_CPU_DOMAIN]
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..b3171c4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -395,7 +395,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-19 15:54 ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-19 15:54 UTC (permalink / raw)
  To: linux-arm-kernel

From: André Hentschel <nerv@dawncrow.de>

There are more and more applications coming to WinRT, Wine could support them,
but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
This register must be preserved per thread instead of being cleared.

Signed-off-by: André Hentschel <nerv@dawncrow.de>

---
This patch is against a86d52667d8eda5de39393ce737794403bdce1eb

I could only test it with kernel 3.4.6

 arch/arm/include/asm/thread_info.h |    2 +-
 arch/arm/include/asm/tls.h         |   32 +++++++++++++++++++-------------
 arch/arm/kernel/entry-armv.S       |    9 ++++-----
 arch/arm/kernel/process.c          |    2 +-
 arch/arm/kernel/ptrace.c           |    2 +-
 arch/arm/kernel/traps.c            |    4 ++--
 6 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..ea0189e 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,29 +2,35 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+	.macro set_tls_none, ntp, ptp, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	.macro set_tls_v6k, ntp, ptp, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp2, [\ptp, #4]
+	ldrd	\tmp1, \tmp2, [\ntp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro set_tls_v6, ntp, ptp, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp2, [\ptp, #4]
+	ldrdne	\tmp1, \tmp2, [\ntp]
+	ldreq	\tmp1, [\ntp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	.macro set_tls_software, ntp, ptp, tmp1, tmp2
+	ldr	\tmp1, [\ntp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..78ce1c6 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,21 +728,20 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
-#ifdef CONFIG_CPU_USE_DOMAINS
-	ldr	r6, [r2, #TI_CPU_DOMAIN]
-#endif
-	set_tls	r3, r4, r5
+	add	r3, r2, #TI_TP_VALUE
+	add	r4, r1, #TI_TP_VALUE
+	set_tls	r3, r4, r6, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
 	ldr	r7, [r7, #TSK_STACK_CANARY]
 #endif
 #ifdef CONFIG_CPU_USE_DOMAINS
+	ldr	r6, [r2, #TI_CPU_DOMAIN]
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..b3171c4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -395,7 +395,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-19 15:54 ` André Hentschel
@ 2013-04-22 14:36   ` Russell King - ARM Linux
  -1 siblings, 0 replies; 41+ messages in thread
From: Russell King - ARM Linux @ 2013-04-22 14:36 UTC (permalink / raw)
  To: André Hentschel
  Cc: linux-arch, will.deacon, linux-arm-kernel, linux-kernel, gregkh

On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> 
> There are more and more applications coming to WinRT, Wine could support them,
> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> This register must be preserved per thread instead of being cleared.
> 
> Signed-off-by: André Hentschel <nerv@dawncrow.de>

This actually makes things less efficient all round, because you
now use the value immediately after loading, which means it will cause
pipeline stalls, certainly on older CPUs.

Could you please rework the patch to try avoiding so many modifications
to the way things have been done here?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-22 14:36   ` Russell King - ARM Linux
  0 siblings, 0 replies; 41+ messages in thread
From: Russell King - ARM Linux @ 2013-04-22 14:36 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> 
> There are more and more applications coming to WinRT, Wine could support them,
> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> This register must be preserved per thread instead of being cleared.
> 
> Signed-off-by: André Hentschel <nerv@dawncrow.de>

This actually makes things less efficient all round, because you
now use the value immediately after loading, which means it will cause
pipeline stalls, certainly on older CPUs.

Could you please rework the patch to try avoiding so many modifications
to the way things have been done here?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-22 14:36   ` Russell King - ARM Linux
@ 2013-04-22 15:18     ` Will Deacon
  -1 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-22 15:18 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: André Hentschel, linux-arch, linux-arm-kernel, linux-kernel, gregkh

On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> > From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> > 
> > There are more and more applications coming to WinRT, Wine could support them,
> > but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> > This register must be preserved per thread instead of being cleared.
> > 
> > Signed-off-by: André Hentschel <nerv@dawncrow.de>
> 
> This actually makes things less efficient all round, because you
> now use the value immediately after loading, which means it will cause
> pipeline stalls, certainly on older CPUs.
> 
> Could you please rework the patch to try avoiding soo many modifications
> to the way things have been done here?

copy_thread also needs updating so that the *register* value for the parent
is copied to the child, since the parent may have written the register
after the last context-switch, meaning that tp_value is out-of-date.

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-22 15:18     ` Will Deacon
  0 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-22 15:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> > From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> > 
> > There are more and more applications coming to WinRT, Wine could support them,
> > but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> > This register must be preserved per thread instead of being cleared.
> > 
> > Signed-off-by: André Hentschel <nerv@dawncrow.de>
> 
> This actually makes things less efficient all round, because you
> now use the value immediately after loading, which means it will cause
> pipeline stalls, certainly on older CPUs.
> 
> Could you please rework the patch to try avoiding soo many modifications
> to the way things have been done here?

copy_thread also needs updating so that the *register* value for the parent
is copied to the child, since the parent may have written the register
after the last context-switch, meaning that tp_value is out-of-date.

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-22 15:18     ` Will Deacon
  (?)
@ 2013-04-22 21:07       ` André Hentschel
  -1 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-22 21:07 UTC (permalink / raw)
  To: Will Deacon, Russell King - ARM Linux
  Cc: linux-arch, linux-arm-kernel, linux-kernel, gregkh

Am 22.04.2013 17:18, schrieb Will Deacon:
> On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
>> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
>>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
>>>
>>> There are more and more applications coming to WinRT, Wine could support them,
>>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
>>> This register must be preserved per thread instead of being cleared.
>>>
>>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
>>
>> This actually makes things less efficient all round, because you
>> now use the value immediately after loading, which means it will cause
>> pipeline stalls, certainly on older CPUs.
>>
>> Could you please rework the patch to try avoiding soo many modifications
>> to the way things have been done here?
> 
> copy_thread also needs updating so that the *register* value for the parent
> is copied to the child, since the parent may have written the register
> after the last context-switch, meaning that tp_value is out-of-date.

Thank you both for reviewing.

I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
I just thought about old CPUs and remembered again that we at Wine
need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
in tls.h and make use of CONFIG_CPU_V7 compile-time check in
the changed files and in the copy_thread function?
Do I need any further flag checks in copy_thread or can I use the
compile-time check to add unconditional code?


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-22 21:07       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-22 21:07 UTC (permalink / raw)
  To: Will Deacon, Russell King - ARM Linux
  Cc: linux-arch, linux-arm-kernel, linux-kernel, gregkh

Am 22.04.2013 17:18, schrieb Will Deacon:
> On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
>> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
>>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
>>>
>>> There are more and more applications coming to WinRT, Wine could support them,
>>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
>>> This register must be preserved per thread instead of being cleared.
>>>
>>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
>>
>> This actually makes things less efficient all round, because you
>> now use the value immediately after loading, which means it will cause
>> pipeline stalls, certainly on older CPUs.
>>
>> Could you please rework the patch to try avoiding soo many modifications
>> to the way things have been done here?
> 
> copy_thread also needs updating so that the *register* value for the parent
> is copied to the child, since the parent may have written the register
> after the last context-switch, meaning that tp_value is out-of-date.

Thank you both for reviewing.

I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
I just thought about old CPUs and remembered again that we at Wine
need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
in tls.h and make use of CONFIG_CPU_V7 compile-time check in
the changed files and in the copy_thread function?
Do I need any further flag checks in copy_thread or can I use the
compile-time check to add unconditional code?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-22 21:07       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-22 21:07 UTC (permalink / raw)
  To: linux-arm-kernel

Am 22.04.2013 17:18, schrieb Will Deacon:
> On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
>> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
>>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
>>>
>>> There are more and more applications coming to WinRT, Wine could support them,
>>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
>>> This register must be preserved per thread instead of being cleared.
>>>
>>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
>>
>> This actually makes things less efficient all round, because you
>> now use the value immediately after loading, which means it will cause
>> pipeline stalls, certainly on older CPUs.
>>
>> Could you please rework the patch to try avoiding soo many modifications
>> to the way things have been done here?
> 
> copy_thread also needs updating so that the *register* value for the parent
> is copied to the child, since the parent may have written the register
> after the last context-switch, meaning that tp_value is out-of-date.

Thank you both for reviewing.

I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
I just thought about old CPUs and remembered again that we at Wine
need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
in tls.h and make use of CONFIG_CPU_V7 compile-time check in
the changed files and in the copy_thread function?
Do I need any further flag checks in copy_thread or can I use the
compile-time check to add unconditional code?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-22 21:07       ` André Hentschel
  (?)
@ 2013-04-23  9:15         ` Will Deacon
  -1 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-23  9:15 UTC (permalink / raw)
  To: André Hentschel
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel,
	linux-kernel, gregkh

On Mon, Apr 22, 2013 at 10:07:35PM +0100, André Hentschel wrote:
> Am 22.04.2013 17:18, schrieb Will Deacon:
> > On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
> >> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> >>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> >>>
> >>> There are more and more applications coming to WinRT, Wine could support them,
> >>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> >>> This register must be preserved per thread instead of being cleared.
> >>>
> >>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
> >>
> >> This actually makes things less efficient all round, because you
> >> now use the value immediately after loading, which means it will cause
> >> pipeline stalls, certainly on older CPUs.
> >>
> >> Could you please rework the patch to try avoiding soo many modifications
> >> to the way things have been done here?
> > 
> > copy_thread also needs updating so that the *register* value for the parent
> > is copied to the child, since the parent may have written the register
> > after the last context-switch, meaning that tp_value is out-of-date.
> 
> Thank you both for reviewing.
> 
> I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
> I just thought about old CPUs and remembered again that we at Wine
> need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
> in tls.h and make use of CONFIG_CPU_V7 compile-time check in
> the changed files and in the copy_thread function?

No, we should support this feature on any CPU with the TPIDRURW register,
otherwise it's going to get really confusing for userspace.

> Do i need any further flag checks in copy_thread or can i use the
> compile-time check to add unconditional code?

You could introduce `get' tls functions, which don't do anything for CPUs
without the relevant registers.

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-23  9:15         ` Will Deacon
  0 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-23  9:15 UTC (permalink / raw)
  To: André Hentschel
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel,
	linux-kernel, gregkh

On Mon, Apr 22, 2013 at 10:07:35PM +0100, André Hentschel wrote:
> Am 22.04.2013 17:18, schrieb Will Deacon:
> > On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
> >> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> >>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> >>>
> >>> There are more and more applications coming to WinRT, Wine could support them,
> >>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> >>> This register must be preserved per thread instead of being cleared.
> >>>
> >>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
> >>
> >> This actually makes things less efficient all round, because you
> >> now use the value immediately after loading, which means it will cause
> >> pipeline stalls, certainly on older CPUs.
> >>
> >> Could you please rework the patch to try avoiding soo many modifications
> >> to the way things have been done here?
> > 
> > copy_thread also needs updating so that the *register* value for the parent
> > is copied to the child, since the parent may have written the register
> > after the last context-switch, meaning that tp_value is out-of-date.
> 
> Thank you both for reviewing.
> 
> I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
> I just thought about old CPUs and remembered again that we at Wine
> need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
> in tls.h and make use of CONFIG_CPU_V7 compile-time check in
> the changed files and in the copy_thread function?

No, we should support this feature on any CPU with the TPIDRURW register,
otherwise it's going to get really confusing for userspace.

> Do i need any further flag checks in copy_thread or can i use the
> compile-time check to add unconditional code?

You could introduce `get' tls functions, which don't do anything for CPUs
without the relevant registers.

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-23  9:15         ` Will Deacon
  0 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-23  9:15 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Apr 22, 2013 at 10:07:35PM +0100, André Hentschel wrote:
> Am 22.04.2013 17:18, schrieb Will Deacon:
> > On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
> >> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
> >>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
> >>>
> >>> There are more and more applications coming to WinRT, Wine could support them,
> >>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
> >>> This register must be preserved per thread instead of being cleared.
> >>>
> >>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
> >>
> >> This actually makes things less efficient all round, because you
> >> now use the value immediately after loading, which means it will cause
> >> pipeline stalls, certainly on older CPUs.
> >>
> >> Could you please rework the patch to try avoiding soo many modifications
> >> to the way things have been done here?
> > 
> > copy_thread also needs updating so that the *register* value for the parent
> > is copied to the child, since the parent may have written the register
> > after the last context-switch, meaning that tp_value is out-of-date.
> 
> Thank you both for reviewing.
> 
> I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
> I just thought about old CPUs and remembered again that we at Wine
> need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
> in tls.h and make use of CONFIG_CPU_V7 compile-time check in
> the changed files and in the copy_thread function?

No, we should support this feature on any CPU with the TPIDRURW register,
otherwise it's going to get really confusing for userspace.

> Do i need any further flag checks in copy_thread or can i use the
> compile-time check to add unconditional code?

You could introduce `get' tls functions, which don't do anything for CPUs
without the relevant registers.

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-23  9:15         ` Will Deacon
  (?)
@ 2013-04-23 22:42           ` André Hentschel
  -1 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-23 22:42 UTC (permalink / raw)
  To: Will Deacon
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel, linux-kernel

Am 23.04.2013 11:15, schrieb Will Deacon:
> On Mon, Apr 22, 2013 at 10:07:35PM +0100, André Hentschel wrote:
>> Am 22.04.2013 17:18, schrieb Will Deacon:
>>> On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
>>>> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
>>>>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
>>>>>
>>>>> There are more and more applications coming to WinRT, Wine could support them,
>>>>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
>>>>> This register must be preserved per thread instead of being cleared.
>>>>>
>>>>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
>>>>
>>>> This actually makes things less efficient all round, because you
>>>> now use the value immediately after loading, which means it will cause
>>>> pipeline stalls, certainly on older CPUs.
>>>>
>>>> Could you please rework the patch to try avoiding soo many modifications
>>>> to the way things have been done here?
>>>
>>> copy_thread also needs updating so that the *register* value for the parent
>>> is copied to the child, since the parent may have written the register
>>> after the last context-switch, meaning that tp_value is out-of-date.
>>
>> Thank you both for reviewing.
>>
>> I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
>> I just thought about old CPUs and remembered again that we at Wine
>> need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
>> in tls.h and make use of CONFIG_CPU_V7 compile-time check in
>> the changed files and in the copy_thread function?
> 
> No, we should support this feature on any CPU with the TPIDRURW register,
> otherwise it's going to get really confusing for userspace.

Sure, remembered that today.

>> Do i need any further flag checks in copy_thread or can i use the
>> compile-time check to add unconditional code?
> 
> You could introduce `get' tls functions, which don't do anything for CPUs
> without the relevant registers.

Before i have another round of testing and patch formatting/sending,
what about the untested patch below?


diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..1c10163 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,13 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
+	.macro get_tls2_none, tp, tmp1
+	.endm
+
+	.macro get_tls2_v6k, tp, tmp1
+	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp1, [\tp, #4]
+	.endm
+
+	.macro get_tls2_v6, tp, tmp1
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp1, [\tp, #4]
+	.endm
+
+
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -16,33 +33,39 @@
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	ldrdne	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
+#define get_tls2		get_tls2_none
 #define set_tls		set_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
+#define get_tls2		get_tls2_v6
 #define set_tls		set_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
+#define get_tls2		get_tls2_v6k
 #define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
+#define get_tls2		get_tls2_none
 #define set_tls		set_tls_software
 #endif
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..097686b 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,8 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	get_tls2	r3, r4
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..6138eb1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = current_thread_info()->tp_value[1];
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-23 22:42           ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-23 22:42 UTC (permalink / raw)
  To: Will Deacon
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel, linux-kernel

Am 23.04.2013 11:15, schrieb Will Deacon:
> On Mon, Apr 22, 2013 at 10:07:35PM +0100, André Hentschel wrote:
>> Am 22.04.2013 17:18, schrieb Will Deacon:
>>> On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
>>>> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
>>>>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
>>>>>
>>>>> There are more and more applications coming to WinRT, Wine could support them,
>>>>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
>>>>> This register must be preserved per thread instead of being cleared.
>>>>>
>>>>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
>>>>
>>>> This actually makes things less efficient all round, because you
>>>> now use the value immediately after loading, which means it will cause
>>>> pipeline stalls, certainly on older CPUs.
>>>>
>>>> Could you please rework the patch to try avoiding soo many modifications
>>>> to the way things have been done here?
>>>
>>> copy_thread also needs updating so that the *register* value for the parent
>>> is copied to the child, since the parent may have written the register
>>> after the last context-switch, meaning that tp_value is out-of-date.
>>
>> Thank you both for reviewing.
>>
>> I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
>> I just thought about old CPUs and remembered again that we at Wine
>> need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
>> in tls.h and make use of CONFIG_CPU_V7 compile-time check in
>> the changed files and in the copy_thread function?
> 
> No, we should support this feature on any CPU with the TPIDRURW register,
> otherwise it's going to get really confusing for userspace.

Sure, remembered that today.

>> Do i need any further flag checks in copy_thread or can i use the
>> compile-time check to add unconditional code?
> 
> You could introduce `get' tls functions, which don't do anything for CPUs
> without the relevant registers.

Before i have another round of testing and patch formatting/sending,
what about the untested patch below?


diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..1c10163 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,13 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
+	.macro get_tls2_none, tp, tmp1
+	.endm
+
+	.macro get_tls2_v6k, tp, tmp1
+	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp1, [\tp, #4]
+	.endm
+
+	.macro get_tls2_v6, tp, tmp1
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp1, [\tp, #4]
+	.endm
+
+
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -16,33 +33,39 @@
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	ldrdne	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
+#define get_tls2		get_tls2_none
 #define set_tls		set_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
+#define get_tls2		get_tls2_v6
 #define set_tls		set_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
+#define get_tls2		get_tls2_v6k
 #define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
+#define get_tls2		get_tls2_none
 #define set_tls		set_tls_software
 #endif
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..097686b 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,8 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	get_tls2	r3, r4
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..6138eb1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = current_thread_info()->tp_value[1];
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-23 22:42           ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-23 22:42 UTC (permalink / raw)
  To: linux-arm-kernel

Am 23.04.2013 11:15, schrieb Will Deacon:
> On Mon, Apr 22, 2013 at 10:07:35PM +0100, André Hentschel wrote:
>> Am 22.04.2013 17:18, schrieb Will Deacon:
>>> On Mon, Apr 22, 2013 at 03:36:16PM +0100, Russell King - ARM Linux wrote:
>>>> On Fri, Apr 19, 2013 at 05:54:35PM +0200, André Hentschel wrote:
>>>>> From: =?UTF-8?q?Andr=C3=A9=20Hentschel?= <nerv@dawncrow.de>
>>>>>
>>>>> There are more and more applications coming to WinRT, Wine could support them,
>>>>> but mostly they expect to have the thread environment block (TEB) in TPIDRURW.
>>>>> This register must be preserved per thread instead of being cleared.
>>>>>
>>>>> Signed-off-by: André Hentschel <nerv@dawncrow.de>
>>>>
>>>> This actually makes things less efficient all round, because you
>>>> now use the value immediately after loading, which means it will cause
>>>> pipeline stalls, certainly on older CPUs.
>>>>
>>>> Could you please rework the patch to try avoiding soo many modifications
>>>> to the way things have been done here?
>>>
>>> copy_thread also needs updating so that the *register* value for the parent
>>> is copied to the child, since the parent may have written the register
>>> after the last context-switch, meaning that tp_value is out-of-date.
>>
>> Thank you both for reviewing.
>>
>> I guess you mostly mean "ldr	r6, [r2, #TI_CPU_DOMAIN]".
>> I just thought about old CPUs and remembered again that we at Wine
>> need that patch only on v7 (and later). So is it ok to introduce a set_tls_v7
>> in tls.h and make use of CONFIG_CPU_V7 compile-time check in
>> the changed files and in the copy_thread function?
> 
> No, we should support this feature on any CPU with the TPIDRURW register,
> otherwise it's going to get really confusing for userspace.

Sure, remembered that today.

>> Do i need any further flag checks in copy_thread or can i use the
>> compile-time check to add unconditional code?
> 
> You could introduce `get' tls functions, which don't do anything for CPUs
> without the relevant registers.

Before i have another round of testing and patch formatting/sending,
what about the untested patch below?


diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..1c10163 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,13 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
+	.macro get_tls2_none, tp, tmp1
+	.endm
+
+	.macro get_tls2_v6k, tp, tmp1
+	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp1, [\tp, #4]
+	.endm
+
+	.macro get_tls2_v6, tp, tmp1
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp1, [\tp, #4]
+	.endm
+
+
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -16,33 +33,39 @@
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	ldrdne	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
+#define get_tls2		get_tls2_none
 #define set_tls		set_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
+#define get_tls2		get_tls2_v6
 #define set_tls		set_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
+#define get_tls2		get_tls2_v6k
 #define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
+#define get_tls2		get_tls2_none
 #define set_tls		set_tls_software
 #endif
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..097686b 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,8 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	get_tls2	r3, r4
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..6138eb1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = current_thread_info()->tp_value[1];
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-23 22:42           ` André Hentschel
  (?)
  (?)
@ 2013-04-24  9:42             ` Will Deacon
  -1 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-24  9:42 UTC (permalink / raw)
  To: André Hentschel
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel, linux-kernel

Hi Andrew,

On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
> Am 23.04.2013 11:15, schrieb Will Deacon:
> > You could introduce `get' tls functions, which don't do anything for CPUs
> > without the relevant registers.
> 
> Before i have another round of testing and patch formatting/sending,
> what about the untested patch below?

Ok. Comments inline.

> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>  	struct cpu_context_save	cpu_context;	/* cpu context */
>  	__u32			syscall;	/* syscall number */
>  	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>  #ifdef CONFIG_CRUNCH
>  	struct crunch_state	crunchstate;
>  #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..1c10163 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,13 +2,30 @@
>  #define __ASMARM_TLS_H
>  
>  #ifdef __ASSEMBLY__
> +	.macro get_tls2_none, tp, tmp1
> +	.endm

Cosmetic, but these are really horrible macro names.

> +	.macro get_tls2_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls2_v6, tp, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]

You could factor out some of this hwcap checking now that it's used by both
set and get.

> +	.endm
> +
> +
>  	.macro set_tls_none, tp, tmp1, tmp2
>  	.endm
>  
>  	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>  	.endm
>  
>  	.macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,33 +33,39 @@
>  	ldr	\tmp1, [\tmp1, #0]
>  	mov	\tmp2, #0xffff0fff
>  	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	ldrdne	\tmp1, \tmp2, [\tp]

Does this work for big-endian CPUs?

> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  
>  	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  #endif
>  
>  #ifdef CONFIG_TLS_REG_EMUL
>  #define tls_emu		1
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu		0
>  #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls2		get_tls2_v6
>  #define set_tls		set_tls_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu		0
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_v6k
>  #define set_tls		set_tls_v6k
>  #else
>  #define tls_emu		0
>  #define has_tls_reg		0
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_software
>  #endif
>  
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..097686b 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>   UNWIND(.fnstart	)
>   UNWIND(.cantunwind	)
>  	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>   ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>   THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>   THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>  #ifdef CONFIG_CPU_USE_DOMAINS
>  	ldr	r6, [r2, #TI_CPU_DOMAIN]
>  #endif
> +	get_tls2	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>  	set_tls	r3, r4, r5
>  #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>  	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..6138eb1 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>  	clear_ptrace_hw_breakpoint(p);
>  
>  	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = current_thread_info()->tp_value[1];
>

This still isn't correct. Imagine the following sequence of events:

  - Task foo writes its TPIDRURW register from userspace and then issues a
    fork() system call. No context switch occurs between these two events.

  - We start creating the child task, bar, and end up in copy_thread with
    the `thread' pointing at foo's struct thread_info, which contains the
    *old* TPIDRURW value.

  - We copy out the stale value into bar, which is then scheduled with an
    old TPIDRURW value.

The solution is to reload the value sitting in the register in copy_thread,
rather than relying on the thread_info being up-to-date. That's why I
previously suggested not using asm macros for the getters.

>  	thread_notify(THREAD_NOTIFY_COPY, thread);
>  
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>  #endif
>  
>  		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>  				       datap);
>  			break;

I'm guessing debuggers don't care about the new TLS value, or do we need a
new ptrace request?

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-24  9:42             ` Will Deacon
  0 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-24  9:42 UTC (permalink / raw)
  To: André Hentschel
  Cc: linux-arch, Russell King - ARM Linux, linux-kernel, linux-arm-kernel

Hi Andrew,

On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
> Am 23.04.2013 11:15, schrieb Will Deacon:
> > You could introduce `get' tls functions, which don't do anything for CPUs
> > without the relevant registers.
> 
> Before i have another round of testing and patch formatting/sending,
> what about the untested patch below?

Ok. Comments inline.

> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>  	struct cpu_context_save	cpu_context;	/* cpu context */
>  	__u32			syscall;	/* syscall number */
>  	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>  #ifdef CONFIG_CRUNCH
>  	struct crunch_state	crunchstate;
>  #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..1c10163 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,13 +2,30 @@
>  #define __ASMARM_TLS_H
>  
>  #ifdef __ASSEMBLY__
> +	.macro get_tls2_none, tp, tmp1
> +	.endm

Cosmetic, but these are really horrible macro names.

> +	.macro get_tls2_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls2_v6, tp, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]

You could factor out some of this hwcap checking now that it's used by both
set and get.

> +	.endm
> +
> +
>  	.macro set_tls_none, tp, tmp1, tmp2
>  	.endm
>  
>  	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>  	.endm
>  
>  	.macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,33 +33,39 @@
>  	ldr	\tmp1, [\tmp1, #0]
>  	mov	\tmp2, #0xffff0fff
>  	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	ldrdne	\tmp1, \tmp2, [\tp]

Does this work for big-endian CPUs?

> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  
>  	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  #endif
>  
>  #ifdef CONFIG_TLS_REG_EMUL
>  #define tls_emu		1
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu		0
>  #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls2		get_tls2_v6
>  #define set_tls		set_tls_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu		0
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_v6k
>  #define set_tls		set_tls_v6k
>  #else
>  #define tls_emu		0
>  #define has_tls_reg		0
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_software
>  #endif
>  
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..097686b 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>   UNWIND(.fnstart	)
>   UNWIND(.cantunwind	)
>  	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>   ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>   THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>   THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>  #ifdef CONFIG_CPU_USE_DOMAINS
>  	ldr	r6, [r2, #TI_CPU_DOMAIN]
>  #endif
> +	get_tls2	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>  	set_tls	r3, r4, r5
>  #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>  	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..6138eb1 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>  	clear_ptrace_hw_breakpoint(p);
>  
>  	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = current_thread_info()->tp_value[1];
>

This still isn't correct. Imagine the following sequence of events:

  - Task foo writes its TPIDRURW register from userspace and then issues a
    fork() system call. No context switch occurs between these two events.

  - We start creating the child task, bar, and end up in copy_thread with
    the `thread' pointing at foo's struct thread_info, which contains the
    *old* TPIDRURW value.

  - We copy out the stale value into bar, which is then scheduled with an
    old TPIDRURW value.

The solution is to reload the value sitting in the register in copy_thread,
rather than relying on the thread_info being up-to-date. That's why I
previously suggested not using asm macros for the getters.

>  	thread_notify(THREAD_NOTIFY_COPY, thread);
>  
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>  #endif
>  
>  		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>  				       datap);
>  			break;

I'm guessing debuggers don't care about the new TLS value, or do we need a
new ptrace request?

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-24  9:42             ` Will Deacon
  0 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-24  9:42 UTC (permalink / raw)
  To: André Hentschel
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel, linux-kernel

Hi Andrew,

On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
> Am 23.04.2013 11:15, schrieb Will Deacon:
> > You could introduce `get' tls functions, which don't do anything for CPUs
> > without the relevant registers.
> 
> Before i have another round of testing and patch formatting/sending,
> what about the untested patch below?

Ok. Comments inline.

> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>  	struct cpu_context_save	cpu_context;	/* cpu context */
>  	__u32			syscall;	/* syscall number */
>  	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>  #ifdef CONFIG_CRUNCH
>  	struct crunch_state	crunchstate;
>  #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..1c10163 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,13 +2,30 @@
>  #define __ASMARM_TLS_H
>  
>  #ifdef __ASSEMBLY__
> +	.macro get_tls2_none, tp, tmp1
> +	.endm

Cosmetic, but these are really horrible macro names.

> +	.macro get_tls2_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls2_v6, tp, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]

You could factor out some of this hwcap checking now that it's used by both
set and get.

> +	.endm
> +
> +
>  	.macro set_tls_none, tp, tmp1, tmp2
>  	.endm
>  
>  	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>  	.endm
>  
>  	.macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,33 +33,39 @@
>  	ldr	\tmp1, [\tmp1, #0]
>  	mov	\tmp2, #0xffff0fff
>  	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	ldrdne	\tmp1, \tmp2, [\tp]

Does this work for big-endian CPUs?

> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  
>  	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  #endif
>  
>  #ifdef CONFIG_TLS_REG_EMUL
>  #define tls_emu		1
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu		0
>  #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls2		get_tls2_v6
>  #define set_tls		set_tls_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu		0
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_v6k
>  #define set_tls		set_tls_v6k
>  #else
>  #define tls_emu		0
>  #define has_tls_reg		0
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_software
>  #endif
>  
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..097686b 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>   UNWIND(.fnstart	)
>   UNWIND(.cantunwind	)
>  	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>   ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>   THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>   THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>  #ifdef CONFIG_CPU_USE_DOMAINS
>  	ldr	r6, [r2, #TI_CPU_DOMAIN]
>  #endif
> +	get_tls2	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>  	set_tls	r3, r4, r5
>  #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>  	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..6138eb1 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>  	clear_ptrace_hw_breakpoint(p);
>  
>  	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = current_thread_info()->tp_value[1];
>

This still isn't correct. Imagine the following sequence of events:

  - Task foo writes its TPIDRURW register from userspace and then issues a
    fork() system call. No context switch occurs between these two events.

  - We start creating the child task, bar, and end up in copy_thread with
    the `thread' pointing at foo's struct thread_info, which contains the
    *old* TPIDRURW value.

  - We copy out the stale value into bar, which is then scheduled with an
    old TPIDRURW value.

The solution is to reload the value sitting in the register in copy_thread,
rather than relying on the thread_info being up-to-date. That's why I
previously suggested not using asm macros for the getters.

>  	thread_notify(THREAD_NOTIFY_COPY, thread);
>  
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>  #endif
>  
>  		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>  				       datap);
>  			break;

I'm guessing debuggers don't care about the new TLS value, or do we need a
new ptrace request?

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-04-24  9:42             ` Will Deacon
  0 siblings, 0 replies; 41+ messages in thread
From: Will Deacon @ 2013-04-24  9:42 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Andrew,

On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
> Am 23.04.2013 11:15, schrieb Will Deacon:
> > You could introduce `get' tls functions, which don't do anything for CPUs
> > without the relevant registers.
> 
> Before i have another round of testing and patch formatting/sending,
> what about the untested patch below?

Ok. Comments inline.

> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>  	struct cpu_context_save	cpu_context;	/* cpu context */
>  	__u32			syscall;	/* syscall number */
>  	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>  #ifdef CONFIG_CRUNCH
>  	struct crunch_state	crunchstate;
>  #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..1c10163 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,13 +2,30 @@
>  #define __ASMARM_TLS_H
>  
>  #ifdef __ASSEMBLY__
> +	.macro get_tls2_none, tp, tmp1
> +	.endm

Cosmetic, but these are really horrible macro names.

> +	.macro get_tls2_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls2_v6, tp, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]

You could factor out some of this hwcap checking now that it's used by both
set and get.

> +	.endm
> +
> +
>  	.macro set_tls_none, tp, tmp1, tmp2
>  	.endm
>  
>  	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>  	.endm
>  
>  	.macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,33 +33,39 @@
>  	ldr	\tmp1, [\tmp1, #0]
>  	mov	\tmp2, #0xffff0fff
>  	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	ldrdne	\tmp1, \tmp2, [\tp]

Does this work for big-endian CPUs?

> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  
>  	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  #endif
>  
>  #ifdef CONFIG_TLS_REG_EMUL
>  #define tls_emu		1
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu		0
>  #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls2		get_tls2_v6
>  #define set_tls		set_tls_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu		0
>  #define has_tls_reg		1
> +#define get_tls2		get_tls2_v6k
>  #define set_tls		set_tls_v6k
>  #else
>  #define tls_emu		0
>  #define has_tls_reg		0
> +#define get_tls2		get_tls2_none
>  #define set_tls		set_tls_software
>  #endif
>  
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..097686b 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>   UNWIND(.fnstart	)
>   UNWIND(.cantunwind	)
>  	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>   ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>   THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>   THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>  #ifdef CONFIG_CPU_USE_DOMAINS
>  	ldr	r6, [r2, #TI_CPU_DOMAIN]
>  #endif
> +	get_tls2	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>  	set_tls	r3, r4, r5
>  #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>  	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..6138eb1 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>  	clear_ptrace_hw_breakpoint(p);
>  
>  	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = current_thread_info()->tp_value[1];
>

This still isn't correct. Imagine the following sequence of events:

  - Task foo writes its TPIDRURW register from userspace and then issues a
    fork() system call. No context switch occurs between these two events.

  - We start creating the child task, bar, and end up in copy_thread with
    the `thread' pointing at foo's struct thread_info, which contains the
    *old* TPIDRURW value.

  - We copy out the stale value into bar, which is then scheduled with an
    old TPIDRURW value.

The solution is to reload the value sitting in the register in copy_thread,
rather than relying on the thread_info being up-to-date. That's why I
previously suggested not using asm macros for the getters.

>  	thread_notify(THREAD_NOTIFY_COPY, thread);
>  
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>  #endif
>  
>  		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>  				       datap);
>  			break;

I'm guessing debuggers don't care about the new TLS value, or do we need a
new ptrace request?

Will

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-24  9:42             ` Will Deacon
                               ` (2 preceding siblings ...)
  (?)
@ 2013-04-24 21:44             ` André Hentschel
  -1 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-04-24 21:44 UTC (permalink / raw)
  To: Will Deacon; +Cc: Russell King - ARM Linux, linux-arch

Am 24.04.2013 11:42, schrieb Will Deacon:
> Hi Andrew,
> 
> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>> without the relevant registers.
>>
>> Before i have another round of testing and patch formatting/sending,
>> what about the untested patch below?
> 
> Ok. Comments inline.

Thanks for them. My first kernel patch only added an include; this second patch is a much harder one, so every comment is appreciated.

>>  #ifdef __ASSEMBLY__
>> +	.macro get_tls2_none, tp, tmp1
>> +	.endm
> 
> Cosmetic, but these are really horrible macro names.

I guess that's only about removing '2'?

>> +	.macro get_tls2_v6, tp, tmp1
>> +	ldr	\tmp1, =elf_hwcap
>> +	ldr	\tmp1, [\tmp1, #0]
>> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
>> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
>> +	strne	\tmp1, [\tp, #4]
> 
> You could factor out some of this hwcap checking now that it's used by both
> set and get.

Sure, but the code would still run twice (unlike my PATCHv2)

>> +	ldrdne	\tmp1, \tmp2, [\tp]
> 
> Does this work for big-endian CPUs?

I'd say yes.

>> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
>> index 047d3e4..6138eb1 100644
>> --- a/arch/arm/kernel/process.c
>> +++ b/arch/arm/kernel/process.c
>> @@ -395,7 +395,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>>  	clear_ptrace_hw_breakpoint(p);
>>  
>>  	if (clone_flags & CLONE_SETTLS)
>> -		thread->tp_value = childregs->ARM_r3;
>> +		thread->tp_value[0] = childregs->ARM_r3;
>> +	thread->tp_value[1] = current_thread_info()->tp_value[1];
>>
> 
> This still isn't correct. Imagine the following sequence of events:
> 
>   - Task foo writes its TPIDRURW register from userspace and then issues a
>     fork() system call. No context switch occurs between these two events.
> 
>   - We start creating the child task, bar, and end up in copy_thread with
>     the `thread' pointing at foo's struct thread_info, which contains the
>     *old* TPIDRURW value.
> 
>   - We copy out the stale value into bar, which is then scheduled with an
>     old TPIDRURW value.
> 
> The solution is to reload the value sitting in the register in copy_thread,
> rather than relying on the thread_info being up-to-date. That's why I
> previously suggested not using asm macros for the getters.

Thanks for the information. Further questions:
	Where would I place these functions? In tls.h as inline functions?
	What should such a function look like? Would it contain compile-time checks and use assembler only for the mrc instructions?
	Wouldn't that add a lot of overhead in __switch_to?

>> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
>> index 03deeff..2bc1514 100644
>> --- a/arch/arm/kernel/ptrace.c
>> +++ b/arch/arm/kernel/ptrace.c
>> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>>  #endif
>>  
>>  		case PTRACE_GET_THREAD_AREA:
>> -			ret = put_user(task_thread_info(child)->tp_value,
>> +			ret = put_user(task_thread_info(child)->tp_value[0],
>>  				       datap);
>>  			break;
> 
> I'm guessing debuggers don't care about the new TLS value, or do we need a
> new ptrace request?

I'd say no. If someone jumps in and says we need it, it would be a separate patch anyway.

Best regards.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-04-24  9:42             ` Will Deacon
  (?)
@ 2013-05-02 19:54               ` André Hentschel
  -1 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-02 19:54 UTC (permalink / raw)
  To: Will Deacon
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel, linux-kernel

Am 24.04.2013 11:42, schrieb Will Deacon:
> Hi Andrew,
> 
> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>> without the relevant registers.
>>
>> Before i have another round of testing and patch formatting/sending,
>> what about the untested patch below?
> 
> Ok. Comments inline.

I answered that separately.
Here is another try based on your comments:



diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..02f8674 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,48 +2,87 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
+	.macro check_hwcap_tls, tmp1
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	.endm
+
+
+	.macro get_tls_none, tp, tmp1
+	.endm
+
+	.macro get_tls_v6k, tp, tmp1
+	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp1, [\tp, #4]
+	.endm
+
+	.macro get_tls_v6, tp, tmp1
+	check_hwcap_tls \tmp1
+	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp1, [\tp, #4]
+	.endm
+
+
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
-	ldr	\tmp1, =elf_hwcap
-	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
-	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	check_hwcap_tls \tmp1
+	ldrdne	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
+#define get_tls		get_tls_none
 #define set_tls		set_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
+#define get_tls		get_tls_v6
 #define set_tls		set_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
+#define get_tls		get_tls_v6k
 #define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
+#define get_tls		get_tls_none
 #define set_tls		set_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline void get_tpidrurw(unsigned long *tpidrurw)
+{
+	unsigned long t;
+#ifdef CONFIG_TLS_REG_EMUL
+	return;
+#endif
+	if (!has_tls_reg) return;
+	__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
+	*tpidrurw = t;
+}
+#endif
+
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..2c892b2 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,8 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	get_tls	r3, r4
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..a13bbc8 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -36,6 +36,7 @@
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/processor.h>
+#include <asm/tls.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
@@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+	{
+		thread->tp_value[0] = childregs->ARM_r3;
+		get_tpidrurw(&thread->tp_value[1]);
+	}
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-02 19:54               ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-02 19:54 UTC (permalink / raw)
  To: Will Deacon
  Cc: Russell King - ARM Linux, linux-arch, linux-arm-kernel, linux-kernel

Am 24.04.2013 11:42, schrieb Will Deacon:
> Hi Andrew,
> 
> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>> without the relevant registers.
>>
>> Before i have another round of testing and patch formatting/sending,
>> what about the untested patch below?
> 
> Ok. Comments inline.

I answered that separately.
Here is another try based on your comments:



diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..02f8674 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,48 +2,87 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
+	.macro check_hwcap_tls, tmp1
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	.endm
+
+
+	.macro get_tls_none, tp, tmp1
+	.endm
+
+	.macro get_tls_v6k, tp, tmp1
+	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp1, [\tp, #4]
+	.endm
+
+	.macro get_tls_v6, tp, tmp1
+	check_hwcap_tls \tmp1
+	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp1, [\tp, #4]
+	.endm
+
+
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
-	ldr	\tmp1, =elf_hwcap
-	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
-	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	check_hwcap_tls \tmp1
+	ldrdne	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
+#define get_tls		get_tls_none
 #define set_tls		set_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
+#define get_tls		get_tls_v6
 #define set_tls		set_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
+#define get_tls		get_tls_v6k
 #define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
+#define get_tls		get_tls_none
 #define set_tls		set_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline void get_tpidrurw(unsigned long *tpidrurw)
+{
+	unsigned long t;
+#ifdef CONFIG_TLS_REG_EMUL
+	return;
+#endif
+	if (!has_tls_reg) return;
+	__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
+	*tpidrurw = t;
+}
+#endif
+
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..2c892b2 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,8 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	get_tls	r3, r4
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..a13bbc8 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -36,6 +36,7 @@
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/processor.h>
+#include <asm/tls.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
@@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+	{
+		thread->tp_value[0] = childregs->ARM_r3;
+		get_tpidrurw(&thread->tp_value[1]);
+	}
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-02 19:54               ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-02 19:54 UTC (permalink / raw)
  To: linux-arm-kernel

Am 24.04.2013 11:42, schrieb Will Deacon:
> Hi Andrew,
> 
> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>> without the relevant registers.
>>
>> Before i have another round of testing and patch formatting/sending,
>> what about the untested patch below?
> 
> Ok. Comments inline.

I answered that separately.
Here is another try based on your comments:



diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..bb5b48d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..02f8674 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,48 +2,87 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
+	.macro check_hwcap_tls, tmp1
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	.endm
+
+
+	.macro get_tls_none, tp, tmp1
+	.endm
+
+	.macro get_tls_v6k, tp, tmp1
+	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	str	\tmp1, [\tp, #4]
+	.endm
+
+	.macro get_tls_v6, tp, tmp1
+	check_hwcap_tls \tmp1
+	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
+	strne	\tmp1, [\tp, #4]
+	.endm
+
+
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
-	ldr	\tmp1, =elf_hwcap
-	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
-	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	check_hwcap_tls \tmp1
+	ldrdne	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
+#define get_tls		get_tls_none
 #define set_tls		set_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
+#define get_tls		get_tls_v6
 #define set_tls		set_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
+#define get_tls		get_tls_v6k
 #define set_tls		set_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
+#define get_tls		get_tls_none
 #define set_tls		set_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline void get_tpidrurw(unsigned long *tpidrurw)
+{
+	unsigned long t;
+#ifdef CONFIG_TLS_REG_EMUL
+	return;
+#endif
+	if (!has_tls_reg) return;
+	__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
+	*tpidrurw = t;
+}
+#endif
+
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..2c892b2 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,8 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	get_tls	r3, r4
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..a13bbc8 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -36,6 +36,7 @@
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/processor.h>
+#include <asm/tls.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
@@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+	{
+		thread->tp_value[0] = childregs->ARM_r3;
+		get_tpidrurw(&thread->tp_value[1]);
+	}
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-05-02 19:54               ` André Hentschel
  (?)
@ 2013-05-03  9:21                 ` Jonathan Austin
  -1 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-03  9:21 UTC (permalink / raw)
  To: André Hentschel
  Cc: Will Deacon, linux-arch, Russell King - ARM Linux, linux-kernel,
	linux-arm-kernel

Hi André,

Will pointed me at this thread and I had a look at fixing
this up yesterday by extending his original patch...

There are a few things about this that aren't quite right. Most
of the comments are cosmetic but there's an issue in copy_thread
that will result in incorrect behaviour, I think.

I've commented below inline and there's a patch at the bottom- can
you let me know if it works for you?

On 02/05/13 20:54, André Hentschel wrote:
> Am 24.04.2013 11:42, schrieb Will Deacon:
>> Hi Andrew,
>>
>> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>>> without the relevant registers.
>>>
>>> Before i have another round of testing and patch formatting/sending,
>>> what about the untested patch below?
>>
>> Ok. Comments inline.
> 
> I answered that separately.
> Here is another try based on your comments:
> 
> 
> 
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>   	struct cpu_context_save	cpu_context;	/* cpu context */
>   	__u32			syscall;	/* syscall number */
>   	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>   #ifdef CONFIG_CRUNCH
>   	struct crunch_state	crunchstate;
>   #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..02f8674 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,48 +2,87 @@
>   #define __ASMARM_TLS_H
>   
>   #ifdef __ASSEMBLY__
> +	.macro check_hwcap_tls, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	.endm
> +
> +
> +	.macro get_tls_none, tp, tmp1
> +	.endm
> +
> +	.macro get_tls_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls_v6, tp, tmp1
> +	check_hwcap_tls \tmp1

I tend to steer clear of asm that requires certain behaviour wrt
the flags, though in this case I think it's probably a sufficiently
self contained case to be okay...

> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]
> +	.endm
> +
> +
>   	.macro set_tls_none, tp, tmp1, tmp2
>   	.endm
>   
>   	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>   	.endm
>   
>   	.macro set_tls_v6, tp, tmp1, tmp2
> -	ldr	\tmp1, =elf_hwcap
> -	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
> -	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	check_hwcap_tls \tmp1
> +	ldrdne	\tmp1, \tmp2, [\tp]
> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
>   
>   	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
>   #endif
>   
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> +#define get_tls		get_tls_none

This is different from the set_tls, which deals with both
tpidrurw and tpidruro, so the naming is a little inconsistent here...

>   #define set_tls		set_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls		get_tls_v6
>   #define set_tls		set_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> +#define get_tls		get_tls_v6k
>   #define set_tls		set_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> +#define get_tls		get_tls_none
>   #define set_tls		set_tls_software
>   #endif
>   
> +#ifndef __ASSEMBLY__
> +static inline void get_tpidrurw(unsigned long *tpidrurw)

A bit weird to have tpidrurw here but tls elsewhere - I
settled on tlsuser... (see below)

> +{
> +	unsigned long t;
> +#ifdef CONFIG_TLS_REG_EMUL
> +	return;
> +#endif
> +	if (!has_tls_reg) return;
> +	__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> +	*tpidrurw = t;
> +}
> +#endif
> +
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..2c892b2 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> +	get_tls	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>   	set_tls	r3, r4, r5
>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..a13bbc8 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -36,6 +36,7 @@
>   #include <asm/cacheflush.h>
>   #include <asm/idmap.h>
>   #include <asm/processor.h>
> +#include <asm/tls.h>
>   #include <asm/thread_notify.h>
>   #include <asm/stacktrace.h>
>   #include <asm/mach/time.h>
> @@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   	clear_ptrace_hw_breakpoint(p);
>   
>   	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +	{
> +		thread->tp_value[0] = childregs->ARM_r3;
> +		get_tpidrurw(&thread->tp_value[1]);
> +	}

This isn't quite right - the re-reading of tpidrurw should
be independent of CLONE_SETTLS. We should update tpidrurw
from userspace in all cases.

The following is what I've been looking at/testing...
It works on V7 and I've build tested it for 1136 - I would've
sent it yesterday but was getting things set up for testing on
1136 (v6 not k)

----8<-------
>From bd3fe4055777b404a1635f366483637fd0cfa35a Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 8 Feb 2013 15:55:12 +0000
Subject: [PATCH] ARM: tls: context switch user writeable TLS register
 TPIDRURW

Since commit 6a1c53124aa1 ("ARM: 7403/1: tls: remove covert channel via
TPIDRURW") we have zeroed the user writeable TLS register to prevent it
from being used as a covert channel between two tasks.

However, it turns out that the wine guys would rather we actually
switched the register so that WinRT applications can use it to store a
pointer to their `thread environment block (TEB)'.

This patch implements TPIDRURW context-switching for cpus implementing
the register. Unlike the TPIDRURO, which is already switched, the TPIDRURW
can be updated from userspace so needs careful treatment in the case that we
modify TPIDRURW and call fork(). If no context switch occurs between these
two events then the thread_info struct that we use to construct the child
will have the old, stale, TPIDRURW value. To avoid this we must always read
TPIDRURW in copy_thread.

This patch is extended from an earlier version by Will Deacon in order to:
- Save TPIDRURW at context switch
- Deal with the race condition described above, including adding C-getters

Reported-by: Andre Hentschel <nerv@dawncrow.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
---
 arch/arm/include/asm/thread_info.h |    2 +-
 arch/arm/include/asm/tls.h         |   51 +++++++++++++++++++++++++++++-------
 arch/arm/kernel/Makefile           |    2 +-
 arch/arm/kernel/entry-armv.S       |    5 +++-
 arch/arm/kernel/process.c          |    4 ++-
 arch/arm/kernel/ptrace.c           |    2 +-
 arch/arm/kernel/tls.c              |   50 +++++++++++++++++++++++++++++++++++
 arch/arm/kernel/traps.c            |    4 +--
 8 files changed, 104 insertions(+), 16 deletions(-)
 create mode 100644 arch/arm/kernel/tls.c

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..e292794 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -5,10 +5,19 @@
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
+	.macro save_tlsuser_none, tp, tmp1, tmp2
+	.endm
+
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	.endm
+
+	.macro save_tlsuser_v6k, tp, tmp1, tmp2
+	@ TPIDRURW can be updated from userspace, so we have to re-read it
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
+	str	\tmp2, [\tp, #4]
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -16,15 +25,26 @@
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	ldrned	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	.endm
+
+	.macro save_tlsuser_v6, tp, tmp1, tmp2
+	@ TPIDRURW can be updated from userspace, so we have to re-read it
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
+	strne	\tmp2, [\tp, #4]		@ save in to thread_info
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
@@ -32,18 +52,31 @@
 #define tls_emu		1
 #define has_tls_reg		1
 #define set_tls		set_tls_none
+#define save_tlsuser	save_tlsuser_none
+#define get_tlsuser	get_tlsuser_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
 #define set_tls		set_tls_v6
+#define save_tlsuser	save_tlsuser_v6
+#define get_tlsuser	get_tlsuser_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
 #define set_tls		set_tls_v6k
+#define save_tlsuser	save_tlsuser_v6k
+#define get_tlsuser	get_tlsuser_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
 #define set_tls		set_tls_software
+#define save_tlsuser	save_tlsuser_none
+#define get_tlsuser	get_tlsuser_none
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned long get_tlsuser_none(void);
+extern unsigned long get_tlsuser_v6(void);
+extern unsigned long get_tlsuser_v6k(void);
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..4e1114c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 
 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
 		   process.o ptrace.o return_address.o sched_clock.o \
-		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+		   setup.o signal.o stacktrace.o sys_arm.o time.o tls.o traps.o
 
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..66adb0c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,9 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	/* Save the user-writeable tls register */
+	save_tlsuser	r3, r4, r5
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/tls.c b/arch/arm/kernel/tls.c
new file mode 100644
index 0000000..1627f5b
--- /dev/null
+++ b/arch/arm/kernel/tls.c
@@ -0,0 +1,50 @@
+/*
+ * arch/arm/kernel/tls.c
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <asm/tls.h>
+
+/*
+ * Access to the TPIDRURW register, with full certainty that it exists.
+ */
+unsigned long get_tlsuser_v6k(void)
+{
+	unsigned long v;
+	asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Access to the TPIDRURW register if it exists.
+ */
+unsigned long get_tlsuser_v6(void)
+{
+	unsigned long v = 0;
+	if (elf_hwcap & HWCAP_TLS)
+		asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Dummy access for the case that TLS is emulated in software
+ */
+unsigned long get_tlsuser_none(void)
+{
+	return 0;
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
1.7.9.5



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-03  9:21                 ` Jonathan Austin
  0 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-03  9:21 UTC (permalink / raw)
  To: André Hentschel
  Cc: Will Deacon, linux-arch, Russell King - ARM Linux, linux-kernel,
	linux-arm-kernel

Hi André,

Will pointed me at this thread and I had a look at fixing
this up yesterday by extending his original patch...

There are a few things about this that aren't quite right. Most
of the comments are cosmetic but there's an issue in copy_thread
that will result in incorrect behaviour, I think.

I've commented below inline and there's a patch at the bottom- can
you let me know if it works for you?

On 02/05/13 20:54, André Hentschel wrote:
> Am 24.04.2013 11:42, schrieb Will Deacon:
>> Hi Andrew,
>>
>> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>>> without the relevant registers.
>>>
>>> Before i have another round of testing and patch formatting/sending,
>>> what about the untested patch below?
>>
>> Ok. Comments inline.
> 
> I answered to that separately.
> Here is another try based on your comments:
> 
> 
> 
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>   	struct cpu_context_save	cpu_context;	/* cpu context */
>   	__u32			syscall;	/* syscall number */
>   	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>   #ifdef CONFIG_CRUNCH
>   	struct crunch_state	crunchstate;
>   #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..02f8674 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,48 +2,87 @@
>   #define __ASMARM_TLS_H
>   
>   #ifdef __ASSEMBLY__
> +	.macro check_hwcap_tls, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	.endm
> +
> +
> +	.macro get_tls_none, tp, tmp1
> +	.endm
> +
> +	.macro get_tls_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls_v6, tp, tmp1
> +	check_hwcap_tls \tmp1

I tend to steer clear of asm that requires certain behaviour wrt
the flags, though in this case I think it's probably a sufficiently
self contained case to be okay...

> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]
> +	.endm
> +
> +
>   	.macro set_tls_none, tp, tmp1, tmp2
>   	.endm
>   
>   	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>   	.endm
>   
>   	.macro set_tls_v6, tp, tmp1, tmp2
> -	ldr	\tmp1, =elf_hwcap
> -	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
> -	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	check_hwcap_tls \tmp1
> +	ldrdne	\tmp1, \tmp2, [\tp]
> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
>   
>   	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
>   #endif
>   
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> +#define get_tls		get_tls_none

This is different from the set_tls, which deals with both
tpidrurw and tpidruro, so the naming is a little inconsistent here...

>   #define set_tls		set_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls		get_tls_v6
>   #define set_tls		set_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> +#define get_tls		get_tls_v6k
>   #define set_tls		set_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> +#define get_tls		get_tls_none
>   #define set_tls		set_tls_software
>   #endif
>   
> +#ifndef __ASSEMBLY__
> +static inline void get_tpidrurw(unsigned long *tpidrurw)

A bit weird to have tpidrurw here but tls elsewhere - I
settled on tlsuser... (see below)

> +{
> +	unsigned long t;
> +#ifdef CONFIG_TLS_REG_EMUL
> +	return;
> +#endif
> +	if (!has_tls_reg) return;
> +	__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> +	*tpidrurw = t;
> +}
> +#endif
> +
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..2c892b2 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> +	get_tls	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>   	set_tls	r3, r4, r5
>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..a13bbc8 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -36,6 +36,7 @@
>   #include <asm/cacheflush.h>
>   #include <asm/idmap.h>
>   #include <asm/processor.h>
> +#include <asm/tls.h>
>   #include <asm/thread_notify.h>
>   #include <asm/stacktrace.h>
>   #include <asm/mach/time.h>
> @@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   	clear_ptrace_hw_breakpoint(p);
>   
>   	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +	{
> +		thread->tp_value[0] = childregs->ARM_r3;
> +		get_tpidrurw(&thread->tp_value[1]);
> +	}

This isn't quite right - the re-reading of tpidrurw should
be independent of CLONE_SETTLS. We should update tpidrurw
from userspace in all cases.

The following is what I've been looking at/testing...
It works on V7 and I've build tested it for 1136 - I would've
sent it yesterday but was getting things set up for testing on
1136 (v6 not k)

----8<-------
From bd3fe4055777b404a1635f366483637fd0cfa35a Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 8 Feb 2013 15:55:12 +0000
Subject: [PATCH] ARM: tls: context switch user writeable TLS register
 TPIDRURW

Since commit 6a1c53124aa1 ("ARM: 7403/1: tls: remove covert channel via
TPIDRURW") we have zeroed the user writeable TLS register to prevent it
from being used as a covert channel between two tasks.

However, it turns out that the wine guys would rather we actually
switched the register so that WinRT applications can use it to store a
pointer to their `thread environment block (TEB)'.

This patch implements TPIDRURW context-switching for cpus implementing
the register. Unlike the TPIDRURO, which is already switched, the TPIDRURW
can be updated from userspace so needs careful treatment in the case that we
modify TPIDRURW and call fork(). If no context switch occurs between these
two events then the thread_info struct that we use to construct the child
will have the old, stale, TPIDRURW value. To avoid this we must always read
TPIDRURW in copy_thread.

This patch is extended from an earlier version by Will Deacon in order to:
- Save TPIDRURW at context switch
- Deal with the race condition described above, including adding C-getters

Reported-by: Andre Hentschel <nerv@dawncrow.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
---
 arch/arm/include/asm/thread_info.h |    2 +-
 arch/arm/include/asm/tls.h         |   51 +++++++++++++++++++++++++++++-------
 arch/arm/kernel/Makefile           |    2 +-
 arch/arm/kernel/entry-armv.S       |    5 +++-
 arch/arm/kernel/process.c          |    4 ++-
 arch/arm/kernel/ptrace.c           |    2 +-
 arch/arm/kernel/tls.c              |   50 +++++++++++++++++++++++++++++++++++
 arch/arm/kernel/traps.c            |    4 +--
 8 files changed, 104 insertions(+), 16 deletions(-)
 create mode 100644 arch/arm/kernel/tls.c

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..e292794 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -5,10 +5,19 @@
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
+	.macro save_tlsuser_none, tp, tmp1, tmp2
+	.endm
+
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	.endm
+
+	.macro save_tlsuser_v6k, tp, tmp1, tmp2
+	@ TPIDRURW can be updated from userspace, so we have to re-read it
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
+	str	\tmp2, [\tp, #4]
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -16,15 +25,26 @@
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	ldrned	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	.endm
+
+	.macro save_tlsuser_v6, tp, tmp1, tmp2
+	@ TPIDRURW can be updated from userspace, so we have to re-read it
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
+	strne	\tmp2, [\tp, #4]		@ save in to thread_info
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
@@ -32,18 +52,31 @@
 #define tls_emu		1
 #define has_tls_reg		1
 #define set_tls		set_tls_none
+#define save_tlsuser	save_tlsuser_none
+#define get_tlsuser	get_tlsuser_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
 #define set_tls		set_tls_v6
+#define save_tlsuser	save_tlsuser_v6
+#define get_tlsuser	get_tlsuser_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
 #define set_tls		set_tls_v6k
+#define save_tlsuser	save_tlsuser_v6k
+#define get_tlsuser	get_tlsuser_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
 #define set_tls		set_tls_software
+#define save_tlsuser	save_tlsuser_none
+#define get_tlsuser	get_tlsuser_none
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned long get_tlsuser_none(void);
+extern unsigned long get_tlsuser_v6(void);
+extern unsigned long get_tlsuser_v6k(void);
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..4e1114c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 
 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
 		   process.o ptrace.o return_address.o sched_clock.o \
-		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+		   setup.o signal.o stacktrace.o sys_arm.o time.o tls.o traps.o
 
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..66adb0c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,9 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	/* Save the user-writeable tls register */
+	save_tlsuser	r3, r4, r5
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/tls.c b/arch/arm/kernel/tls.c
new file mode 100644
index 0000000..1627f5b
--- /dev/null
+++ b/arch/arm/kernel/tls.c
@@ -0,0 +1,50 @@
+/*
+ * arch/arm/kernel/tls.c
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <asm/tls.h>
+
+/*
+ * Access to the TPIDRURW register, with full certainty that it exists.
+ */
+unsigned long get_tlsuser_v6k(void)
+{
+	unsigned long v;
+	asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Access to the TPIDRURW register if it exists.
+ */
+unsigned long get_tlsuser_v6(void)
+{
+	unsigned long v = 0;
+	if (elf_hwcap & HWCAP_TLS)
+		asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Dummy access for the case that TLS is emulated in software
+ */
+unsigned long get_tlsuser_none(void)
+{
+	return 0;
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-03  9:21                 ` Jonathan Austin
  0 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-03  9:21 UTC (permalink / raw)
  To: linux-arm-kernel

Hi André,

Will pointed me at this thread and I had a look at fixing
this up yesterday by extending his original patch...

There are a few things about this that aren't quite right. Most
of the comments are cosmetic but there's an issue in copy_thread
that will result in incorrect behaviour, I think.

I've commented below inline and there's a patch at the bottom- can
you let me know if it works for you?

On 02/05/13 20:54, André Hentschel wrote:
> Am 24.04.2013 11:42, schrieb Will Deacon:
>> Hi Andrew,
>>
>>> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>>> without the relevant registers.
>>>
>>> Before i have another round of testing and patch formatting/sending,
>>> what about the untested patch below?
>>
>> Ok. Comments inline.
> 
> I answered to that separately.
> Here is another try based on your comments:
> 
> 
> 
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>   	struct cpu_context_save	cpu_context;	/* cpu context */
>   	__u32			syscall;	/* syscall number */
>   	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];
>   #ifdef CONFIG_CRUNCH
>   	struct crunch_state	crunchstate;
>   #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..02f8674 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,48 +2,87 @@
>   #define __ASMARM_TLS_H
>   
>   #ifdef __ASSEMBLY__
> +	.macro check_hwcap_tls, tmp1
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> +	.endm
> +
> +
> +	.macro get_tls_none, tp, tmp1
> +	.endm
> +
> +	.macro get_tls_v6k, tp, tmp1
> +	mrc	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	str	\tmp1, [\tp, #4]
> +	.endm
> +
> +	.macro get_tls_v6, tp, tmp1
> +	check_hwcap_tls \tmp1

I tend to steer clear of asm that requires certain behaviour wrt
the flags, though in this case I think it's probably a sufficiently
self contained case to be okay...

> +	mrcne	p15, 0, \tmp1, c13, c0, 2		@ get user r/w TLS register
> +	strne	\tmp1, [\tp, #4]
> +	.endm
> +
> +
>   	.macro set_tls_none, tp, tmp1, tmp2
>   	.endm
>   
>   	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>   	.endm
>   
>   	.macro set_tls_v6, tp, tmp1, tmp2
> -	ldr	\tmp1, =elf_hwcap
> -	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
> -	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	check_hwcap_tls \tmp1
> +	ldrdne	\tmp1, \tmp2, [\tp]
> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
>   
>   	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
>   #endif
>   
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> +#define get_tls		get_tls_none

This is different from the set_tls, which deals with both
tpidrurw and tpidruro, so the naming is a little inconsistent here...

>   #define set_tls		set_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> +#define get_tls		get_tls_v6
>   #define set_tls		set_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> +#define get_tls		get_tls_v6k
>   #define set_tls		set_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> +#define get_tls		get_tls_none
>   #define set_tls		set_tls_software
>   #endif
>   
> +#ifndef __ASSEMBLY__
> +static inline void get_tpidrurw(unsigned long *tpidrurw)

A bit weird to have tpidrurw here but tls elsewhere - I
settled on tlsuser... (see below)

> +{
> +	unsigned long t;
> +#ifdef CONFIG_TLS_REG_EMUL
> +	return;
> +#endif
> +	if (!has_tls_reg) return;
> +	__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> +	*tpidrurw = t;
> +}
> +#endif
> +
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..2c892b2 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
> +	add	r3, r1, #TI_TP_VALUE
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> +	get_tls	r3, r4
> +	add	r3, r2, #TI_TP_VALUE
>   	set_tls	r3, r4, r5
>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..a13bbc8 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -36,6 +36,7 @@
>   #include <asm/cacheflush.h>
>   #include <asm/idmap.h>
>   #include <asm/processor.h>
> +#include <asm/tls.h>
>   #include <asm/thread_notify.h>
>   #include <asm/stacktrace.h>
>   #include <asm/mach/time.h>
> @@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   	clear_ptrace_hw_breakpoint(p);
>   
>   	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +	{
> +		thread->tp_value[0] = childregs->ARM_r3;
> +		get_tpidrurw(&thread->tp_value[1]);
> +	}

This isn't quite right - the re-reading of tpidrurw should
be independent of CLONE_SETTLS. We should update tpidrurw
from userspace in all cases.

The following is what I've been looking at/testing...
It works on V7 and I've build tested it for 1136 - I would've
sent it yesterday but was getting things set up for testing on
1136 (v6 not k)

----8<-------
From bd3fe4055777b404a1635f366483637fd0cfa35a Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 8 Feb 2013 15:55:12 +0000
Subject: [PATCH] ARM: tls: context switch user writeable TLS register
 TPIDRURW

Since commit 6a1c53124aa1 ("ARM: 7403/1: tls: remove covert channel via
TPIDRURW") we have zeroed the user writeable TLS register to prevent it
from being used as a covert channel between two tasks.

However, it turns out that the wine guys would rather we actually
switched the register so that WinRT applications can use it to store a
pointer to their `thread environment block (TEB)'.

This patch implements TPIDRURW context-switching for cpus implementing
the register. Unlike the TPIDRURO, which is already switched, the TPIDRURW
can be updated from userspace so needs careful treatment in the case that we
modify TPIDRURW and call fork(). If no context switch occurs between these
two events then the thread_info struct that we use to construct the child
will have the old, stale, TPIDRURW value. To avoid this we must always read
TPIDRURW in copy_thread.

This patch is extended from an earlier version by Will Deacon in order to:
- Save TPIDRURW at context switch
- Deal with the race condition described above, including adding C-getters

Reported-by: Andre Hentschel <nerv@dawncrow.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
---
 arch/arm/include/asm/thread_info.h |    2 +-
 arch/arm/include/asm/tls.h         |   51 +++++++++++++++++++++++++++++-------
 arch/arm/kernel/Makefile           |    2 +-
 arch/arm/kernel/entry-armv.S       |    5 +++-
 arch/arm/kernel/process.c          |    4 ++-
 arch/arm/kernel/ptrace.c           |    2 +-
 arch/arm/kernel/tls.c              |   50 +++++++++++++++++++++++++++++++++++
 arch/arm/kernel/traps.c            |    4 +--
 8 files changed, 104 insertions(+), 16 deletions(-)
 create mode 100644 arch/arm/kernel/tls.c

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..e292794 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -5,10 +5,19 @@
 	.macro set_tls_none, tp, tmp1, tmp2
 	.endm
 
+	.macro save_tlsuser_none, tp, tmp1, tmp2
+	.endm
+
 	.macro set_tls_v6k, tp, tmp1, tmp2
-	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	ldrd	\tmp1, \tmp2, [\tp]
+	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
+	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	.endm
+
+	.macro save_tlsuser_v6k, tp, tmp1, tmp2
+	@ TPIDRURW can be updated from userspace, so we have to re-read it
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
+	str	\tmp2, [\tp, #4]
 	.endm
 
 	.macro set_tls_v6, tp, tmp1, tmp2
@@ -16,15 +25,26 @@
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
-	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	ldrned	\tmp1, \tmp2, [\tp]
+	ldreq	\tmp1, [\tp]
+	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
+	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
+	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	.endm
+
+	.macro save_tlsuser_v6, tp, tmp1, tmp2
+	@ TPIDRURW can be updated from userspace, so we have to re-read it
+	ldr	\tmp1, =elf_hwcap
+	ldr	\tmp1, [\tmp1, #0]
+	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
+	strne	\tmp2, [\tp, #4]		@ save in to thread_info
 	.endm
 
 	.macro set_tls_software, tp, tmp1, tmp2
-	mov	\tmp1, #0xffff0fff
-	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
+	ldr	\tmp1, [\tp]
+	mov	\tmp2, #0xffff0fff
+	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
 #endif
 
@@ -32,18 +52,31 @@
 #define tls_emu		1
 #define has_tls_reg		1
 #define set_tls		set_tls_none
+#define save_tlsuser	save_tlsuser_none
+#define get_tlsuser	get_tlsuser_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
 #define set_tls		set_tls_v6
+#define save_tlsuser	save_tlsuser_v6
+#define get_tlsuser	get_tlsuser_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
 #define set_tls		set_tls_v6k
+#define save_tlsuser	save_tlsuser_v6k
+#define get_tlsuser	get_tlsuser_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
 #define set_tls		set_tls_software
+#define save_tlsuser	save_tlsuser_none
+#define get_tlsuser	get_tlsuser_none
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned long get_tlsuser_none(void);
+extern unsigned long get_tlsuser_v6(void);
+extern unsigned long get_tlsuser_v6k(void);
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..4e1114c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 
 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
 		   process.o ptrace.o return_address.o sched_clock.o \
-		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+		   setup.o signal.o stacktrace.o sys_arm.o time.o tls.o traps.o
 
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..66adb0c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,7 +728,7 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
+	add	r3, r1, #TI_TP_VALUE
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
@@ -736,6 +736,9 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
+	/* Save the user-writeable tls register */
+	save_tlsuser	r3, r4, r5
+	add	r3, r2, #TI_TP_VALUE
 	set_tls	r3, r4, r5
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/tls.c b/arch/arm/kernel/tls.c
new file mode 100644
index 0000000..1627f5b
--- /dev/null
+++ b/arch/arm/kernel/tls.c
@@ -0,0 +1,50 @@
+/*
+ * arch/arm/kernel/tls.c
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <asm/tls.h>
+
+/*
+ * Access to the TPIDRURW register, with full certainty that it exists.
+ */
+unsigned long get_tlsuser_v6k(void)
+{
+	unsigned long v;
+	asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Access to the TPIDRURW register if it exists.
+ */
+unsigned long get_tlsuser_v6(void)
+{
+	unsigned long v = 0;
+	if (elf_hwcap & HWCAP_TLS)
+		asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Dummy access for the case that TLS is emulated in software
+ */
+unsigned long get_tlsuser_none(void)
+{
+	return 0;
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-05-03  9:21                 ` Jonathan Austin
@ 2013-05-03  9:55                   ` Russell King - ARM Linux
  -1 siblings, 0 replies; 41+ messages in thread
From: Russell King - ARM Linux @ 2013-05-03  9:55 UTC (permalink / raw)
  To: Jonathan Austin
  Cc: André Hentschel, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

On Fri, May 03, 2013 at 10:21:34AM +0100, Jonathan Austin wrote:
>  	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register

So we're still back at stalling the pipeline with result delays on older
CPUs?

> +	.endm
> +
> +	.macro save_tlsuser_v6k, tp, tmp1, tmp2
> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
> +	str	\tmp2, [\tp, #4]
>  	.endm
>  
>  	.macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,15 +25,26 @@
>  	ldr	\tmp1, [\tmp1, #0]
>  	mov	\tmp2, #0xffff0fff
>  	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	ldrned	\tmp1, \tmp2, [\tp]
> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0

This at least is better.

> +	.endm
> +
> +	.macro save_tlsuser_v6, tp, tmp1, tmp2
> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?

But this isn't - this involves two delays.

> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
> +	strne	\tmp2, [\tp, #4]		@ save in to thread_info
>  	.endm
>  
>  	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  #endif
>  
> @@ -32,18 +52,31 @@
>  #define tls_emu		1
>  #define has_tls_reg		1
>  #define set_tls		set_tls_none
> +#define save_tlsuser	save_tlsuser_none
> +#define get_tlsuser	get_tlsuser_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu		0
>  #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
>  #define set_tls		set_tls_v6
> +#define save_tlsuser	save_tlsuser_v6
> +#define get_tlsuser	get_tlsuser_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu		0
>  #define has_tls_reg		1
>  #define set_tls		set_tls_v6k
> +#define save_tlsuser	save_tlsuser_v6k
> +#define get_tlsuser	get_tlsuser_v6k
>  #else
>  #define tls_emu		0
>  #define has_tls_reg		0
>  #define set_tls		set_tls_software
> +#define save_tlsuser	save_tlsuser_none
> +#define get_tlsuser	get_tlsuser_none
>  #endif

This separation of setting and saving the TLS value is actually quite
silly.  They're called from the same place, so let's just call it
"switch_tls" instead.

Here's just the assembly bits doing that - this is totally untested
of course:

 arch/arm/include/asm/tls.h   |   28 +++++++++++++++-------------
 arch/arm/kernel/entry-armv.S |    4 ++--
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..9c377f1 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,29 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \trw, c13, c0, 2	@ and the user r/w register
+	str	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, trw, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \trw, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, trw, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +33,19 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..81a08b1 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r2, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-03  9:55                   ` Russell King - ARM Linux
  0 siblings, 0 replies; 41+ messages in thread
From: Russell King - ARM Linux @ 2013-05-03  9:55 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, May 03, 2013 at 10:21:34AM +0100, Jonathan Austin wrote:
>  	.macro set_tls_v6k, tp, tmp1, tmp2
> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	ldrd	\tmp1, \tmp2, [\tp]
> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register

So we're still back at stalling the pipeline with result delays on older
CPUs?

> +	.endm
> +
> +	.macro save_tlsuser_v6k, tp, tmp1, tmp2
> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
> +	str	\tmp2, [\tp, #4]
>  	.endm
>  
>  	.macro set_tls_v6, tp, tmp1, tmp2
> @@ -16,15 +25,26 @@
>  	ldr	\tmp1, [\tmp1, #0]
>  	mov	\tmp2, #0xffff0fff
>  	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	ldrned	\tmp1, \tmp2, [\tp]
> +	ldreq	\tmp1, [\tp]
> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0

This at least is better.

> +	.endm
> +
> +	.macro save_tlsuser_v6, tp, tmp1, tmp2
> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?

But this isn't - this involves two delays.

> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
> +	strne	\tmp2, [\tp, #4]		@ save in to thread_info
>  	.endm
>  
>  	.macro set_tls_software, tp, tmp1, tmp2
> -	mov	\tmp1, #0xffff0fff
> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
> +	ldr	\tmp1, [\tp]
> +	mov	\tmp2, #0xffff0fff
> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>  	.endm
>  #endif
>  
> @@ -32,18 +52,31 @@
>  #define tls_emu		1
>  #define has_tls_reg		1
>  #define set_tls		set_tls_none
> +#define save_tlsuser	save_tlsuser_none
> +#define get_tlsuser	get_tlsuser_none
>  #elif defined(CONFIG_CPU_V6)
>  #define tls_emu		0
>  #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
>  #define set_tls		set_tls_v6
> +#define save_tlsuser	save_tlsuser_v6
> +#define get_tlsuser	get_tlsuser_v6
>  #elif defined(CONFIG_CPU_32v6K)
>  #define tls_emu		0
>  #define has_tls_reg		1
>  #define set_tls		set_tls_v6k
> +#define save_tlsuser	save_tlsuser_v6k
> +#define get_tlsuser	get_tlsuser_v6k
>  #else
>  #define tls_emu		0
>  #define has_tls_reg		0
>  #define set_tls		set_tls_software
> +#define save_tlsuser	save_tlsuser_none
> +#define get_tlsuser	get_tlsuser_none
>  #endif

This separation of setting and saving the TLS value is actually quite
silly.  They're called from the same place, so let's just call it
"switch_tls" instead.

Here's just the assembly bits doing that - this is totally untested
of course:

 arch/arm/include/asm/tls.h   |   28 +++++++++++++++-------------
 arch/arm/kernel/entry-armv.S |    4 ++--
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..9c377f1 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,29 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \trw, c13, c0, 2	@ and the user r/w register
+	str	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, trw, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \trw, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, trw, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +33,19 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..81a08b1 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r2, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-05-03  9:55                   ` Russell King - ARM Linux
@ 2013-05-03 15:24                     ` Jonathan Austin
  -1 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-03 15:24 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: André Hentschel, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Hi Russell,

Thanks for the comments - you're right about the 'switch_tls'
being more appropriate - needed to take a step back to see that.

I've got a few questions, added inline.

André, Assuming I've understood things okay, there's a patch that
uses Russell's asm stuff (with minor modifications, see the questions)
and includes the C-world changes too. Perhaps you could see that it
solves your problem?

On 03/05/13 10:55, Russell King - ARM Linux wrote:
> On Fri, May 03, 2013 at 10:21:34AM +0100, Jonathan Austin wrote:
>>   	.macro set_tls_v6k, tp, tmp1, tmp2
>> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
>> -	mov	\tmp1, #0
>> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>> +	ldrd	\tmp1, \tmp2, [\tp]
>> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
>> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> 
> So we're still back at stalling the pipeline with result delays on older
> CPUs?

How much older? This particular bit is v6k specific so I wasn't worrying
too much but I guess I'm missing something?

It's an academic question wrt this patch now, though, as the version
you show below re-orders to reduce the stalls...

> 
>> +	.endm
>> +
>> +	.macro save_tlsuser_v6k, tp, tmp1, tmp2
>> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
>> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
>> +	str	\tmp2, [\tp, #4]
>>   	.endm
>>   
>>   	.macro set_tls_v6, tp, tmp1, tmp2
>> @@ -16,15 +25,26 @@
>>   	ldr	\tmp1, [\tmp1, #0]
>>   	mov	\tmp2, #0xffff0fff
>>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
>> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
>> -	movne	\tmp1, #0
>> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>> +	ldrned	\tmp1, \tmp2, [\tp]
>> +	ldreq	\tmp1, [\tp]
>> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
>> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> 
> This at least is better.
> 
>> +	.endm
>> +
>> +	.macro save_tlsuser_v6, tp, tmp1, tmp2
>> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
>> +	ldr	\tmp1, =elf_hwcap
>> +	ldr	\tmp1, [\tmp1, #0]
>> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> 
> But this isn't - this involves two delays.

Indeed. You left this section untouched in your asm below - was that
because you didn't look at optimising it, or because you thought there
wasn't much better that could be done with it?

As far as I can see, we can't start doing any mcr/mrc operations until we
know for sure that the hw implements them, so this is something we're
stuck with?

Is V6 but not V6k just 1136?

> 
>> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
>> +	strne	\tmp2, [\tp, #4]		@ save in to thread_info
>>   	.endm
>>   
>>   	.macro set_tls_software, tp, tmp1, tmp2
>> -	mov	\tmp1, #0xffff0fff
>> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>> +	ldr	\tmp1, [\tp]
>> +	mov	\tmp2, #0xffff0fff
>> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>>   	.endm
>>   #endif
>>   
>> @@ -32,18 +52,31 @@
>>   #define tls_emu		1
>>   #define has_tls_reg		1
>>   #define set_tls		set_tls_none
>> +#define save_tlsuser	save_tlsuser_none
>> +#define get_tlsuser	get_tlsuser_none
>>   #elif defined(CONFIG_CPU_V6)
>>   #define tls_emu		0
>>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
>>   #define set_tls		set_tls_v6
>> +#define save_tlsuser	save_tlsuser_v6
>> +#define get_tlsuser	get_tlsuser_v6
>>   #elif defined(CONFIG_CPU_32v6K)
>>   #define tls_emu		0
>>   #define has_tls_reg		1
>>   #define set_tls		set_tls_v6k
>> +#define save_tlsuser	save_tlsuser_v6k
>> +#define get_tlsuser	get_tlsuser_v6k
>>   #else
>>   #define tls_emu		0
>>   #define has_tls_reg		0
>>   #define set_tls		set_tls_software
>> +#define save_tlsuser	save_tlsuser_none
>> +#define get_tlsuser	get_tlsuser_none
>>   #endif
> 
> This separation of setting and saving the TLS value is actually quite
> silly.  They're called from the same place, so lets just call it
> "switch_tls" instead.

Agreed. Thanks for the suggestion.

> 
> Here's just the assembly bits doing that - this is totally untested
> of course:
> 
>   arch/arm/include/asm/tls.h   |   28 +++++++++++++++-------------
>   arch/arm/kernel/entry-armv.S |    4 ++--
>   2 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..9c377f1 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,27 +2,29 @@
>   #define __ASMARM_TLS_H
>   
>   #ifdef __ASSEMBLY__

+#include <asm/asm-offsets.h>

required for TI_TP_VALUE

> -	.macro set_tls_none, tp, tmp1, tmp2
> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>   	.endm
>   
> -	.macro set_tls_v6k, tp, tmp1, tmp2
> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2

How do you feel about calling tp and trw something different? tpidro
and tpidrw, or tp and tpuser?

The naming threw me off slightly first time I read this new signature
(tp=thread_pointer/tls_pointer/etc).

> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
>   	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	mcr	p15, 0, \trw, c13, c0, 2	@ and the user r/w register
> +	str	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
>   	.endm
>   
> -	.macro set_tls_v6, tp, tmp1, tmp2
> +	.macro switch_tls_v6, base, tp, trw, tmp1, tmp2
>   	ldr	\tmp1, =elf_hwcap
>   	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>   	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
> +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> +	mcrne	p15, 0, \trw, c13, c0, 2	@ set user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
>   	.endm
>   
> -	.macro set_tls_software, tp, tmp1, tmp2
> +	.macro switch_tls_software, base, tp, trw, tmp1, tmp2
>   	mov	\tmp1, #0xffff0fff
>   	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
> @@ -31,19 +33,19 @@
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> -#define set_tls		set_tls_none
> +#define switch_tls	switch_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> -#define set_tls		set_tls_v6
> +#define switch_tls	switch_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> -#define set_tls		set_tls_v6k
> +#define switch_tls	switch_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> -#define set_tls		set_tls_software
> +#define switch_tls	switch_tls_software
>   #endif
>   
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..81a08b1 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,15 +728,15 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
>    THUMB(	str	lr, [ip], #4		   )
> +	ldrd	r4, r5, [r2, #TI_TP_VALUE]
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> -	set_tls	r3, r4, r5
> +	switch_tls r2, r4, r5, r3, r7

Looking at the implementation above and the way you use 'base', I think
that should be 
switch_tls r1, r4, r5, r3, r7
not
switch_tls r2, r4, r5, r3, r7

That way we save TPIDRURW into the old thread's thread_info, not the new one's.

>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
>   	ldr	r8, =__stack_chk_guard
> 
> 

Here's a complete patch including Russell's suggested asm. Tested on
TC2, build-tested for 1136

I think we could drop the extra .c file too - though it is kinda nice
for keeping include/asm/tls.h clean...

----8<------
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..39bce5b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \trw, c13, c0, 2	@ and the user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, trw, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \trw, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, trw, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,28 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
+#define get_tlsuser	get_tlsuser_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
+#define get_tlsuser	get_tlsuser_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
+#define get_tlsuser	get_tlsuser_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
+#define get_tlsuser	get_tlsuser_none
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned long get_tlsuser_none(void);
+extern unsigned long get_tlsuser_v6(void);
+extern unsigned long get_tlsuser_v6k(void);
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..4e1114c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 
 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
 		   process.o ptrace.o return_address.o sched_clock.o \
-		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+		   setup.o signal.o stacktrace.o sys_arm.o time.o tls.o traps.o
 
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/tls.c b/arch/arm/kernel/tls.c
new file mode 100644
index 0000000..1627f5b
--- /dev/null
+++ b/arch/arm/kernel/tls.c
@@ -0,0 +1,50 @@
+/*
+ * arch/arm/kernel/tls.c
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <asm/tls.h>
+
+/*
+ * Access to the TPIDRURW register, with full certainty that it exists.
+ */
+unsigned long get_tlsuser_v6k(void)
+{
+	unsigned long v;
+	asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Access to the TPIDRURW register if it exists.
+ */
+unsigned long get_tlsuser_v6(void)
+{
+	unsigned long v = 0;
+	if (elf_hwcap & HWCAP_TLS)
+		asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Dummy access for the case that TLS is emulated in software
+ */
+unsigned long get_tlsuser_none(void)
+{
+	return 0;
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-03 15:24                     ` Jonathan Austin
  0 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-03 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Russell,

Thanks for the comments - you're right about the 'switch_tls'
being more appropriate - needed to take a step back to see that.

I've got a few questions, added inline.

André, Assuming I've understood things okay, there's a patch that
uses Russell's asm stuff (with minor modifications, see the questions)
and includes the C-world changes too. Perhaps you could see that it
solves your problem?

On 03/05/13 10:55, Russell King - ARM Linux wrote:
> On Fri, May 03, 2013 at 10:21:34AM +0100, Jonathan Austin wrote:
>>   	.macro set_tls_v6k, tp, tmp1, tmp2
>> -	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
>> -	mov	\tmp1, #0
>> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>> +	ldrd	\tmp1, \tmp2, [\tp]
>> +	mcr	p15, 0, \tmp1, c13, c0, 3	@ set user r/o TLS register
>> +	mcr	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
> 
> So we're still back at stalling the pipeline with result delays on older
> CPUs?

How much older? This particular bit is v6k specific so I wasn't worrying
too much but I guess I'm missing something?

It's an academic question with regard to this patch now, though, as the version
you show below re-orders to reduce the stalls...

> 
>> +	.endm
>> +
>> +	.macro save_tlsuser_v6k, tp, tmp1, tmp2
>> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
>> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ load user r/w TLS register
>> +	str	\tmp2, [\tp, #4]
>>   	.endm
>>   
>>   	.macro set_tls_v6, tp, tmp1, tmp2
>> @@ -16,15 +25,26 @@
>>   	ldr	\tmp1, [\tmp1, #0]
>>   	mov	\tmp2, #0xffff0fff
>>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
>> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
>> -	movne	\tmp1, #0
>> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>> -	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>> +	ldrned	\tmp1, \tmp2, [\tp]
>> +	ldreq	\tmp1, [\tp]
>> +	mcrne	p15, 0, \tmp1, c13, c0, 3	@ yes, set user r/o TLS register
>> +	mcrne	p15, 0, \tmp2, c13, c0, 2	@ set user r/w TLS register
>> +	streq	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> 
> This at least is better.
> 
>> +	.endm
>> +
>> +	.macro save_tlsuser_v6, tp, tmp1, tmp2
>> +	@ TPIDRURW can be updated from userspace, so we have to re-read it
>> +	ldr	\tmp1, =elf_hwcap
>> +	ldr	\tmp1, [\tmp1, #0]
>> +	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> 
> But this isn't - this involves two delays.

Indeed. You left this section untouched in your asm below - was that
because you didn't look at optimising it, or because you thought there
wasn't much better that could be done with it?

As far as I can see, we can't start doing any mcr/mrc operations until we
know for sure that the hw implements them, so this is something we're
stuck with?

Is V6 but not V6k just 1136?

> 
>> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ read user r/w TLS register
>> +	strne	\tmp2, [\tp, #4]		@ save in to thread_info
>>   	.endm
>>   
>>   	.macro set_tls_software, tp, tmp1, tmp2
>> -	mov	\tmp1, #0xffff0fff
>> -	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>> +	ldr	\tmp1, [\tp]
>> +	mov	\tmp2, #0xffff0fff
>> +	str	\tmp1, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
>>   	.endm
>>   #endif
>>   
>> @@ -32,18 +52,31 @@
>>   #define tls_emu		1
>>   #define has_tls_reg		1
>>   #define set_tls		set_tls_none
>> +#define save_tlsuser	save_tlsuser_none
>> +#define get_tlsuser	get_tlsuser_none
>>   #elif defined(CONFIG_CPU_V6)
>>   #define tls_emu		0
>>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
>>   #define set_tls		set_tls_v6
>> +#define save_tlsuser	save_tlsuser_v6
>> +#define get_tlsuser	get_tlsuser_v6
>>   #elif defined(CONFIG_CPU_32v6K)
>>   #define tls_emu		0
>>   #define has_tls_reg		1
>>   #define set_tls		set_tls_v6k
>> +#define save_tlsuser	save_tlsuser_v6k
>> +#define get_tlsuser	get_tlsuser_v6k
>>   #else
>>   #define tls_emu		0
>>   #define has_tls_reg		0
>>   #define set_tls		set_tls_software
>> +#define save_tlsuser	save_tlsuser_none
>> +#define get_tlsuser	get_tlsuser_none
>>   #endif
> 
> This separation of setting and saving the TLS value is actually quite
> silly.  They're called from the same place, so lets just call it
> "switch_tls" instead.

Agreed. Thanks for the suggestion.

> 
> Here's just the assembly bits doing that - this is totally untested
> of course:
> 
>   arch/arm/include/asm/tls.h   |   28 +++++++++++++++-------------
>   arch/arm/kernel/entry-armv.S |    4 ++--
>   2 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..9c377f1 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,27 +2,29 @@
>   #define __ASMARM_TLS_H
>   
>   #ifdef __ASSEMBLY__

+#include <asm/asm-offsets.h>

required for TI_TP_VALUE

> -	.macro set_tls_none, tp, tmp1, tmp2
> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>   	.endm
>   
> -	.macro set_tls_v6k, tp, tmp1, tmp2
> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2

How do you feel about calling tp and trw something different? tpidro
and tpidrw, or tp and tpuser?

The naming threw me off slightly first time I read this new signature
(tp=thread_pointer/tls_pointer/etc).

> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
>   	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	mcr	p15, 0, \trw, c13, c0, 2	@ and the user r/w register
> +	str	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
>   	.endm
>   
> -	.macro set_tls_v6, tp, tmp1, tmp2
> +	.macro switch_tls_v6, base, tp, trw, tmp1, tmp2
>   	ldr	\tmp1, =elf_hwcap
>   	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>   	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
> +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> +	mcrne	p15, 0, \trw, c13, c0, 2	@ set user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4]@ save it
>   	.endm
>   
> -	.macro set_tls_software, tp, tmp1, tmp2
> +	.macro switch_tls_software, base, tp, trw, tmp1, tmp2
>   	mov	\tmp1, #0xffff0fff
>   	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
> @@ -31,19 +33,19 @@
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> -#define set_tls		set_tls_none
> +#define switch_tls	switch_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> -#define set_tls		set_tls_v6
> +#define switch_tls	switch_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> -#define set_tls		set_tls_v6k
> +#define switch_tls	switch_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> -#define set_tls		set_tls_software
> +#define switch_tls	switch_tls_software
>   #endif
>   
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..81a08b1 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,15 +728,15 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
>    THUMB(	str	lr, [ip], #4		   )
> +	ldrd	r4, r5, [r2, #TI_TP_VALUE]
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> -	set_tls	r3, r4, r5
> +	switch_tls r2, r4, r5, r3, r7

Looking at the implementation above and the way you use 'base', I think
that should be 
switch_tls r1, r4, r5, r3, r7
not
switch_tls r2, r4, r5, r3, r7

That way we save TPIDRURW into the old thread's thread_info, not the new one's.

>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
>   	ldr	r8, =__stack_chk_guard
> 
> 

Here's a complete patch including Russell's suggested asm. Tested on
TC2, build-tested for 1136

I think we could drop the extra .c file too - though it is kinda nice
for keeping include/asm/tls.h clean...

----8<------
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..39bce5b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ read the current user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \trw, c13, c0, 2	@ install the new user r/w value
+	str	\tmp2, [\base, #TI_TP_VALUE + 4] @ save old value (unconditional: no flags set here)
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, trw, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \trw, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, trw, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,28 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
+#define get_tlsuser	get_tlsuser_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
+#define get_tlsuser	get_tlsuser_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
+#define get_tlsuser	get_tlsuser_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
+#define get_tlsuser	get_tlsuser_none
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned long get_tlsuser_none(void);
+extern unsigned long get_tlsuser_v6(void);
+extern unsigned long get_tlsuser_v6k(void);
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..4e1114c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 
 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
 		   process.o ptrace.o return_address.o sched_clock.o \
-		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+		   setup.o signal.o stacktrace.o sys_arm.o time.o tls.o traps.o
 
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags_proc.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/tls.c b/arch/arm/kernel/tls.c
new file mode 100644
index 0000000..1627f5b
--- /dev/null
+++ b/arch/arm/kernel/tls.c
@@ -0,0 +1,50 @@
+/*
+ * arch/arm/kernel/tls.c
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <asm/tls.h>
+
+/*
+ * Access to the TPIDRURW register, with full certainty that it exists.
+ */
+unsigned long get_tlsuser_v6k(void)
+{
+	unsigned long v;
+	asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Access to the TPIDRURW register if it exists.
+ */
+unsigned long get_tlsuser_v6(void)
+{
+	unsigned long v = 0;
+	if (elf_hwcap & HWCAP_TLS)
+		asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+	return v;
+}
+
+/*
+ * Dummy access for the case that TLS is emulated in software
+ */
+unsigned long get_tlsuser_none(void)
+{
+	return 0;
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-05-03 15:24                     ` Jonathan Austin
  (?)
@ 2013-05-04 15:54                       ` André Hentschel
  -1 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-04 15:54 UTC (permalink / raw)
  To: Jonathan Austin
  Cc: Russell King - ARM Linux, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
> Hi Russell,
> 
> Thanks for the comments - you're right about the 'switch_tls'
> being more appropriate - needed to take a step back to see that.
> 
> I've got a few questions, added inline.
> 
> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

I'll test it tomorrow with my test tool and Wine.
FWIW i hacked together a test tool and it's available at https://github.com/AndreRH/tpidrurw-test

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-04 15:54                       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-04 15:54 UTC (permalink / raw)
  To: Jonathan Austin
  Cc: Russell King - ARM Linux, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
> Hi Russell,
> 
> Thanks for the comments - you're right about the 'switch_tls'
> being more appropriate - needed to take a step back to see that.
> 
> I've got a few questions, added inline.
> 
> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

I'll test it tomorrow with my test tool and Wine.
FWIW i hacked together a test tool and it's available at https://github.com/AndreRH/tpidrurw-test

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-04 15:54                       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-04 15:54 UTC (permalink / raw)
  To: linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
> Hi Russell,
> 
> Thanks for the comments - you're right about the 'switch_tls'
> being more appropriate - needed to take a step back to see that.
> 
> I've got a few questions, added inline.
> 
> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

I'll test it tomorrow with my test tool and Wine.
FWIW i hacked together a test tool and it's available at https://github.com/AndreRH/tpidrurw-test

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-05-03 15:24                     ` Jonathan Austin
  (?)
  (?)
@ 2013-05-06 22:27                       ` André Hentschel
  -1 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-06 22:27 UTC (permalink / raw)
  To: Jonathan Austin
  Cc: Russell King - ARM Linux, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
>> -	.macro set_tls_none, tp, tmp1, tmp2
>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>   	.endm
>>   
>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
> 
> How do you feel about calling tp and trw something different? tpidro
> and tpidrw, or tp and tpuser?
> 
> The naming threw me off slightly first time I read this new signature
> (tp=thread_pointer/tls_pointer/etc).
> 

FWIW i think tp&tpuser is more consistent.

> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

It works, but for various reasons I would like to suggest the patch below.
Reasons include: my thoughts about the tp & tpuser naming and the helper function for copy_thread; further, I'd really like to get a bit of credit for spending weeks on getting my second kernel patch in :)
If that patch is fine for you and no one objects, I'd be happy to test it, adapt the commit message and include:
	Reported-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: Will Deacon <will.deacon@arm.com>
	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?




diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..d7d542b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ read the current user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \tpuser, c13, c0, 2	@ install the new user r/w value
+	str	\tmp2, [\base, #TI_TP_VALUE + 4] @ save old value (unconditional: no flags set here)
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,31 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+/* Return the user r/w TLS register (TPIDRURW), or 0 when it is unavailable. */
+static inline unsigned long get_tlsuser(void)
+{
+	unsigned long reg = 0;
+
+	/* mrc reads cp15 into an output operand; skip it without a real register */
+	if (has_tls_reg && !tls_emu)
+		__asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg));
+	return reg;
+}
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-06 22:27                       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-06 22:27 UTC (permalink / raw)
  To: Jonathan Austin
  Cc: linux-arch, Will Deacon, Russell King - ARM Linux, linux-kernel,
	linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
>> -	.macro set_tls_none, tp, tmp1, tmp2
>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>   	.endm
>>   
>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
> 
> How do you feel about calling tp and trw something different? tpidro
> and tpidrw, or tp and tpuser?
> 
> The naming threw me off slightly first time I read this new signature
> (tp=thread_pointer/tls_pointer/etc).
> 

FWIW i think tp&tpuser is more consistent.

> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

It works, but for various reasons I would like to suggest the patch below.
Reasons include: my thoughts about the tp & tpuser naming and the helper function for copy_thread; further, I'd really like to get a bit of credit for spending weeks on getting my second kernel patch in :)
If that patch is fine for you and no one objects, I'd be happy to test it, adapt the commit message and include:
	Reported-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: Will Deacon <will.deacon@arm.com>
	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?




diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..d7d542b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ read the current user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \tpuser, c13, c0, 2	@ install the new user r/w value
+	str	\tmp2, [\base, #TI_TP_VALUE + 4] @ save old value (unconditional: no flags set here)
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,31 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+/* Return the user r/w TLS register (TPIDRURW), or 0 when it is unavailable. */
+static inline unsigned long get_tlsuser(void)
+{
+	unsigned long reg = 0;
+
+	/* mrc reads cp15 into an output operand; skip it without a real register */
+	if (has_tls_reg && !tls_emu)
+		__asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg));
+	return reg;
+}
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-06 22:27                       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-06 22:27 UTC (permalink / raw)
  To: Jonathan Austin
  Cc: Russell King - ARM Linux, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
>> -	.macro set_tls_none, tp, tmp1, tmp2
>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>   	.endm
>>   
>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
> 
> How do you feel about calling tp and trw something different? tpidro
> and tpidrw, or tp and tpuser?
> 
> The naming threw me off slightly first time I read this new signature
> (tp=thread_pointer/tls_pointer/etc).
> 

FWIW i think tp&tpuser is more consistent.

> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

It works, but for various reasons i would like to suggest the patch below.
Reasons include my thoughts about the tp & tpuser naming and the helper function for copy_thread; furthermore, I'd really like to get a bit of credit for spending weeks on getting my second kernel patch in :)
If that patch is fine for you and no one objects, I'd be happy to test it, adapt the commit message and include:
	Reported-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: Will Deacon <will.deacon@arm.com>
	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?




diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..d7d542b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,31 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline unsigned long get_tlsuser(void)
+{
+	if (has_tls_reg && !tls_emu)
+	{
+		unsigned long t;
+		__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
+		return t;
+	}
+	return 0;
+}
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-06 22:27                       ` André Hentschel
  0 siblings, 0 replies; 41+ messages in thread
From: André Hentschel @ 2013-05-06 22:27 UTC (permalink / raw)
  To: linux-arm-kernel

Am 03.05.2013 17:24, schrieb Jonathan Austin:
>> -	.macro set_tls_none, tp, tmp1, tmp2
>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>   	.endm
>>   
>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
> 
> How do you feel about calling tp and trw something different? tpidro
> and tpidrw, or tp and tpuser?
> 
> The naming threw me off slightly first time I read this new signature
> (tp=thread_pointer/tls_pointer/etc).
> 

FWIW i think tp&tpuser is more consistent.

> André, Assuming I've understood things okay, there's a patch that
> uses Russell's asm stuff (with minor modifications, see the questions)
> and includes the C-world changes too. Perhaps you could see that it
> solves your problem?

It works, but for various reasons i would like to suggest the patch below.
Reasons include my thoughts about the tp & tpuser naming and the helper function for copy_thread; furthermore, I'd really like to get a bit of credit for spending weeks on getting my second kernel patch in :)
If that patch is fine for you and no one objects, I'd be happy to test it, adapt the commit message and include:
	Reported-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: André Hentschel <nerv@dawncrow.de>
	Signed-off-by: Will Deacon <will.deacon@arm.com>
	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?




diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..d7d542b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,31 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline unsigned long get_tlsuser(void)
+{
+	if (has_tls_reg && !tls_emu)
+	{
+		unsigned long t;
+		__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
+		return t;
+	}
+	return 0;
+}
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldrd	r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tlsuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
  2013-05-06 22:27                       ` André Hentschel
  (?)
@ 2013-05-07 10:16                         ` Jonathan Austin
  -1 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-07 10:16 UTC (permalink / raw)
  To: André Hentschel
  Cc: Russell King - ARM Linux, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Hi André,

On 06/05/13 23:27, André Hentschel wrote:
> Am 03.05.2013 17:24, schrieb Jonathan Austin:
>>> -	.macro set_tls_none, tp, tmp1, tmp2
>>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>>    	.endm
>>>
>>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
>>
>> How do you feel about calling tp and trw something different? tpidro
>> and tpidrw, or tp and tpuser?
>>
>> The naming threw me off slightly first time I read this new signature
>> (tp=thread_pointer/tls_pointer/etc).
>>
>
> FWIW i think tp&tpuser is more consistent.
>
>> André, Assuming I've understood things okay, there's a patch that
>> uses Russell's asm stuff (with minor modifications, see the questions)
>> and includes the C-world changes too. Perhaps you could see that it
>> solves your problem?
>
> It works, but for various reasons i would like to suggest the patch below.
> Reasons include my thoughts about the tp & tpuser naming and the helper function for copy_thread; furthermore, I'd really like to get a bit of credit for spending weeks on getting my second kernel patch in :)
> If that patch is fine for you and no one objects, I'd be happy to test it, adapt the commit message and include:
> 	Reported-by: André Hentschel <nerv@dawncrow.de>
> 	Signed-off-by: André Hentschel <nerv@dawncrow.de>
> 	Signed-off-by: Will Deacon <will.deacon@arm.com>
> 	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
> 	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

I'm not worried about authorship, so you're welcome to be the author on 
the patch - assuming Russell's happy with that and with the change of 
register naming in your patch below.

As this will go through Russell's tree (probably via his patch system), 
you don't need his Signed-off-by - It'll get added as he takes the 
patch. As you're signing off, too, you don't need to list yourself as 
reporting the problem.

It'll look a bit weird to have 3 people signing off on quite a little 
patch (4 once it goes through Russell's tree) so it's best to make some 
notes in the commit message about the way this patch was written (IE 
many authors)

Hope that helps,
Jonny
>
> Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?
>
>
>
>
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..d90be6d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>   	struct cpu_context_save	cpu_context;	/* cpu context */
>   	__u32			syscall;	/* syscall number */
>   	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];	/* TLS registers */
>   #ifdef CONFIG_CRUNCH
>   	struct crunch_state	crunchstate;
>   #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..d7d542b 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,27 +2,30 @@
>   #define __ASMARM_TLS_H
>
>   #ifdef __ASSEMBLY__
> -	.macro set_tls_none, tp, tmp1, tmp2
> +#include <asm/asm-offsets.h>
> +	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
>   	.endm
>
> -	.macro set_tls_v6k, tp, tmp1, tmp2
> +	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
>   	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
>   	.endm
>
> -	.macro set_tls_v6, tp, tmp1, tmp2
> +	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
>   	ldr	\tmp1, =elf_hwcap
>   	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>   	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
> +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> +	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
>   	.endm
>
> -	.macro set_tls_software, tp, tmp1, tmp2
> +	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
>   	mov	\tmp1, #0xffff0fff
>   	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
> @@ -31,19 +34,31 @@
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> -#define set_tls		set_tls_none
> +#define switch_tls	switch_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> -#define set_tls		set_tls_v6
> +#define switch_tls	switch_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> -#define set_tls		set_tls_v6k
> +#define switch_tls	switch_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> -#define set_tls		set_tls_software
> +#define switch_tls	switch_tls_software
>   #endif
>
> +#ifndef __ASSEMBLY__
> +static inline unsigned long get_tlsuser(void)
> +{
> +	if (has_tls_reg && !tls_emu)
> +	{
> +		unsigned long t;
> +		__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> +		return t;
> +	}
> +	return 0;
> +}
> +#endif
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..80f09fe 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,15 +728,15 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
>    THUMB(	str	lr, [ip], #4		   )
> +	ldrd	r4, r5, [r2, #TI_TP_VALUE]
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> -	set_tls	r3, r4, r5
> +	switch_tls r1, r4, r5, r3, r7
>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
>   	ldr	r8, =__stack_chk_guard
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..24dbc72 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -39,6 +39,7 @@
>   #include <asm/thread_notify.h>
>   #include <asm/stacktrace.h>
>   #include <asm/mach/time.h>
> +#include <asm/tls.h>
>
>   #ifdef CONFIG_CC_STACKPROTECTOR
>   #include <linux/stackprotector.h>
> @@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   	clear_ptrace_hw_breakpoint(p);
>
>   	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = get_tlsuser();
>
>   	thread_notify(THREAD_NOTIFY_COPY, thread);
>
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>   #endif
>
>   		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>   				       datap);
>   			break;
>
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index 1c08911..f9d6259 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
>   		return regs->ARM_r0;
>
>   	case NR(set_tls):
> -		thread->tp_value = regs->ARM_r0;
> +		thread->tp_value[0] = regs->ARM_r0;
>   		if (tls_emu)
>   			return 0;
>   		if (has_tls_reg) {
> @@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
>   	int reg = (instr >> 12) & 15;
>   	if (reg == 15)
>   		return 1;
> -	regs->uregs[reg] = current_thread_info()->tp_value;
> +	regs->uregs[reg] = current_thread_info()->tp_value[0];
>   	regs->ARM_pc += 4;
>   	return 0;
>   }
>
>



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-07 10:16                         ` Jonathan Austin
  0 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-07 10:16 UTC (permalink / raw)
  To: André Hentschel
  Cc: Russell King - ARM Linux, Will Deacon, linux-arch, linux-kernel,
	linux-arm-kernel

Hi André,

On 06/05/13 23:27, André Hentschel wrote:
> Am 03.05.2013 17:24, schrieb Jonathan Austin:
>>> -	.macro set_tls_none, tp, tmp1, tmp2
>>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>>    	.endm
>>>
>>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
>>
>> How do you feel about calling tp and trw something different? tpidro
>> and tpidrw, or tp and tpuser?
>>
>> The naming threw me off slightly first time I read this new signature
>> (tp=thread_pointer/tls_pointer/etc).
>>
>
> FWIW i think tp&tpuser is more consistent.
>
>> André, Assuming I've understood things okay, there's a patch that
>> uses Russell's asm stuff (with minor modifications, see the questions)
>> and includes the C-world changes too. Perhaps you could see that it
>> solves your problem?
>
> It works, but for various reasons i would like to suggest the patch below.
> Reasons include my thoughts about the tp & tpuser naming and the helper function for copy_thread; furthermore, I'd really like to get a bit of credit for spending weeks on getting my second kernel patch in :)
> If that patch is fine for you and no one objects, I'd be happy to test it, adapt the commit message and include:
> 	Reported-by: André Hentschel <nerv@dawncrow.de>
> 	Signed-off-by: André Hentschel <nerv@dawncrow.de>
> 	Signed-off-by: Will Deacon <will.deacon@arm.com>
> 	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
> 	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

I'm not worried about authorship, so you're welcome to be the author on 
the patch - assuming Russell's happy with that and with the change of 
register naming in your patch below.

As this will go through Russell's tree (probably via his patch system), 
you don't need his Signed-off-by - It'll get added as he takes the 
patch. As you're signing off, too, you don't need to list yourself as 
reporting the problem.

It'll look a bit weird to have 3 people signing off on quite a little 
patch (4 once it goes through Russell's tree) so it's best to make some 
notes in the commit message about the way this patch was written (IE 
many authors)

Hope that helps,
Jonny
>
> Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?
>
>
>
>
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..d90be6d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>   	struct cpu_context_save	cpu_context;	/* cpu context */
>   	__u32			syscall;	/* syscall number */
>   	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];	/* TLS registers */
>   #ifdef CONFIG_CRUNCH
>   	struct crunch_state	crunchstate;
>   #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..d7d542b 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,27 +2,30 @@
>   #define __ASMARM_TLS_H
>
>   #ifdef __ASSEMBLY__
> -	.macro set_tls_none, tp, tmp1, tmp2
> +#include <asm/asm-offsets.h>
> +	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
>   	.endm
>
> -	.macro set_tls_v6k, tp, tmp1, tmp2
> +	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
>   	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
>   	.endm
>
> -	.macro set_tls_v6, tp, tmp1, tmp2
> +	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
>   	ldr	\tmp1, =elf_hwcap
>   	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>   	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
> +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> +	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
>   	.endm
>
> -	.macro set_tls_software, tp, tmp1, tmp2
> +	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
>   	mov	\tmp1, #0xffff0fff
>   	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
> @@ -31,19 +34,31 @@
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> -#define set_tls		set_tls_none
> +#define switch_tls	switch_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> -#define set_tls		set_tls_v6
> +#define switch_tls	switch_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> -#define set_tls		set_tls_v6k
> +#define switch_tls	switch_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> -#define set_tls		set_tls_software
> +#define switch_tls	switch_tls_software
>   #endif
>
> +#ifndef __ASSEMBLY__
> +static inline unsigned long get_tlsuser(void)
> +{
> +	if (has_tls_reg && !tls_emu)
> +	{
> +		unsigned long t;
> +		__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> +		return t;
> +	}
> +	return 0;
> +}
> +#endif
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..80f09fe 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,15 +728,15 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
>    THUMB(	str	lr, [ip], #4		   )
> +	ldrd	r4, r5, [r2, #TI_TP_VALUE]
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> -	set_tls	r3, r4, r5
> +	switch_tls r1, r4, r5, r3, r7
>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
>   	ldr	r8, =__stack_chk_guard
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..24dbc72 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -39,6 +39,7 @@
>   #include <asm/thread_notify.h>
>   #include <asm/stacktrace.h>
>   #include <asm/mach/time.h>
> +#include <asm/tls.h>
>
>   #ifdef CONFIG_CC_STACKPROTECTOR
>   #include <linux/stackprotector.h>
> @@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   	clear_ptrace_hw_breakpoint(p);
>
>   	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = get_tlsuser();
>
>   	thread_notify(THREAD_NOTIFY_COPY, thread);
>
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>   #endif
>
>   		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>   				       datap);
>   			break;
>
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index 1c08911..f9d6259 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
>   		return regs->ARM_r0;
>
>   	case NR(set_tls):
> -		thread->tp_value = regs->ARM_r0;
> +		thread->tp_value[0] = regs->ARM_r0;
>   		if (tls_emu)
>   			return 0;
>   		if (has_tls_reg) {
> @@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
>   	int reg = (instr >> 12) & 15;
>   	if (reg == 15)
>   		return 1;
> -	regs->uregs[reg] = current_thread_info()->tp_value;
> +	regs->uregs[reg] = current_thread_info()->tp_value[0];
>   	regs->ARM_pc += 4;
>   	return 0;
>   }
>
>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCHv2] arm: Preserve TPIDRURW on context switch
@ 2013-05-07 10:16                         ` Jonathan Austin
  0 siblings, 0 replies; 41+ messages in thread
From: Jonathan Austin @ 2013-05-07 10:16 UTC (permalink / raw)
  To: linux-arm-kernel

Hi André,

On 06/05/13 23:27, André Hentschel wrote:
> Am 03.05.2013 17:24, schrieb Jonathan Austin:
>>> -	.macro set_tls_none, tp, tmp1, tmp2
>>> +	.macro switch_tls_none, base, tp, trw, tmp1, tmp2
>>>    	.endm
>>>
>>> -	.macro set_tls_v6k, tp, tmp1, tmp2
>>> +	.macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
>>
>> How do you feel about calling tp and trw something different? tpidro
>> and tpidrw, or tp and tpuser?
>>
>> The naming threw me off slightly first time I read this new signature
>> (tp=thread_pointer/tls_pointer/etc).
>>
>
> FWIW i think tp&tpuser is more consistent.
>
>> André, Assuming I've understood things okay, there's a patch that
>> uses Russell's asm stuff (with minor modifications, see the questions)
>> and includes the C-world changes too. Perhaps you could see that it
>> solves your problem?
>
> It works, but for various reasons i would like to suggest the patch below.
> Reasons include: My thoughts about tp&tpuser naming and the helper function for copy_thread, further i'd really like to get a bit credit for spending weeks on getting my second kernel patch in :)
> If that patch is fine for you and no one object, i'd be happy to test it, adapt the commit message and include:
> 	Reported-by: André Hentschel <nerv@dawncrow.de>
> 	Signed-off-by: André Hentschel <nerv@dawncrow.de>
> 	Signed-off-by: Will Deacon <will.deacon@arm.com>
> 	Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
> 	Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>

I'm not worried about authorship, so you're welcome to be the author on 
the patch - assuming Russell's happy with that and with the change of 
register naming in your patch below.

As this will go through Russell's tree (probably via his patch system), 
you don't need his Signed-off-by - It'll get added as he takes the 
patch. As you're signing off, too, you don't need to list yourself as 
reporting the problem.

It'll look a bit weird to have 3 people signing off on quite a little 
patch (4 once it goes through Russell's tree) so it's best to make some 
notes in the commit message about the way this patch was written (i.e. 
many authors)

Hope that helps,
Jonny
>
> Not totally sure about the Signed-off-bys. Can i add a Signed-off-by for Russell King? Is it the right mail address for him/you?
>
>
>
>
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..d90be6d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
>   	struct cpu_context_save	cpu_context;	/* cpu context */
>   	__u32			syscall;	/* syscall number */
>   	__u8			used_cp[16];	/* thread used copro */
> -	unsigned long		tp_value;
> +	unsigned long		tp_value[2];	/* TLS registers */
>   #ifdef CONFIG_CRUNCH
>   	struct crunch_state	crunchstate;
>   #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..d7d542b 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,27 +2,30 @@
>   #define __ASMARM_TLS_H
>
>   #ifdef __ASSEMBLY__
> -	.macro set_tls_none, tp, tmp1, tmp2
> +#include <asm/asm-offsets.h>
> +	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
>   	.endm
>
> -	.macro set_tls_v6k, tp, tmp1, tmp2
> +	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
> +	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
>   	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
> -	mov	\tmp1, #0
> -	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
> +	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
>   	.endm
>
> -	.macro set_tls_v6, tp, tmp1, tmp2
> +	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
>   	ldr	\tmp1, =elf_hwcap
>   	ldr	\tmp1, [\tmp1, #0]
>   	mov	\tmp2, #0xffff0fff
>   	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
> -	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> -	movne	\tmp1, #0
> -	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
>   	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
> +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> +	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
> +	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
>   	.endm
>
> -	.macro set_tls_software, tp, tmp1, tmp2
> +	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
>   	mov	\tmp1, #0xffff0fff
>   	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
>   	.endm
> @@ -31,19 +34,31 @@
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu		1
>   #define has_tls_reg		1
> -#define set_tls		set_tls_none
> +#define switch_tls	switch_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu		0
>   #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
> -#define set_tls		set_tls_v6
> +#define switch_tls	switch_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu		0
>   #define has_tls_reg		1
> -#define set_tls		set_tls_v6k
> +#define switch_tls	switch_tls_v6k
>   #else
>   #define tls_emu		0
>   #define has_tls_reg		0
> -#define set_tls		set_tls_software
> +#define switch_tls	switch_tls_software
>   #endif
>
> +#ifndef __ASSEMBLY__
> +static inline unsigned long get_tlsuser(void)
> +{
> +	if (has_tls_reg && !tls_emu)
> +	{
> +		unsigned long t;
> +		__asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> +		return t;
> +	}
> +	return 0;
> +}
> +#endif
>   #endif	/* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..80f09fe 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,15 +728,15 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart	)
>    UNWIND(.cantunwind	)
>   	add	ip, r1, #TI_CPU_SAVE
> -	ldr	r3, [r2, #TI_TP_VALUE]
>    ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
>    THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
>    THUMB(	str	sp, [ip], #4		   )
>    THUMB(	str	lr, [ip], #4		   )
> +	ldrd	r4, r5, [r2, #TI_TP_VALUE]
>   #ifdef CONFIG_CPU_USE_DOMAINS
>   	ldr	r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> -	set_tls	r3, r4, r5
> +	switch_tls r1, r4, r5, r3, r7
>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>   	ldr	r7, [r2, #TI_TASK]
>   	ldr	r8, =__stack_chk_guard
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..24dbc72 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -39,6 +39,7 @@
>   #include <asm/thread_notify.h>
>   #include <asm/stacktrace.h>
>   #include <asm/mach/time.h>
> +#include <asm/tls.h>
>
>   #ifdef CONFIG_CC_STACKPROTECTOR
>   #include <linux/stackprotector.h>
> @@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
>   	clear_ptrace_hw_breakpoint(p);
>
>   	if (clone_flags & CLONE_SETTLS)
> -		thread->tp_value = childregs->ARM_r3;
> +		thread->tp_value[0] = childregs->ARM_r3;
> +	thread->tp_value[1] = get_tlsuser();
>
>   	thread_notify(THREAD_NOTIFY_COPY, thread);
>
> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
> index 03deeff..2bc1514 100644
> --- a/arch/arm/kernel/ptrace.c
> +++ b/arch/arm/kernel/ptrace.c
> @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
>   #endif
>
>   		case PTRACE_GET_THREAD_AREA:
> -			ret = put_user(task_thread_info(child)->tp_value,
> +			ret = put_user(task_thread_info(child)->tp_value[0],
>   				       datap);
>   			break;
>
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index 1c08911..f9d6259 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
>   		return regs->ARM_r0;
>
>   	case NR(set_tls):
> -		thread->tp_value = regs->ARM_r0;
> +		thread->tp_value[0] = regs->ARM_r0;
>   		if (tls_emu)
>   			return 0;
>   		if (has_tls_reg) {
> @@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
>   	int reg = (instr >> 12) & 15;
>   	if (reg == 15)
>   		return 1;
> -	regs->uregs[reg] = current_thread_info()->tp_value;
> +	regs->uregs[reg] = current_thread_info()->tp_value[0];
>   	regs->ARM_pc += 4;
>   	return 0;
>   }
>
>

^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2013-05-07 10:16 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-04-19 15:54 [PATCHv2] arm: Preserve TPIDRURW on context switch André Hentschel
2013-04-19 15:54 ` André Hentschel
2013-04-19 15:54 ` André Hentschel
2013-04-22 14:36 ` Russell King - ARM Linux
2013-04-22 14:36   ` Russell King - ARM Linux
2013-04-22 15:18   ` Will Deacon
2013-04-22 15:18     ` Will Deacon
2013-04-22 21:07     ` André Hentschel
2013-04-22 21:07       ` André Hentschel
2013-04-22 21:07       ` André Hentschel
2013-04-23  9:15       ` Will Deacon
2013-04-23  9:15         ` Will Deacon
2013-04-23  9:15         ` Will Deacon
2013-04-23 22:42         ` André Hentschel
2013-04-23 22:42           ` André Hentschel
2013-04-23 22:42           ` André Hentschel
2013-04-24  9:42           ` Will Deacon
2013-04-24  9:42             ` Will Deacon
2013-04-24  9:42             ` Will Deacon
2013-04-24  9:42             ` Will Deacon
2013-04-24 21:44             ` André Hentschel
2013-05-02 19:54             ` André Hentschel
2013-05-02 19:54               ` André Hentschel
2013-05-02 19:54               ` André Hentschel
2013-05-03  9:21               ` Jonathan Austin
2013-05-03  9:21                 ` Jonathan Austin
2013-05-03  9:21                 ` Jonathan Austin
2013-05-03  9:55                 ` Russell King - ARM Linux
2013-05-03  9:55                   ` Russell King - ARM Linux
2013-05-03 15:24                   ` Jonathan Austin
2013-05-03 15:24                     ` Jonathan Austin
2013-05-04 15:54                     ` André Hentschel
2013-05-04 15:54                       ` André Hentschel
2013-05-04 15:54                       ` André Hentschel
2013-05-06 22:27                     ` André Hentschel
2013-05-06 22:27                       ` André Hentschel
2013-05-06 22:27                       ` André Hentschel
2013-05-06 22:27                       ` André Hentschel
2013-05-07 10:16                       ` Jonathan Austin
2013-05-07 10:16                         ` Jonathan Austin
2013-05-07 10:16                         ` Jonathan Austin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.