linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup
@ 2007-11-29  0:38 Roland McGrath
  2007-11-29  0:40 ` [PATCH x86/mm 2/6] x86-64 ptrace whitespace Roland McGrath
                   ` (5 more replies)
  0 siblings, 6 replies; 28+ messages in thread
From: Roland McGrath @ 2007-11-29  0:38 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin


This cleans up the getreg32/putreg32 functions to use struct pt_regs in a
straightforward fashion, instead of equivalent ugly pointer arithmetic.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/ia32/ptrace32.c |   21 +++++++++------------
 1 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index 1e382e3..c52d066 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -37,11 +37,11 @@
 
 #define R32(l,q)							\
 	case offsetof(struct user32, regs.l):				\
-		stack[offsetof(struct pt_regs, q) / 8] = val; break
+		regs->q = val; break;
 
 static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 {
-	__u64 *stack = (__u64 *)task_pt_regs(child);
+	struct pt_regs *regs = task_pt_regs(child);
 
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
@@ -65,12 +65,12 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 	case offsetof(struct user32, regs.ss):
 		if ((val & 3) != 3)
 			return -EIO;
-		stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
+		regs->ss = val & 0xffff;
 		break;
 	case offsetof(struct user32, regs.cs):
 		if ((val & 3) != 3)
 			return -EIO;
-		stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
+		regs->cs = val & 0xffff;
 		break;
 
 	R32(ebx, bx);
@@ -84,9 +84,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 	R32(eip, ip);
 	R32(esp, sp);
 
-	case offsetof(struct user32, regs.eflags): {
-		__u64 *flags = &stack[offsetof(struct pt_regs, flags)/8];
-
+	case offsetof(struct user32, regs.eflags):
 		val &= FLAG_MASK;
 		/*
 		 * If the user value contains TF, mark that
@@ -97,9 +95,8 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 			clear_tsk_thread_flag(child, TIF_FORCED_TF);
 		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 			val |= X86_EFLAGS_TF;
-		*flags = val | (*flags & ~FLAG_MASK);
+		regs->flags = val | (regs->flags & ~FLAG_MASK);
 		break;
-	}
 
 	case offsetof(struct user32, u_debugreg[0]) ...
 		offsetof(struct user32, u_debugreg[7]):
@@ -123,11 +120,11 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 
 #define R32(l,q)							\
 	case offsetof(struct user32, regs.l):				\
-		*val = stack[offsetof(struct pt_regs, q)/8]; break
+		*val = regs->q; break
 
 static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 {
-	__u64 *stack = (__u64 *)task_pt_regs(child);
+	struct pt_regs *regs = task_pt_regs(child);
 
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
@@ -160,7 +157,7 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 		/*
 		 * If the debugger set TF, hide it from the readout.
 		 */
-		*val = stack[offsetof(struct pt_regs, flags)/8];
+		*val = regs->flags;
 		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
 			*val &= ~X86_EFLAGS_TF;
 		break;

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH x86/mm 2/6] x86-64 ptrace whitespace
  2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
@ 2007-11-29  0:40 ` Roland McGrath
  2007-11-29  0:40 ` [PATCH x86/mm 3/6] x86-32 " Roland McGrath
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 28+ messages in thread
From: Roland McGrath @ 2007-11-29  0:40 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin


This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/ptrace_64.c |  224 +++++++++++++++++++++---------------------
 1 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 56b31cd..2427548 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -2,7 +2,7 @@
 /*
  * Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
- * 
+ *
  * x86-64 port 2000-2002 Andi Kleen
  */
 
@@ -48,7 +48,7 @@
  * Make sure the single step bit is not set.
  */
 void ptrace_disable(struct task_struct *child)
-{ 
+{
 	user_disable_single_step(child);
 }
 
@@ -63,69 +63,69 @@ static int putreg(struct task_struct *child,
 {
 	struct pt_regs *regs = task_pt_regs(child);
 	switch (regno) {
-		case offsetof(struct user_regs_struct,fs):
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.fsindex = value & 0xffff; 
-			return 0;
-		case offsetof(struct user_regs_struct,gs):
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.gsindex = value & 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,ds):
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.ds = value & 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,es): 
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.es = value & 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,ss):
-			if ((value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			return 0;
-		case offsetof(struct user_regs_struct,fs_base):
-			if (value >= TASK_SIZE_OF(child))
-				return -EIO;
-			/*
-			 * When changing the segment base, use do_arch_prctl
-			 * to set either thread.fs or thread.fsindex and the
-			 * corresponding GDT slot.
-			 */
-			if (child->thread.fs != value)
-				return do_arch_prctl(child, ARCH_SET_FS, value);
-			return 0;
-		case offsetof(struct user_regs_struct,gs_base):
-			/*
-			 * Exactly the same here as the %fs handling above.
-			 */
-			if (value >= TASK_SIZE_OF(child))
-				return -EIO;
-			if (child->thread.gs != value)
-				return do_arch_prctl(child, ARCH_SET_GS, value);
-			return 0;
-		case offsetof(struct user_regs_struct,flags):
-			value &= FLAG_MASK;
-			/*
-			 * If the user value contains TF, mark that
-			 * it was not "us" (the debugger) that set it.
-			 * If not, make sure it stays set if we had.
-			 */
-			if (value & X86_EFLAGS_TF)
-				clear_tsk_thread_flag(child, TIF_FORCED_TF);
-			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				value |= X86_EFLAGS_TF;
-			value |= regs->flags & ~FLAG_MASK;
-			break;
-		case offsetof(struct user_regs_struct,cs): 
-			if ((value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			break;
+	case offsetof(struct user_regs_struct,fs):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.fsindex = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,gs):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.gsindex = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,ds):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.ds = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,es):
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.es = value & 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,ss):
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		return 0;
+	case offsetof(struct user_regs_struct,fs_base):
+		if (value >= TASK_SIZE_OF(child))
+			return -EIO;
+		/*
+		 * When changing the segment base, use do_arch_prctl
+		 * to set either thread.fs or thread.fsindex and the
+		 * corresponding GDT slot.
+		 */
+		if (child->thread.fs != value)
+			return do_arch_prctl(child, ARCH_SET_FS, value);
+		return 0;
+	case offsetof(struct user_regs_struct,gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
+		if (value >= TASK_SIZE_OF(child))
+			return -EIO;
+		if (child->thread.gs != value)
+			return do_arch_prctl(child, ARCH_SET_GS, value);
+		return 0;
+	case offsetof(struct user_regs_struct,flags):
+		value &= FLAG_MASK;
+		/*
+		 * If the user value contains TF, mark that
+		 * it was not "us" (the debugger) that set it.
+		 * If not, make sure it stays set if we had.
+		 */
+		if (value & X86_EFLAGS_TF)
+			clear_tsk_thread_flag(child, TIF_FORCED_TF);
+		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			value |= X86_EFLAGS_TF;
+		value |= regs->flags & ~FLAG_MASK;
+		break;
+	case offsetof(struct user_regs_struct,cs):
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
 	}
 	*pt_regs_access(regs, regno) = value;
 	return 0;
@@ -136,49 +136,49 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long val;
 	switch (regno) {
-		case offsetof(struct user_regs_struct, fs):
-			return child->thread.fsindex;
-		case offsetof(struct user_regs_struct, gs):
-			return child->thread.gsindex;
-		case offsetof(struct user_regs_struct, ds):
-			return child->thread.ds;
-		case offsetof(struct user_regs_struct, es):
-			return child->thread.es; 
-		case offsetof(struct user_regs_struct, fs_base):
-			/*
-			 * do_arch_prctl may have used a GDT slot instead of
-			 * the MSR.  To userland, it appears the same either
-			 * way, except the %fs segment selector might not be 0.
-			 */
-			if (child->thread.fs != 0)
-				return child->thread.fs;
-			if (child->thread.fsindex != FS_TLS_SEL)
-				return 0;
-			return get_desc_base(&child->thread.tls_array[FS_TLS]);
-		case offsetof(struct user_regs_struct, gs_base):
-			/*
-			 * Exactly the same here as the %fs handling above.
-			 */
-			if (child->thread.gs != 0)
-				return child->thread.gs;
-			if (child->thread.gsindex != GS_TLS_SEL)
-				return 0;
-			return get_desc_base(&child->thread.tls_array[GS_TLS]);
-		case offsetof(struct user_regs_struct, flags):
-			/*
-			 * If the debugger set TF, hide it from the readout.
-			 */
-			val = regs->flags;
-			if (test_tsk_thread_flag(child, TIF_IA32))
-				val &= 0xffffffff;
-			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				val &= ~X86_EFLAGS_TF;
-			return val;
-		default:
-			val = *pt_regs_access(regs, regno);
-			if (test_tsk_thread_flag(child, TIF_IA32))
-				val &= 0xffffffff;
-			return val;
+	case offsetof(struct user_regs_struct, fs):
+		return child->thread.fsindex;
+	case offsetof(struct user_regs_struct, gs):
+		return child->thread.gsindex;
+	case offsetof(struct user_regs_struct, ds):
+		return child->thread.ds;
+	case offsetof(struct user_regs_struct, es):
+		return child->thread.es;
+	case offsetof(struct user_regs_struct, fs_base):
+		/*
+		 * do_arch_prctl may have used a GDT slot instead of
+		 * the MSR.  To userland, it appears the same either
+		 * way, except the %fs segment selector might not be 0.
+		 */
+		if (child->thread.fs != 0)
+			return child->thread.fs;
+		if (child->thread.fsindex != FS_TLS_SEL)
+			return 0;
+		return get_desc_base(&child->thread.tls_array[FS_TLS]);
+	case offsetof(struct user_regs_struct, gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
+		if (child->thread.gs != 0)
+			return child->thread.gs;
+		if (child->thread.gsindex != GS_TLS_SEL)
+			return 0;
+		return get_desc_base(&child->thread.tls_array[GS_TLS]);
+	case offsetof(struct user_regs_struct, flags):
+		/*
+		 * If the debugger set TF, hide it from the readout.
+		 */
+		val = regs->flags;
+		if (test_tsk_thread_flag(child, TIF_IA32))
+			val &= 0xffffffff;
+		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			val &= ~X86_EFLAGS_TF;
+		return val;
+	default:
+		val = *pt_regs_access(regs, regno);
+		if (test_tsk_thread_flag(child, TIF_IA32))
+			val &= 0xffffffff;
+		return val;
 	}
 
 }
@@ -244,7 +244,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
 	case PTRACE_PEEKDATA:
 		ret = generic_ptrace_peekdata(child, addr, data);
 		break;
@@ -310,10 +310,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					 (struct user_desc __user *) data, 0);
 		break;
 #endif
-		/* normal 64bit interface to access TLS data. 
+		/* normal 64bit interface to access TLS data.
 		   Works just like arch_prctl, except that the arguments
 		   are reversed. */
-	case PTRACE_ARCH_PRCTL: 
+	case PTRACE_ARCH_PRCTL:
 		ret = do_arch_prctl(child, data, addr);
 		break;
 
@@ -386,7 +386,7 @@ static void syscall_trace(struct pt_regs *regs)
 	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
 	       current->comm,
 	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace); 
+	       current_thread_info()->flags, current->ptrace);
 #endif
 
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH x86/mm 3/6] x86-32 ptrace whitespace
  2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
  2007-11-29  0:40 ` [PATCH x86/mm 2/6] x86-64 ptrace whitespace Roland McGrath
@ 2007-11-29  0:40 ` Roland McGrath
  2007-11-29  0:41 ` [PATCH x86/mm 4/6] x86-64 ptrace get/putreg current task Roland McGrath
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 28+ messages in thread
From: Roland McGrath @ 2007-11-29  0:40 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin


This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/ptrace_32.c |  110 +++++++++++++++++++++---------------------
 1 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index f81e2f1..5aca84e 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -51,37 +51,37 @@ static int putreg(struct task_struct *child,
 	struct pt_regs *regs = task_pt_regs(child);
 	regno >>= 2;
 	switch (regno) {
-		case GS:
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.gs = value;
-			return 0;
-		case DS:
-		case ES:
-		case FS:
-			if (value && (value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			break;
-		case SS:
-		case CS:
-			if ((value & 3) != 3)
-				return -EIO;
-			value &= 0xffff;
-			break;
-		case EFL:
-			value &= FLAG_MASK;
-			/*
-			 * If the user value contains TF, mark that
-			 * it was not "us" (the debugger) that set it.
-			 * If not, make sure it stays set if we had.
-			 */
-			if (value & X86_EFLAGS_TF)
-				clear_tsk_thread_flag(child, TIF_FORCED_TF);
-			else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				value |= X86_EFLAGS_TF;
-			value |= regs->flags & ~FLAG_MASK;
-			break;
+	case GS:
+		if (value && (value & 3) != 3)
+			return -EIO;
+		child->thread.gs = value;
+		return 0;
+	case DS:
+	case ES:
+	case FS:
+		if (value && (value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
+	case SS:
+	case CS:
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
+	case EFL:
+		value &= FLAG_MASK;
+		/*
+		 * If the user value contains TF, mark that
+		 * it was not "us" (the debugger) that set it.
+		 * If not, make sure it stays set if we had.
+		 */
+		if (value & X86_EFLAGS_TF)
+			clear_tsk_thread_flag(child, TIF_FORCED_TF);
+		else if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			value |= X86_EFLAGS_TF;
+		value |= regs->flags & ~FLAG_MASK;
+		break;
 	}
 	*pt_regs_access(regs, regno) = value;
 	return 0;
@@ -94,26 +94,26 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 
 	regno >>= 2;
 	switch (regno) {
-		case EFL:
-			/*
-			 * If the debugger set TF, hide it from the readout.
-			 */
-			retval = regs->flags;
-			if (test_tsk_thread_flag(child, TIF_FORCED_TF))
-				retval &= ~X86_EFLAGS_TF;
-			break;
-		case GS:
-			retval = child->thread.gs;
-			break;
-		case DS:
-		case ES:
-		case FS:
-		case SS:
-		case CS:
-			retval = 0xffff;
-			/* fall through */
-		default:
-			retval &= *pt_regs_access(regs, regno);
+	case EFL:
+		/*
+		 * If the debugger set TF, hide it from the readout.
+		 */
+		retval = regs->flags;
+		if (test_tsk_thread_flag(child, TIF_FORCED_TF))
+			retval &= ~X86_EFLAGS_TF;
+		break;
+	case GS:
+		retval = child->thread.gs;
+		break;
+	case DS:
+	case ES:
+	case FS:
+	case SS:
+	case CS:
+		retval = 0xffff;
+		/* fall through */
+	default:
+		retval &= *pt_regs_access(regs, regno);
 	}
 	return retval;
 }
@@ -190,7 +190,7 @@ static int ptrace_set_debugreg(struct task_struct *child,
  * Make sure the single step bit is not set.
  */
 void ptrace_disable(struct task_struct *child)
-{ 
+{
 	user_disable_single_step(child);
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 }
@@ -203,7 +203,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
 	case PTRACE_PEEKDATA:
 		ret = generic_ptrace_peekdata(child, addr, data);
 		break;
@@ -213,7 +213,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		unsigned long tmp;
 
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 || 
+		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
 			break;
 
@@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
-		if ((addr & 3) || addr < 0 || 
+		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
 			break;
 

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH x86/mm 4/6] x86-64 ptrace get/putreg current task
  2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
  2007-11-29  0:40 ` [PATCH x86/mm 2/6] x86-64 ptrace whitespace Roland McGrath
  2007-11-29  0:40 ` [PATCH x86/mm 3/6] x86-32 " Roland McGrath
@ 2007-11-29  0:41 ` Roland McGrath
  2007-11-29 17:39   ` Christoph Hellwig
  2007-11-29  0:42 ` [PATCH x86/mm 5/6] x86-32 " Roland McGrath
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 28+ messages in thread
From: Roland McGrath @ 2007-11-29  0:41 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin


This generalizes the getreg and putreg functions so they can be used on the
current task, as well as on a task stopped in TASK_TRACED and switched off.
This lays the groundwork to share this code for all kinds of user-mode
machine state access, not just ptrace.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/ptrace_64.c |   36 ++++++++++++++++++++++++++++++++++--
 1 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index 2427548..5979dbe 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -67,21 +67,29 @@ static int putreg(struct task_struct *child,
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.fsindex = value & 0xffff;
+		if (child == current)
+			loadsegment(fs, child->thread.fsindex);
 		return 0;
 	case offsetof(struct user_regs_struct,gs):
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.gsindex = value & 0xffff;
+		if (child == current)
+			load_gs_index(child->thread.gsindex);
 		return 0;
 	case offsetof(struct user_regs_struct,ds):
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.ds = value & 0xffff;
+		if (child == current)
+			loadsegment(ds, child->thread.ds);
 		return 0;
 	case offsetof(struct user_regs_struct,es):
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.es = value & 0xffff;
+		if (child == current)
+			loadsegment(es, child->thread.es);
 		return 0;
 	case offsetof(struct user_regs_struct,ss):
 		if ((value & 3) != 3)
@@ -135,14 +143,32 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 {
 	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long val;
+	unsigned int seg;
 	switch (regno) {
 	case offsetof(struct user_regs_struct, fs):
+		if (child == current) {
+			/* Older gas can't assemble movq %?s,%r?? */
+			asm("movl %%fs,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.fsindex;
 	case offsetof(struct user_regs_struct, gs):
+		if (child == current) {
+			asm("movl %%gs,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.gsindex;
 	case offsetof(struct user_regs_struct, ds):
+		if (child == current) {
+			asm("movl %%ds,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.ds;
 	case offsetof(struct user_regs_struct, es):
+		if (child == current) {
+			asm("movl %%es,%0" : "=r" (seg));
+			return seg;
+		}
 		return child->thread.es;
 	case offsetof(struct user_regs_struct, fs_base):
 		/*
@@ -152,7 +178,10 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		 */
 		if (child->thread.fs != 0)
 			return child->thread.fs;
-		if (child->thread.fsindex != FS_TLS_SEL)
+		seg = child->thread.fsindex;
+		if (child == current)
+			asm("movl %%fs,%0" : "=r" (seg));
+		if (seg != FS_TLS_SEL)
 			return 0;
 		return get_desc_base(&child->thread.tls_array[FS_TLS]);
 	case offsetof(struct user_regs_struct, gs_base):
@@ -161,7 +190,10 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		 */
 		if (child->thread.gs != 0)
 			return child->thread.gs;
-		if (child->thread.gsindex != GS_TLS_SEL)
+		seg = child->thread.gsindex;
+		if (child == current)
+			asm("movl %%gs,%0" : "=r" (seg));
+		if (seg != GS_TLS_SEL)
 			return 0;
 		return get_desc_base(&child->thread.tls_array[GS_TLS]);
 	case offsetof(struct user_regs_struct, flags):

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH x86/mm 5/6] x86-32 ptrace get/putreg current task
  2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
                   ` (2 preceding siblings ...)
  2007-11-29  0:41 ` [PATCH x86/mm 4/6] x86-64 ptrace get/putreg current task Roland McGrath
@ 2007-11-29  0:42 ` Roland McGrath
  2007-11-29  0:42 ` [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 " Roland McGrath
  2007-11-29 10:39 ` [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Ingo Molnar
  5 siblings, 0 replies; 28+ messages in thread
From: Roland McGrath @ 2007-11-29  0:42 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin


This generalizes the getreg and putreg functions so they can be used on the
current task, as well as on a task stopped in TASK_TRACED and switched off.
This lays the groundwork to share this code for all kinds of user-mode
machine state access, not just ptrace.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/ptrace_32.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 5aca84e..2607130 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -55,6 +55,12 @@ static int putreg(struct task_struct *child,
 		if (value && (value & 3) != 3)
 			return -EIO;
 		child->thread.gs = value;
+		if (child == current)
+			/*
+			 * The user-mode %gs is not affected by
+			 * kernel entry, so we must update the CPU.
+			 */
+			loadsegment(gs, value);
 		return 0;
 	case DS:
 	case ES:
@@ -104,6 +110,8 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno)
 		break;
 	case GS:
 		retval = child->thread.gs;
+		if (child == current)
+			savesegment(gs, retval);
 		break;
 	case DS:
 	case ES:

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
                   ` (3 preceding siblings ...)
  2007-11-29  0:42 ` [PATCH x86/mm 5/6] x86-32 " Roland McGrath
@ 2007-11-29  0:42 ` Roland McGrath
  2007-11-29 17:34   ` Chuck Ebbert
  2007-11-29 10:39 ` [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Ingo Molnar
  5 siblings, 1 reply; 28+ messages in thread
From: Roland McGrath @ 2007-11-29  0:42 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin


This generalizes the getreg32 and putreg32 functions so they can be used on
the current task, as well as on a task stopped in TASK_TRACED and switched
off.  This lays the groundwork to share this code for all kinds of
user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/ia32/ptrace32.c |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/arch/x86/ia32/ptrace32.c b/arch/x86/ia32/ptrace32.c
index c52d066..d5663e2 100644
--- a/arch/x86/ia32/ptrace32.c
+++ b/arch/x86/ia32/ptrace32.c
@@ -48,19 +48,27 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
 		if (val && (val & 3) != 3)
 			return -EIO;
 		child->thread.fsindex = val & 0xffff;
+		if (child == current)
+			loadsegment(fs, child->thread.fsindex);
 		break;
 	case offsetof(struct user32, regs.gs):
 		if (val && (val & 3) != 3)
 			return -EIO;
 		child->thread.gsindex = val & 0xffff;
+		if (child == current)
+			load_gs_index(child->thread.gsindex);
 		break;
 	case offsetof(struct user32, regs.ds):
 		if (val && (val & 3) != 3)
 			return -EIO;
 		child->thread.ds = val & 0xffff;
+		if (child == current)
+			loadsegment(ds, child->thread.ds);
 		break;
 	case offsetof(struct user32, regs.es):
 		child->thread.es = val & 0xffff;
+		if (child == current)
+			loadsegment(es, child->thread.ds);
 		break;
 	case offsetof(struct user32, regs.ss):
 		if ((val & 3) != 3)
@@ -129,15 +137,23 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 	switch (regno) {
 	case offsetof(struct user32, regs.fs):
 		*val = child->thread.fsindex;
+		if (child == current)
+			asm("movl %%fs,%0" : "=r" (*val));
 		break;
 	case offsetof(struct user32, regs.gs):
 		*val = child->thread.gsindex;
+		if (child == current)
+			asm("movl %%gs,%0" : "=r" (*val));
 		break;
 	case offsetof(struct user32, regs.ds):
 		*val = child->thread.ds;
+		if (child == current)
+			asm("movl %%ds,%0" : "=r" (*val));
 		break;
 	case offsetof(struct user32, regs.es):
 		*val = child->thread.es;
+		if (child == current)
+			asm("movl %%es,%0" : "=r" (*val));
 		break;
 
 	R32(cs, cs);

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup
  2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
                   ` (4 preceding siblings ...)
  2007-11-29  0:42 ` [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 " Roland McGrath
@ 2007-11-29 10:39 ` Ingo Molnar
  5 siblings, 0 replies; 28+ messages in thread
From: Ingo Molnar @ 2007-11-29 10:39 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Morton, Linus Torvalds, linux-kernel, Thomas Gleixner,
	H. Peter Anvin


thanks - i've queued up all 6 patches of yours. They applied fine to 
x86.git and passed a quick build & boot test as well.

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29  0:42 ` [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 " Roland McGrath
@ 2007-11-29 17:34   ` Chuck Ebbert
  2007-11-29 18:09     ` Linus Torvalds
  2007-11-29 22:21     ` Roland McGrath
  0 siblings, 2 replies; 28+ messages in thread
From: Chuck Ebbert @ 2007-11-29 17:34 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Morton, Linus Torvalds, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin

On 11/28/2007 07:42 PM, Roland McGrath wrote:
> --- a/arch/x86/ia32/ptrace32.c
> +++ b/arch/x86/ia32/ptrace32.c
> @@ -48,19 +48,27 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
>  		if (val && (val & 3) != 3)
>  			return -EIO;
>  		child->thread.fsindex = val & 0xffff;
> +		if (child == current)
> +			loadsegment(fs, child->thread.fsindex);
>  		break;
>  	case offsetof(struct user32, regs.gs):
>  		if (val && (val & 3) != 3)
>  			return -EIO;
>  		child->thread.gsindex = val & 0xffff;
> +		if (child == current)
> +			load_gs_index(child->thread.gsindex);
>  		break;
>  	case offsetof(struct user32, regs.ds):
>  		if (val && (val & 3) != 3)
>  			return -EIO;
>  		child->thread.ds = val & 0xffff;
> +		if (child == current)
> +			loadsegment(ds, child->thread.ds);
>  		break;
>  	case offsetof(struct user32, regs.es):
>  		child->thread.es = val & 0xffff;
> +		if (child == current)
> +			loadsegment(es, child->thread.ds);

                                        child->thread.es ??

> @@ -129,15 +137,23 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
>  	switch (regno) {
>  	case offsetof(struct user32, regs.fs):
>  		*val = child->thread.fsindex;
> +		if (child == current)
> +			asm("movl %%fs,%0" : "=r" (*val));
>  		break;
>  	case offsetof(struct user32, regs.gs):
>  		*val = child->thread.gsindex;
> +		if (child == current)
> +			asm("movl %%gs,%0" : "=r" (*val));

Won't this return the kernel's GS instead of the user's?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 4/6] x86-64 ptrace get/putreg current task
  2007-11-29  0:41 ` [PATCH x86/mm 4/6] x86-64 ptrace get/putreg current task Roland McGrath
@ 2007-11-29 17:39   ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2007-11-29 17:39 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Morton, Linus Torvalds, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin

On Wed, Nov 28, 2007 at 04:41:18PM -0800, Roland McGrath wrote:
> 
> This generalizes the getreg and putreg functions so they can be used on the
> current task, as well as on a task stopped in TASK_TRACED and switched off.
> This lays the groundwork to share this code for all kinds of user-mode
> machine state access, not just ptrace.

Not sure it's a that good idea as it means almost every case now has
a branch for two entirely separate cases.  Wouldn't it be better to have
a separate helper for the looking at the current proces case?

Either way support for looking at the current thread should only be
merged together with an actual user for that.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 17:34   ` Chuck Ebbert
@ 2007-11-29 18:09     ` Linus Torvalds
  2007-11-29 18:16       ` H. Peter Anvin
                         ` (2 more replies)
  2007-11-29 22:21     ` Roland McGrath
  1 sibling, 3 replies; 28+ messages in thread
From: Linus Torvalds @ 2007-11-29 18:09 UTC (permalink / raw)
  To: Chuck Ebbert
  Cc: Roland McGrath, Andrew Morton, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin



Chuck seems to have caught a bug, although the wrong one:

On Thu, 29 Nov 2007, Chuck Ebbert wrote:
>
> On 11/28/2007 07:42 PM, Roland McGrath wrote:
> > --- a/arch/x86/ia32/ptrace32.c
> > +++ b/arch/x86/ia32/ptrace32.c
> > ...
> > +		if (child == current)
> > +			load_gs_index(child->thread.gsindex);

This is correct.

But the ones that do the same thing for fs/es/ds are *not*. Those three 
registers are kernel mode registers (ds/es are the regular kernel data 
segment, fs is the per-cpu data segment), and restored on return to user 
space from the stack.

For similar reasons, this is wrong:

> > @@ -129,15 +137,23 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
> >  	switch (regno) {
> >  	case offsetof(struct user32, regs.fs):
> >  		*val = child->thread.fsindex;
> > +		if (child == current)
> > +			asm("movl %%fs,%0" : "=r" (*val));
> >  		break;

That %fs is the kernel per-cpu thing, not the user %fs.

But this one is correct:

> >  	case offsetof(struct user32, regs.gs):
> >  		*val = child->thread.gsindex;
> > +		if (child == current)
> > +			asm("movl %%gs,%0" : "=r" (*val));
> 
> Won't this return the kernel's GS instead of the user's?

No, %gs is untouched by the kernel, so it contains user space version, and 
getting the value directly from %gs looks correct.

		Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:09     ` Linus Torvalds
@ 2007-11-29 18:16       ` H. Peter Anvin
  2007-11-29 18:31         ` Linus Torvalds
  2007-11-29 18:17       ` Chuck Ebbert
  2007-11-29 22:25       ` Roland McGrath
  2 siblings, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2007-11-29 18:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Chuck Ebbert, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, Ingo Molnar

Linus Torvalds wrote:
> 
> But this one is correct:
> 
>>>  	case offsetof(struct user32, regs.gs):
>>>  		*val = child->thread.gsindex;
>>> +		if (child == current)
>>> +			asm("movl %%gs,%0" : "=r" (*val));
>> Won't this return the kernel's GS instead of the user's?
> 
> No, %gs is untouched by the kernel, so it contains user space version, and 
> getting the value directly from %gs looks correct.
> 

Brief summary/reminder:

The kernel uses %fs in 32-bit mode and %gs in 64-bit mode.
User space TLS uses %gs in 32-bit mode and %fs in 64-bit mode.

The 64-bit kernel has to use %gs in order for SWAPGS to be available to 
it (by which time the 32-bit ABI was already fixed.)  It is advantageous 
for user space to use the register the kernel typically won't, in order 
to speed up system call entry/exit.

	-hpa

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:09     ` Linus Torvalds
  2007-11-29 18:16       ` H. Peter Anvin
@ 2007-11-29 18:17       ` Chuck Ebbert
  2007-11-29 18:23         ` H. Peter Anvin
  2007-11-29 22:25       ` Roland McGrath
  2 siblings, 1 reply; 28+ messages in thread
From: Chuck Ebbert @ 2007-11-29 18:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Roland McGrath, Andrew Morton, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin

On 11/29/2007 01:09 PM, Linus Torvalds wrote:
>>>  	case offsetof(struct user32, regs.gs):
>>>  		*val = child->thread.gsindex;
>>> +		if (child == current)
>>> +			asm("movl %%gs,%0" : "=r" (*val));
>> Won't this return the kernel's GS instead of the user's?
> 
> No, %gs is untouched by the kernel, so it contains user space version, and 
> getting the value directly from %gs looks correct.
> 

But this is x86_64, where swapgs is done on kernel entry.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:17       ` Chuck Ebbert
@ 2007-11-29 18:23         ` H. Peter Anvin
  0 siblings, 0 replies; 28+ messages in thread
From: H. Peter Anvin @ 2007-11-29 18:23 UTC (permalink / raw)
  To: Chuck Ebbert
  Cc: Linus Torvalds, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, Ingo Molnar

Chuck Ebbert wrote:
> On 11/29/2007 01:09 PM, Linus Torvalds wrote:
>>>>  	case offsetof(struct user32, regs.gs):
>>>>  		*val = child->thread.gsindex;
>>>> +		if (child == current)
>>>> +			asm("movl %%gs,%0" : "=r" (*val));
>>> Won't this return the kernel's GS instead of the user's?
>> No, %gs is untouched by the kernel, so it contains user space version, and 
>> getting the value directly from %gs looks correct.
>>
> 
> But this is x86_64, where swapgs is done on kernel entry.
> 

For i386-x86_64 sharing, getting to the user segments probably should be 
macroized.  I'm thinking something like 
get_user_[cs|ds|es|fs|gs|ss](thread) in <asm/processor.h> doing the 
appropriate thing for different configurations.

	-hpa

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:16       ` H. Peter Anvin
@ 2007-11-29 18:31         ` Linus Torvalds
  2007-11-29 18:45           ` H. Peter Anvin
  0 siblings, 1 reply; 28+ messages in thread
From: Linus Torvalds @ 2007-11-29 18:31 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Chuck Ebbert, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, Ingo Molnar



On Thu, 29 Nov 2007, H. Peter Anvin wrote:
> 
> The kernel uses %fs in 32-bit mode and %gs in 64-bit mode.

Yeah, thanks for reminding me about this particular insanity.

We should just make the kernel always use %gs for the percpu data. On 
32-bit x86 there really is no reason to use %fs over %gs, other than the 
purely historical one, and the fact that "f" is alphabetically before "g", 
so it's the one you use first if you have no other preferences.

That way we can more easily share code, if the rules for fs/gs are the 
same for 32-bit/64-bit code.

However, you also say:

> It is advantageous for user space to use the register the kernel 
> typically won't, in order to speed up system call entry/exit.

but I'm not seeing the reason for that one. Care to comment more? (Yes, 
there is often a latency from segment reload to use, but the reload 
latency for system call exit *should* be entirely covered by the cost of 
doing the system call return itself, no?)

			Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:31         ` Linus Torvalds
@ 2007-11-29 18:45           ` H. Peter Anvin
  2007-11-29 19:08             ` Linus Torvalds
  0 siblings, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2007-11-29 18:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Chuck Ebbert, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, Ingo Molnar

Linus Torvalds wrote:
> 
> However, you also say:
> 
>> It is advantageous for user space to use the register the kernel 
>> typically won't, in order to speed up system call entry/exit.
> 
> but I'm not seeing the reason for that one. Care to comment more? (Yes, 
> there is often a latency from segment reload to use, but the reload 
> latency for system call exit *should* be entirely covered by the cost of 
> doing the system call return itself, no?)
> 

I do seem to recall that some processor implementations can load a NULL 
segment faster than a non-NULL segment.  This was significant enough 
that we wanted to use %fs in x86-64 userspace, as opposed to the 
original ABI which used %gs both in userspace and in the kernel.

	-hpa

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:45           ` H. Peter Anvin
@ 2007-11-29 19:08             ` Linus Torvalds
  2007-11-29 19:16               ` H. Peter Anvin
  0 siblings, 1 reply; 28+ messages in thread
From: Linus Torvalds @ 2007-11-29 19:08 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Chuck Ebbert, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, Ingo Molnar



On Thu, 29 Nov 2007, H. Peter Anvin wrote:
> Linus Torvalds wrote:
> > 
> > > It is advantageous for user space to use the register the kernel typically
> > > won't, in order to speed up system call entry/exit.
> > 
> > but I'm not seeing the reason for that one. Care to comment more? (Yes,
> > there is often a latency from segment reload to use, but the reload latency
> > for system call exit *should* be entirely covered by the cost of doing the
> > system call return itself, no?)
> 
> I do seem to recall that some processor implementations can load a NULL
> segment faster than a non-NULL segment.  This was significant enough that we
> wanted to use %fs in x86-64 userspace, as opposed to the original ABI which
> used %gs both in userspace and in the kernel.

Ahh, I think you may be right for some CPUs. The zero selector is indeed 
potentially faster to load, since it doesn't have to even bother looking 
at the GDT/LDT.

That said, I doubt it's very noticeable. I just ran tests on both an old 
P4 and on a more modern Core 2 machine, and for both of those the 
performance was identical between loading a NUL selector and loading it 
with a non-zero one.

But I could well imagine that it matters a few cycles on other CPU's. But 
from my testing, it definitely isn't noticeable, and I think the 
maintenance advantage of using the same segment setup would more than make 
up for the fact that maybe some odd CPU can see a difference.

			Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:08             ` Linus Torvalds
@ 2007-11-29 19:16               ` H. Peter Anvin
  2007-11-29 19:27                 ` Andi Kleen
  0 siblings, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2007-11-29 19:16 UTC (permalink / raw)
  To: Linus Torvalds, Andi Kleen
  Cc: Chuck Ebbert, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, Ingo Molnar

Andi, do you happen to remember the details on this?

	-hpa


Linus Torvalds wrote:
> 
> On Thu, 29 Nov 2007, H. Peter Anvin wrote:
>> Linus Torvalds wrote:
>>>> It is advantageous for user space to use the register the kernel typically
>>>> won't, in order to speed up system call entry/exit.
>>> but I'm not seeing the reason for that one. Care to comment more? (Yes,
>>> there is often a latency from segment reload to use, but the reload latency
>>> for system call exit *should* be entirely covered by the cost of doing the
>>> system call return itself, no?)
>> I do seem to recall that some processor implementations can load a NULL
>> segment faster than a non-NULL segment.  This was significant enough that we
>> wanted to use %fs in x86-64 userspace, as opposed to the original ABI which
>> used %gs both in userspace and in the kernel.
> 
> Ahh, I think you may be right for some CPUs. The zero selector is indeed 
> potentially faster to load, since it doesn't have to even bother looking 
> at the GDT/LDT.
> 
> That said, I doubt it's very noticeable. I just ran tests on both an old 
> P4 and on a more modern Core 2 machine, and for both of those the 
> performance was identical between loading a NUL selector and loading it 
> with a non-zero one.
> 
> But I could well imagine that it matters a few cycles on other CPU's. But 
> from my testing, it definitely isn't noticeable, and I think the 
> maintenance advantage of using the same segment setup would more than make 
> up for the fact that maybe some odd CPU can see a difference.
> 
> 			Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:16               ` H. Peter Anvin
@ 2007-11-29 19:27                 ` Andi Kleen
  2007-11-29 19:44                   ` Ingo Molnar
  2007-11-29 19:49                   ` Linus Torvalds
  0 siblings, 2 replies; 28+ messages in thread
From: Andi Kleen @ 2007-11-29 19:27 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Linus Torvalds, Andi Kleen, Chuck Ebbert, Roland McGrath,
	Andrew Morton, linux-kernel, Thomas Gleixner, Ingo Molnar,
	Jeremy Fitzhardinge, zach

On Thu, Nov 29, 2007 at 11:16:55AM -0800, H. Peter Anvin wrote:
> Andi, do you happen to remember the details on this?

x86-64 has to use GS because there is no SWAPFS
We decided to make it opposite on user space back then, but not
based on benchmarks (there were only simulators back then)  Oh yes
the reason was that the GS context switch is slightly more expensive
than the FS one and the code does lazy optimization.

For i386 iirc Jeremy/Zach did the benchmarking and they settled
on %fs because it was faster for something (originally it was %gs too) 

-Andi

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:27                 ` Andi Kleen
@ 2007-11-29 19:44                   ` Ingo Molnar
  2007-11-29 20:01                     ` H. Peter Anvin
  2007-12-01 23:44                     ` Jeremy Fitzhardinge
  2007-11-29 19:49                   ` Linus Torvalds
  1 sibling, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2007-11-29 19:44 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Linus Torvalds, Andi Kleen, Chuck Ebbert,
	Roland McGrath, Andrew Morton, linux-kernel, Thomas Gleixner,
	Jeremy Fitzhardinge, zach


* Andi Kleen <andi@firstfloor.org> wrote:

> For i386 iirc Jeremy/Zach did the benchmarking and they settled on %fs 
> because it was faster for something (originally it was %gs too)

yep. IIRC, some CPUs only optimize %fs because that's what Windows uses 
and leaves Linux with %gs out in the cold. There's also a performance 
penalty for overlapping segment use, if the segment cache is single 
entry only with an additional optimization for NULL [which just hides 
the segment cache].

But if it's good for unification we could switch that to %gs again on 
32-bit. I was one of the people who advocated the use of the 'other' 
segment register, so that the hardware has less overlap, but clean and 
unified code trumps this concern. It shouldnt be an issue on reasonably 
modern CPUs anyway.

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:27                 ` Andi Kleen
  2007-11-29 19:44                   ` Ingo Molnar
@ 2007-11-29 19:49                   ` Linus Torvalds
  2007-11-29 20:11                     ` Andi Kleen
  1 sibling, 1 reply; 28+ messages in thread
From: Linus Torvalds @ 2007-11-29 19:49 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Andi Kleen, Chuck Ebbert, Roland McGrath,
	Andrew Morton, linux-kernel, Thomas Gleixner, Ingo Molnar,
	Jeremy Fitzhardinge, zach



On Thu, 29 Nov 2007, Andi Kleen wrote:
> 
> For i386 iirc Jeremy/Zach did the benchmarking and they settled
> on %fs because it was faster for something (originally it was %gs too) 

Hmm. Context switching ends up having to switch the segment that we do 
*not* use for the kernel, and the context switching can be faster for the 
case where

 (a) the segment selector is identical in both source and destination
*AND*
 (b) the selector is either zero or points to a GDT entry.

So yes, context switching can be faster for a NUL selector if both old and 
new threads use it, and in fact that's the onle case we check right now:

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs);

for the 32-bit case. That is faster if "gs" is normally zero, since that 
means that we can avoid that loadsegment.

HOWEVER. That is actually not the right (well, "complete") conditional, 
since it's only one sub-case of the thing that matters. The right 
conditional is probably

	/*
	 * Restore %gs if needed (which is common).
	 * We can avoid it if they are identical, and
	 * point to the GDT.
	 */
	if ((prev->gs ^ next->gs) | (next->gs & 4))
		loadsegment(gs, next->gs);

At that point, we would only have to reload stuff if the user actually 
uses a local descriptor table entry (which does happen for threaded apps, 
I guess, but at least we avoid it for all the common traditional UNIX 
processes).

			Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:44                   ` Ingo Molnar
@ 2007-11-29 20:01                     ` H. Peter Anvin
  2007-12-01 23:44                     ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 28+ messages in thread
From: H. Peter Anvin @ 2007-11-29 20:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, Linus Torvalds, Andi Kleen, Chuck Ebbert,
	Roland McGrath, Andrew Morton, linux-kernel, Thomas Gleixner,
	Jeremy Fitzhardinge, zach

Ingo Molnar wrote:
> * Andi Kleen <andi@firstfloor.org> wrote:
> 
>> For i386 iirc Jeremy/Zach did the benchmarking and they settled on %fs 
>> because it was faster for something (originally it was %gs too)
> 
> yep. IIRC, some CPUs only optimize %fs because that's what Windows uses 
> and leaves Linux with %gs out in the cold. There's also a performance 
> penalty for overlapping segment use, if the segment cache is single 
> entry only with an additional optimization for NULL [which just hides 
> the segment cache].
> 

For the 32-bit case, which is the only one that can be changed at all:

I guess, specifically, that assuming a sysenter implementation (meaning 
CS is handled ad hoc by the sysenter/sysexit instructions) we have 
USER_DS, KERNEL_DS, and the kernel thread pointer.  If the segments 
don't overlap, the user thread pointer gets loaded once per exec or task 
switch, and doesn't change in between.  If they do, the user thread 
pointer has to be reloaded on system call exit.

A nonzero segment load involves a memory reference followed by 
data-dependent traps on that reference, so the amount of reordering the 
CPU can do to hide that latency is limited.  A zero segment load doesn't 
  perform the memory reference at all.

Note that a segment cache (a proper cache, not the segment descriptor 
registers that the Intel docs bogusly call a "cache") does *not* save 
the memory reference, since if the descriptor has changed in memory it 
*has* to be honoured; it only allows it to be performed lazily (assume 
the cache is valid, then throw an internal exception and don't commit 
state if the descriptor stored in the cache tag doesn't match the 
descriptor loaded from memory.)

	-hpa


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:49                   ` Linus Torvalds
@ 2007-11-29 20:11                     ` Andi Kleen
  2007-11-29 20:23                       ` Linus Torvalds
  0 siblings, 1 reply; 28+ messages in thread
From: Andi Kleen @ 2007-11-29 20:11 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: H. Peter Anvin, Chuck Ebbert, Roland McGrath, Andrew Morton,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Jeremy Fitzhardinge,
	zach


> HOWEVER. That is actually not the right (well, "complete") conditional, 
> since it's only one sub-case of the thing that matters. The right 
> conditional is probably
> 
> 	/*
> 	 * Restore %gs if needed (which is common).
> 	 * We can avoid it if they are identical, and
> 	 * point to the GDT.

How would you catch (common) the case of them having different bases in the 
GDT TLS entries? At some point the selector has to be reloaded, otherwise 
it won't be picked up by the CPU.

I think the original condition is correct. You could maybe merge it with the 
TLS entry rewrite and only do it if something changes there. Not
sure it is worth it though.

-Andi
> 	 */
> 	if ((prev->gs ^ next->gs) | (next->gs & 4))
> 		loadsegment(gs, next->gs);
> 
> At that point, we would only have to reload stuff if the user actually 
> uses a local descriptor table entry (which does happen for threaded apps, 
> I guess, but at least we avoid it for all the common traditional UNIX 
> processes).
> 
> 			Linus
> 



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 20:11                     ` Andi Kleen
@ 2007-11-29 20:23                       ` Linus Torvalds
  0 siblings, 0 replies; 28+ messages in thread
From: Linus Torvalds @ 2007-11-29 20:23 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Chuck Ebbert, Roland McGrath, Andrew Morton,
	linux-kernel, Thomas Gleixner, Ingo Molnar, Jeremy Fitzhardinge,
	zach



On Thu, 29 Nov 2007, Andi Kleen wrote:
> 
> How would you catch (common) the case of them having different bases in the 
> GDT TLS entries? At some point the selector has to be reloaded, otherwise 
> it won't be picked up by the CPU.

You're right. I somehow thought we were using the LDT for TLS entries, 
which I guess we did back before kernel support, but yeah, we're using 
per-CPU GDT entries, and would need to check that those match too.

So yeah, the only safe ones are the ones we don't ever change, and that is 
basically just the kernel entries (that users cannot load anyway).

So scratch that. You're right - the NUL selector is the only special one 
for the context switcher. 

		Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 17:34   ` Chuck Ebbert
  2007-11-29 18:09     ` Linus Torvalds
@ 2007-11-29 22:21     ` Roland McGrath
  2007-11-29 23:00       ` Chuck Ebbert
  1 sibling, 1 reply; 28+ messages in thread
From: Roland McGrath @ 2007-11-29 22:21 UTC (permalink / raw)
  To: Chuck Ebbert
  Cc: Andrew Morton, Linus Torvalds, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin

> >  	case offsetof(struct user32, regs.gs):
> >  		*val = child->thread.gsindex;
> > +		if (child == current)
> > +			asm("movl %%gs,%0" : "=r" (*val));
> 
> Won't this return the kernel's GS instead of the user's?
[...]
> But this is x86_64, where swapgs is done on kernel entry.

As I understand it, and from what the documentation I have says, swapgs has
nothing to do with the %gs selector.  It affects the "GS base register",
i.e. the MSR.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 18:09     ` Linus Torvalds
  2007-11-29 18:16       ` H. Peter Anvin
  2007-11-29 18:17       ` Chuck Ebbert
@ 2007-11-29 22:25       ` Roland McGrath
  2007-11-29 22:34         ` Linus Torvalds
  2 siblings, 1 reply; 28+ messages in thread
From: Roland McGrath @ 2007-11-29 22:25 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Chuck Ebbert, Andrew Morton, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin

> But the ones that do the same thing for fs/es/ds are *not*. Those three 
> registers are kernel mode registers (ds/es are the regular kernel data 
> segment, fs is the per-cpu data segment), and restored on return to user 
> space from the stack.

Um, really?  This is x86-64 code.  AIUI those values don't have any effect
at all in 64-bit mode (as the kernel is).  I haven't found any code in
entry_64.S or ia32entry.S that touches them.  __switch_to uses direct
access to the segment registers just as I've done.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 22:25       ` Roland McGrath
@ 2007-11-29 22:34         ` Linus Torvalds
  0 siblings, 0 replies; 28+ messages in thread
From: Linus Torvalds @ 2007-11-29 22:34 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Chuck Ebbert, Andrew Morton, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin



On Thu, 29 Nov 2007, Roland McGrath wrote:
> 
> Um, really?  This is x86-64 code.  AIUI those values don't have any effect
> at all in 64-bit mode (as the kernel is).  I haven't found any code in
> entry_64.S or ia32entry.S that touches them.  __switch_to uses direct
> access to the segment registers just as I've done.

Yes, you're right, I just looked at the ia32 part and didn't read your 
patches right.

		Linus

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 22:21     ` Roland McGrath
@ 2007-11-29 23:00       ` Chuck Ebbert
  0 siblings, 0 replies; 28+ messages in thread
From: Chuck Ebbert @ 2007-11-29 23:00 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Morton, Linus Torvalds, linux-kernel, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin

On 11/29/2007 05:21 PM, Roland McGrath wrote:
>>>  	case offsetof(struct user32, regs.gs):
>>>  		*val = child->thread.gsindex;
>>> +		if (child == current)
>>> +			asm("movl %%gs,%0" : "=r" (*val));
>> Won't this return the kernel's GS instead of the user's?
> [...]
>> But this is x86_64, where swapgs is done on kernel entry.
> 
> As I understand it, and from what the documentation I have says, swapgs has
> nothing to do with the %gs selector.  It affects the "GS base register",
> i.e. the MSR.
> 

Yep, I confused the GS selector with the base address in the descriptor.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 current task
  2007-11-29 19:44                   ` Ingo Molnar
  2007-11-29 20:01                     ` H. Peter Anvin
@ 2007-12-01 23:44                     ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 28+ messages in thread
From: Jeremy Fitzhardinge @ 2007-12-01 23:44 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, H. Peter Anvin, Linus Torvalds, Andi Kleen,
	Chuck Ebbert, Roland McGrath, Andrew Morton, linux-kernel,
	Thomas Gleixner, zach


On Nov 29, 2007, at 2:44 PM, Ingo Molnar wrote:

>
> * Andi Kleen <andi@firstfloor.org> wrote:
>
>> For i386 iirc Jeremy/Zach did the benchmarking and they settled on  
>> %fs
>> because it was faster for something (originally it was %gs too)
>
> yep. IIRC, some CPUs only optimize %fs because that's what Windows  
> uses
> and leaves Linux with %gs out in the cold.

I did measure some anomalies with the AMD K6+ (or something like  
that), in which %gs was faster than %fs.  It was pretty much  
inexplicable, but also unique - all other processors I tested (which  
was a range from Pentium MMX to current) had identical performance.

> There's also a performance
> penalty for overlapping segment use, if the segment cache is single
> entry only with an additional optimization for NULL [which just hides
> the segment cache].

Some processors do perform slightly better with null selector loads  
than GDT/LDT ones, but it wasn't really noticeable for modern  
processors.  The Intel architecture guy I asked about this said that  
it might be worth doing, but it would likely be swamped by a GDT  
cache miss.  I looked at rearranging the kernel's GDT to pack all the  
kernel entry/exit entries into as few cachelines as possible, but it  
was surprisingly fiddley.

> But if it's good for unification we could switch that to %gs again on
> 32-bit. I was one of the people who advocated the use of the 'other'
> segment register, so that the hardware has less overlap, but clean and
> unified code trumps this concern. It shouldnt be an issue on  
> reasonably
> modern CPUs anyway.

Well, overall it should be fairly easy to make the two arches use  
their own segment registers with a simple #define.  But things like  
ptrace and vm86 were tricky, though I guess the latter isn't an issue  
for 64-bit.

I originally chose %gs for the kernel, partly in the hope that  
compiler support for TLS would be helpful in the kernel, though that  
doesn't seem like a good idea in retrospect.  %gs for the sake of  
consistency would be reasonable, and wouldn't have a measurable  
downside.

	J

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2007-12-01 23:45 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-29  0:38 [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Roland McGrath
2007-11-29  0:40 ` [PATCH x86/mm 2/6] x86-64 ptrace whitespace Roland McGrath
2007-11-29  0:40 ` [PATCH x86/mm 3/6] x86-32 " Roland McGrath
2007-11-29  0:41 ` [PATCH x86/mm 4/6] x86-64 ptrace get/putreg current task Roland McGrath
2007-11-29 17:39   ` Christoph Hellwig
2007-11-29  0:42 ` [PATCH x86/mm 5/6] x86-32 " Roland McGrath
2007-11-29  0:42 ` [PATCH x86/mm 6/6] x86-64 ia32 ptrace get/putreg32 " Roland McGrath
2007-11-29 17:34   ` Chuck Ebbert
2007-11-29 18:09     ` Linus Torvalds
2007-11-29 18:16       ` H. Peter Anvin
2007-11-29 18:31         ` Linus Torvalds
2007-11-29 18:45           ` H. Peter Anvin
2007-11-29 19:08             ` Linus Torvalds
2007-11-29 19:16               ` H. Peter Anvin
2007-11-29 19:27                 ` Andi Kleen
2007-11-29 19:44                   ` Ingo Molnar
2007-11-29 20:01                     ` H. Peter Anvin
2007-12-01 23:44                     ` Jeremy Fitzhardinge
2007-11-29 19:49                   ` Linus Torvalds
2007-11-29 20:11                     ` Andi Kleen
2007-11-29 20:23                       ` Linus Torvalds
2007-11-29 18:17       ` Chuck Ebbert
2007-11-29 18:23         ` H. Peter Anvin
2007-11-29 22:25       ` Roland McGrath
2007-11-29 22:34         ` Linus Torvalds
2007-11-29 22:21     ` Roland McGrath
2007-11-29 23:00       ` Chuck Ebbert
2007-11-29 10:39 ` [PATCH x86/mm 1/6] x86-64 ia32 ptrace pt_regs cleanup Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).