linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* cpu2000(both float and int) 13% regression with 2.6.28-rc1
@ 2008-10-28  6:32 Zhang, Yanmin
  2008-10-28  8:03 ` Ingo Molnar
  0 siblings, 1 reply; 9+ messages in thread
From: Zhang, Yanmin @ 2008-10-28  6:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

Compared with 2.6.27, cpu2000 (both float and int) shows about a 13% regression
with 2.6.28-rc1 on my new-model x86-64 machine.

I bisected down to below patch.

commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
Merge: d847059... 43603c8...
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Oct 11 20:23:20 2008 +0200

    Merge branch 'x86/unify-cpu-detect' into x86-v28-for-linus-phase4-D
    
    Conflicts:
        arch/x86/kernel/cpu/common.c
        arch/x86/kernel/signal_64.c
        include/asm-x86/cpufeature.h


When I tried to revert it against 2.6.28-rc2, there were many conflicts.

Ingo,
Is it possible to break it into small patches, so we can locate the bad source lines?

Yanmin



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-28  6:32 cpu2000(both float and int) 13% regression with 2.6.28-rc1 Zhang, Yanmin
@ 2008-10-28  8:03 ` Ingo Molnar
  2008-10-28  8:31   ` Zhang, Yanmin
  2008-10-28 20:26   ` Suresh Siddha
  0 siblings, 2 replies; 9+ messages in thread
From: Ingo Molnar @ 2008-10-28  8:03 UTC (permalink / raw)
  To: Zhang, Yanmin
  Cc: LKML, H. Peter Anvin, Suresh Siddha, Roland McGrath,
	Hiroshi Shimamoto, Yinghai Lu


* Zhang, Yanmin <yanmin_zhang@linux.intel.com> wrote:

> Comparing with 2.6.27, cpu2000 (both float and int) has about 13% regression
> with 2.6.28-rc1 on my new-model x86-64 machine.
> 
> I bisected down to below patch.
> 
> commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
> Merge: d847059... 43603c8...
> Author: Ingo Molnar <mingo@elte.hu>
> Date:   Sat Oct 11 20:23:20 2008 +0200
> 
>     Merge branch 'x86/unify-cpu-detect' into x86-v28-for-linus-phase4-D
>     
>     Conflicts:
>         arch/x86/kernel/cpu/common.c
>         arch/x86/kernel/signal_64.c
>         include/asm-x86/cpufeature.h
> 
> 
> When I tried to revert it against 2.6.28-rc2, there are many conflictions.

My guess right now is that it's the merge commit's doing, see the diff 
below. Could you undo just the restore_sigcontext() chunk of it, in 
arch/x86/kernel/signal_64.c:

@@@ -157,20 -96,9 +94,9 @@@ restore_sigcontext(struct pt_regs *regs

I've attached it as a patch below, apply it with "patch -p1 -R"

(I've also attached the full merge commit further below - just in case 
it's in another portion of it.)

	Ingo

---------------->

diff --cc arch/x86/kernel/signal_64.c
index 694aa88,4665b59..823a55b
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@@ -157,20 -96,9 +94,9 @@@ restore_sigcontext(struct pt_regs *regs
  	}
  
  	{
 -		struct _fpstate __user * buf;
 +		struct _fpstate __user *buf;
  		err |= __get_user(buf, &sc->fpstate);
- 
- 		if (buf) {
- 			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- 				goto badframe;
- 			err |= restore_i387(buf);
- 		} else {
- 			struct task_struct *me = current;
- 			if (used_math()) {
- 				clear_fpu(me);
- 				clear_used_math();
- 			}
- 		}
+ 		err |= restore_i387_xstate(buf);
  	}
  
  	err |= __get_user(*pax, &sc->ax);

------------------->
commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
Merge: d847059... 43603c8...
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Oct 11 20:23:20 2008 +0200

    Merge branch 'x86/unify-cpu-detect' into x86-v28-for-linus-phase4-D
    
    Conflicts:
    	arch/x86/kernel/cpu/common.c
    	arch/x86/kernel/signal_64.c
    	include/asm-x86/cpufeature.h

diff --cc arch/x86/kernel/sigframe.h
index 8b4956e,6dd7e2b..cc673aa
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@@ -23,10 -32,6 +32,11 @@@ struct rt_sigframe 
  	char __user *pretcode;
  	struct ucontext uc;
  	struct siginfo info;
+ 	/* fp state follows here */
  };
 +
 +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 +		sigset_t *set, struct pt_regs *regs);
 +int ia32_setup_frame(int sig, struct k_sigaction *ka,
 +		sigset_t *set, struct pt_regs *regs);
  #endif
diff --cc arch/x86/kernel/signal_64.c
index 694aa88,4665b59..823a55b
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@@ -157,20 -96,9 +94,9 @@@ restore_sigcontext(struct pt_regs *regs
  	}
  
  	{
 -		struct _fpstate __user * buf;
 +		struct _fpstate __user *buf;
  		err |= __get_user(buf, &sc->fpstate);
- 
- 		if (buf) {
- 			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- 				goto badframe;
- 			err |= restore_i387(buf);
- 		} else {
- 			struct task_struct *me = current;
- 			if (used_math()) {
- 				clear_fpu(me);
- 				clear_used_math();
- 			}
- 		}
+ 		err |= restore_i387_xstate(buf);
  	}
  
  	err |= __get_user(*pax, &sc->ax);
@@@ -273,10 -197,10 +196,10 @@@ get_stack(struct k_sigaction *ka, struc
  }
  
  static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 -			   sigset_t *set, struct pt_regs * regs)
 +			   sigset_t *set, struct pt_regs *regs)
  {
  	struct rt_sigframe __user *frame;
- 	struct _fpstate __user *fp = NULL;
+ 	void __user *fp = NULL;
  	int err = 0;
  	struct task_struct *me = current;
  
@@@ -285,11 -209,8 +208,8 @@@
  		frame = (void __user *)round_down(
  			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
  
- 		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
- 			goto give_sigsegv;
- 
- 		if (save_i387(fp) < 0)
+ 		if (save_i387_xstate(fp) < 0)
 -			err |= -1; 
 +			err |= -1;
  	} else
  		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
  
@@@ -301,9 -222,12 +221,12 @@@
  		if (err)
  			goto give_sigsegv;
  	}
 -		
 +
  	/* Create the ucontext.  */
- 	err |= __put_user(0, &frame->uc.uc_flags);
+ 	if (cpu_has_xsave)
+ 		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ 	else
+ 		err |= __put_user(0, &frame->uc.uc_flags);
  	err |= __put_user(0, &frame->uc.uc_link);
  	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
  	err |= __put_user(sas_ss_flags(regs->sp),
diff --cc include/asm-x86/cpufeature.h
index 065c6a8,8d45690..adfeae6
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@@ -64,49 -72,61 +72,63 @@@
  #define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
  #define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
  /* cpu types for specific tunings: */
- #define X86_FEATURE_K8		(3*32+ 4) /* Opteron, Athlon64 */
- #define X86_FEATURE_K7		(3*32+ 5) /* Athlon */
- #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
- #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
+ #define X86_FEATURE_K8		(3*32+ 4) /* "" Opteron, Athlon64 */
+ #define X86_FEATURE_K7		(3*32+ 5) /* "" Athlon */
+ #define X86_FEATURE_P3		(3*32+ 6) /* "" P3 */
+ #define X86_FEATURE_P4		(3*32+ 7) /* "" P4 */
  #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
  #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
- #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
+ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
  #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
++#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
  #define X86_FEATURE_PEBS	(3*32+12) /* Precise-Event Based Sampling */
  #define X86_FEATURE_BTS		(3*32+13) /* Branch Trace Store */
- #define X86_FEATURE_SYSCALL32	(3*32+14) /* syscall in ia32 userspace */
- #define X86_FEATURE_SYSENTER32	(3*32+15) /* sysenter in ia32 userspace */
- #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
- #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
- #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
- #define X86_FEATURE_11AP	(3*32+19) /* Bad local APIC aka 11AP */
+ #define X86_FEATURE_SYSCALL32	(3*32+14) /* "" syscall in ia32 userspace */
+ #define X86_FEATURE_SYSENTER32	(3*32+15) /* "" sysenter in ia32 userspace */
+ #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well */
+ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
+ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
+ #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
  #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 +#define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
+ #define X86_FEATURE_XTOPOLOGY	(3*32+21) /* cpu topology enum extensions */
  
  /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
- #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
- #define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
- #define X86_FEATURE_DSCPL	(4*32+ 4) /* CPL Qualified Debug Store */
+ #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
+ #define X86_FEATURE_PCLMULQDQ	(4*32+ 1) /* PCLMULQDQ instruction */
+ #define X86_FEATURE_DTES64	(4*32+ 2) /* 64-bit Debug Store */
+ #define X86_FEATURE_MWAIT	(4*32+ 3) /* "monitor" Monitor/Mwait support */
+ #define X86_FEATURE_DSCPL	(4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+ #define X86_FEATURE_VMX		(4*32+ 5) /* Hardware virtualization */
+ #define X86_FEATURE_SMX		(4*32+ 6) /* Safer mode */
  #define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
  #define X86_FEATURE_TM2		(4*32+ 8) /* Thermal Monitor 2 */
+ #define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
  #define X86_FEATURE_CID		(4*32+10) /* Context ID */
+ #define X86_FEATURE_FMA		(4*32+12) /* Fused multiply-add */
  #define X86_FEATURE_CX16	(4*32+13) /* CMPXCHG16B */
  #define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
+ #define X86_FEATURE_PDCM	(4*32+15) /* Performance Capabilities */
  #define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
+ #define X86_FEATURE_XMM4_1	(4*32+19) /* "sse4_1" SSE-4.1 */
+ #define X86_FEATURE_XMM4_2	(4*32+20) /* "sse4_2" SSE-4.2 */
  #define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
- #define X86_FEATURE_XMM4_2	(4*32+20) /* Streaming SIMD Extensions-4.2 */
+ #define X86_FEATURE_AES		(4*32+25) /* AES instructions */
+ #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+ #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
+ #define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
  
  /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
- #define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
- #define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* on-CPU RNG enabled */
- #define X86_FEATURE_XCRYPT	(5*32+ 6) /* on-CPU crypto (xcrypt insn) */
- #define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* on-CPU crypto enabled */
+ #define X86_FEATURE_XSTORE	(5*32+ 2) /* "rng" RNG present (xstore) */
+ #define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* "rng_en" RNG enabled */
+ #define X86_FEATURE_XCRYPT	(5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+ #define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* "ace_en" on-CPU crypto enabled */
  #define X86_FEATURE_ACE2	(5*32+ 8) /* Advanced Cryptography Engine v2 */
  #define X86_FEATURE_ACE2_EN	(5*32+ 9) /* ACE v2 enabled */
- #define X86_FEATURE_PHE		(5*32+ 10) /* PadLock Hash Engine */
- #define X86_FEATURE_PHE_EN	(5*32+ 11) /* PHE enabled */
- #define X86_FEATURE_PMM		(5*32+ 12) /* PadLock Montgomery Multiplier */
- #define X86_FEATURE_PMM_EN	(5*32+ 13) /* PMM enabled */
+ #define X86_FEATURE_PHE		(5*32+10) /* PadLock Hash Engine */
+ #define X86_FEATURE_PHE_EN	(5*32+11) /* PHE enabled */
+ #define X86_FEATURE_PMM		(5*32+12) /* PadLock Montgomery Multiplier */
+ #define X86_FEATURE_PMM_EN	(5*32+13) /* PMM enabled */
  
  /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
  #define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-28  8:03 ` Ingo Molnar
@ 2008-10-28  8:31   ` Zhang, Yanmin
  2008-10-28 20:26   ` Suresh Siddha
  1 sibling, 0 replies; 9+ messages in thread
From: Zhang, Yanmin @ 2008-10-28  8:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, H. Peter Anvin, Suresh Siddha, Roland McGrath,
	Hiroshi Shimamoto, Yinghai Lu


On Tue, 2008-10-28 at 09:03 +0100, Ingo Molnar wrote:
> * Zhang, Yanmin <yanmin_zhang@linux.intel.com> wrote:
> 
> > Comparing with 2.6.27, cpu2000 (both float and int) has about 13% regression
> > with 2.6.28-rc1 on my new-model x86-64 machine.
> > 
> > I bisected down to below patch.
> > 
> > commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
> > Merge: d847059... 43603c8...
> > Author: Ingo Molnar <mingo@elte.hu>
> > Date:   Sat Oct 11 20:23:20 2008 +0200
> > 
> >     Merge branch 'x86/unify-cpu-detect' into x86-v28-for-linus-phase4-D
> >     
> >     Conflicts:
> >         arch/x86/kernel/cpu/common.c
> >         arch/x86/kernel/signal_64.c
> >         include/asm-x86/cpufeature.h
> > 
> > 
> > When I tried to revert it against 2.6.28-rc2, there are many conflictions.
> 
> My guess right now is that it's the merge commit's doing, see the diff 
> below. Could you undo just the restore_sigcontext() chunk of it, in 
> arch/x86/kernel/signal_64.c:
I failed to apply the patch. When I tried to manually copy the source code
from 2.6.27 to 2.6.28-rc2, I found there are many dependencies on xsave, such as
TS_XSAVE, so restore might not match save.

> 
> @@@ -157,20 -96,9 +94,9 @@@ restore_sigcontext(struct pt_regs *regs
> 
> I've attached it as a patch below, apply it with "patch -p1 -R"
> 
> (I've also attached the full merge commit further below - just in case 
> it's in another portion of it.)
> 
> 	Ingo
> 
> ---------------->
> 
> diff --cc arch/x86/kernel/signal_64.c
> index 694aa88,4665b59..823a55b
> --- a/arch/x86/kernel/signal_64.c
> +++ b/arch/x86/kernel/signal_64.c
> @@@ -157,20 -96,9 +94,9 @@@ restore_sigcontext(struct pt_regs *regs
>   	}
>   
>   	{
>  -		struct _fpstate __user * buf;
>  +		struct _fpstate __user *buf;
>   		err |= __get_user(buf, &sc->fpstate);
> - 
> - 		if (buf) {
> - 			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
> - 				goto badframe;
> - 			err |= restore_i387(buf);
> - 		} else {
> - 			struct task_struct *me = current;
> - 			if (used_math()) {
> - 				clear_fpu(me);
> - 				clear_used_math();
> - 			}
> - 		}
> + 		err |= restore_i387_xstate(buf);
>   	}
>   
>   	err |= __get_user(*pax, &sc->ax);
> 
> ------------------->
> commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
> Merge: d847059... 43603c8...
> Author: Ingo Molnar <mingo@elte.hu>
> Date:   Sat Oct 11 20:23:20 2008 +0200
> 
>     Merge branch 'x86/unify-cpu-detect' into x86-v28-for-linus-phase4-D
>     
>     Conflicts:
>     	arch/x86/kernel/cpu/common.c
>     	arch/x86/kernel/signal_64.c
>     	include/asm-x86/cpufeature.h
> 
> diff --cc arch/x86/kernel/sigframe.h
> index 8b4956e,6dd7e2b..cc673aa
> --- a/arch/x86/kernel/sigframe.h
> +++ b/arch/x86/kernel/sigframe.h
> @@@ -23,10 -32,6 +32,11 @@@ struct rt_sigframe 
>   	char __user *pretcode;
>   	struct ucontext uc;
>   	struct siginfo info;
> + 	/* fp state follows here */
>   };
>  +
>  +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
>  +		sigset_t *set, struct pt_regs *regs);
>  +int ia32_setup_frame(int sig, struct k_sigaction *ka,
>  +		sigset_t *set, struct pt_regs *regs);
>   #endif
> diff --cc arch/x86/kernel/signal_64.c
> index 694aa88,4665b59..823a55b
> --- a/arch/x86/kernel/signal_64.c
> +++ b/arch/x86/kernel/signal_64.c
> @@@ -157,20 -96,9 +94,9 @@@ restore_sigcontext(struct pt_regs *regs
>   	}
>   
>   	{
>  -		struct _fpstate __user * buf;
>  +		struct _fpstate __user *buf;
>   		err |= __get_user(buf, &sc->fpstate);
> - 
> - 		if (buf) {
> - 			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
> - 				goto badframe;
> - 			err |= restore_i387(buf);
> - 		} else {
> - 			struct task_struct *me = current;
> - 			if (used_math()) {
> - 				clear_fpu(me);
> - 				clear_used_math();
> - 			}
> - 		}
> + 		err |= restore_i387_xstate(buf);
>   	}
>   
>   	err |= __get_user(*pax, &sc->ax);
> @@@ -273,10 -197,10 +196,10 @@@ get_stack(struct k_sigaction *ka, struc
>   }
>   
>   static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
>  -			   sigset_t *set, struct pt_regs * regs)
>  +			   sigset_t *set, struct pt_regs *regs)
>   {
>   	struct rt_sigframe __user *frame;
> - 	struct _fpstate __user *fp = NULL;
> + 	void __user *fp = NULL;
>   	int err = 0;
>   	struct task_struct *me = current;
>   
> @@@ -285,11 -209,8 +208,8 @@@
>   		frame = (void __user *)round_down(
>   			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
>   
> - 		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
> - 			goto give_sigsegv;
> - 
> - 		if (save_i387(fp) < 0)
> + 		if (save_i387_xstate(fp) < 0)
>  -			err |= -1; 
>  +			err |= -1;
>   	} else
>   		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
>   
> @@@ -301,9 -222,12 +221,12 @@@
>   		if (err)
>   			goto give_sigsegv;
>   	}
>  -		
>  +
>   	/* Create the ucontext.  */
> - 	err |= __put_user(0, &frame->uc.uc_flags);
> + 	if (cpu_has_xsave)
> + 		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
> + 	else
> + 		err |= __put_user(0, &frame->uc.uc_flags);
>   	err |= __put_user(0, &frame->uc.uc_link);
>   	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
>   	err |= __put_user(sas_ss_flags(regs->sp),
> diff --cc include/asm-x86/cpufeature.h
> index 065c6a8,8d45690..adfeae6
> --- a/include/asm-x86/cpufeature.h
> +++ b/include/asm-x86/cpufeature.h
> @@@ -64,49 -72,61 +72,63 @@@
>   #define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
>   #define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
>   /* cpu types for specific tunings: */
> - #define X86_FEATURE_K8		(3*32+ 4) /* Opteron, Athlon64 */
> - #define X86_FEATURE_K7		(3*32+ 5) /* Athlon */
> - #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
> - #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
> + #define X86_FEATURE_K8		(3*32+ 4) /* "" Opteron, Athlon64 */
> + #define X86_FEATURE_K7		(3*32+ 5) /* "" Athlon */
> + #define X86_FEATURE_P3		(3*32+ 6) /* "" P3 */
> + #define X86_FEATURE_P4		(3*32+ 7) /* "" P4 */
>   #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
>   #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
> - #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
> + #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
>   #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
> ++#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
>   #define X86_FEATURE_PEBS	(3*32+12) /* Precise-Event Based Sampling */
>   #define X86_FEATURE_BTS		(3*32+13) /* Branch Trace Store */
> - #define X86_FEATURE_SYSCALL32	(3*32+14) /* syscall in ia32 userspace */
> - #define X86_FEATURE_SYSENTER32	(3*32+15) /* sysenter in ia32 userspace */
> - #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
> - #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
> - #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
> - #define X86_FEATURE_11AP	(3*32+19) /* Bad local APIC aka 11AP */
> + #define X86_FEATURE_SYSCALL32	(3*32+14) /* "" syscall in ia32 userspace */
> + #define X86_FEATURE_SYSENTER32	(3*32+15) /* "" sysenter in ia32 userspace */
> + #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well */
> + #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
> + #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
> + #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
>   #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
>  +#define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
> + #define X86_FEATURE_XTOPOLOGY	(3*32+21) /* cpu topology enum extensions */
>   
>   /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
> - #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
> - #define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
> - #define X86_FEATURE_DSCPL	(4*32+ 4) /* CPL Qualified Debug Store */
> + #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
> + #define X86_FEATURE_PCLMULQDQ	(4*32+ 1) /* PCLMULQDQ instruction */
> + #define X86_FEATURE_DTES64	(4*32+ 2) /* 64-bit Debug Store */
> + #define X86_FEATURE_MWAIT	(4*32+ 3) /* "monitor" Monitor/Mwait support */
> + #define X86_FEATURE_DSCPL	(4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
> + #define X86_FEATURE_VMX		(4*32+ 5) /* Hardware virtualization */
> + #define X86_FEATURE_SMX		(4*32+ 6) /* Safer mode */
>   #define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
>   #define X86_FEATURE_TM2		(4*32+ 8) /* Thermal Monitor 2 */
> + #define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
>   #define X86_FEATURE_CID		(4*32+10) /* Context ID */
> + #define X86_FEATURE_FMA		(4*32+12) /* Fused multiply-add */
>   #define X86_FEATURE_CX16	(4*32+13) /* CMPXCHG16B */
>   #define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
> + #define X86_FEATURE_PDCM	(4*32+15) /* Performance Capabilities */
>   #define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
> + #define X86_FEATURE_XMM4_1	(4*32+19) /* "sse4_1" SSE-4.1 */
> + #define X86_FEATURE_XMM4_2	(4*32+20) /* "sse4_2" SSE-4.2 */
>   #define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
> - #define X86_FEATURE_XMM4_2	(4*32+20) /* Streaming SIMD Extensions-4.2 */
> + #define X86_FEATURE_AES		(4*32+25) /* AES instructions */
> + #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
> + #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
> + #define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
>   
>   /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
> - #define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
> - #define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* on-CPU RNG enabled */
> - #define X86_FEATURE_XCRYPT	(5*32+ 6) /* on-CPU crypto (xcrypt insn) */
> - #define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* on-CPU crypto enabled */
> + #define X86_FEATURE_XSTORE	(5*32+ 2) /* "rng" RNG present (xstore) */
> + #define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* "rng_en" RNG enabled */
> + #define X86_FEATURE_XCRYPT	(5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
> + #define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* "ace_en" on-CPU crypto enabled */
>   #define X86_FEATURE_ACE2	(5*32+ 8) /* Advanced Cryptography Engine v2 */
>   #define X86_FEATURE_ACE2_EN	(5*32+ 9) /* ACE v2 enabled */
> - #define X86_FEATURE_PHE		(5*32+ 10) /* PadLock Hash Engine */
> - #define X86_FEATURE_PHE_EN	(5*32+ 11) /* PHE enabled */
> - #define X86_FEATURE_PMM		(5*32+ 12) /* PadLock Montgomery Multiplier */
> - #define X86_FEATURE_PMM_EN	(5*32+ 13) /* PMM enabled */
> + #define X86_FEATURE_PHE		(5*32+10) /* PadLock Hash Engine */
> + #define X86_FEATURE_PHE_EN	(5*32+11) /* PHE enabled */
> + #define X86_FEATURE_PMM		(5*32+12) /* PadLock Montgomery Multiplier */
> + #define X86_FEATURE_PMM_EN	(5*32+13) /* PMM enabled */
>   
>   /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
>   #define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-28  8:03 ` Ingo Molnar
  2008-10-28  8:31   ` Zhang, Yanmin
@ 2008-10-28 20:26   ` Suresh Siddha
  2008-10-31  0:32     ` Pallipadi, Venkatesh
  1 sibling, 1 reply; 9+ messages in thread
From: Suresh Siddha @ 2008-10-28 20:26 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Zhang, Yanmin, LKML, H. Peter Anvin, Siddha, Suresh B,
	Roland McGrath, Hiroshi Shimamoto, Yinghai Lu

On Tue, Oct 28, 2008 at 01:03:27AM -0700, Ingo Molnar wrote:
> 
> * Zhang, Yanmin <yanmin_zhang@linux.intel.com> wrote:
> 
> > Comparing with 2.6.27, cpu2000 (both float and int) has about 13% regression
> > with 2.6.28-rc1 on my new-model x86-64 machine.
> > 
> > I bisected down to below patch.
> > 
> > commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
> > Merge: d847059... 43603c8...
> > Author: Ingo Molnar <mingo@elte.hu>
> > Date:   Sat Oct 11 20:23:20 2008 +0200
> > 
> >     Merge branch 'x86/unify-cpu-detect' into x86-v28-for-linus-phase4-D
> >     
> >     Conflicts:
> >         arch/x86/kernel/cpu/common.c
> >         arch/x86/kernel/signal_64.c
> >         include/asm-x86/cpufeature.h
> > 
> > 
> > When I tried to revert it against 2.6.28-rc2, there are many conflictions.

Ingo, I will work with Yanmin and report our findings. It is interesting to see
a double-digit regression on the cpu2000 benchmark. My understanding is that
these benchmarks are not sensitive to signal handling. Also, lmbench
signal handling (lat_sig) has less than a 3-4% regression, because of the
added overhead during signal setup and restore. Context switch didn't show
any noticeable difference when I measured it before.

Anyhow, while we look at this, this is probably the best point to add the "noxsave"
boot option patch, which I missed before. Ingo, Peter: Please consider this for
2.6.28.

thanks,
suresh 
---

From: Suresh Siddha <suresh.b.siddha@intel.com>
Subject: x86: Introduce noxsave boot parameter

Introduce "noxsave" boot parameter which will disable the cpu's xsave/xrstor
capabilities.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 343e0f0..c60be3d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1451,6 +1451,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
+	noxsave		[BUGS=X86] Disables x86 extended register state save
+			and restore using xsave. The kernel will fallback to
+			enabling legacy floating-point and sse state.
+
 	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
 
 	nohlt		[BUGS=ARM,SH]
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 25581dc..bb9d547 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -93,6 +93,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #endif
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
+static int __init x86_xsave_setup(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+	return 1;
+}
+__setup("noxsave", x86_xsave_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_serial_nr __cpuinitdata = 1;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-28 20:26   ` Suresh Siddha
@ 2008-10-31  0:32     ` Pallipadi, Venkatesh
  2008-10-31  1:08       ` H. Peter Anvin
  2008-10-31  1:15       ` Zhang, Yanmin
  0 siblings, 2 replies; 9+ messages in thread
From: Pallipadi, Venkatesh @ 2008-10-31  0:32 UTC (permalink / raw)
  To: Siddha, Suresh B, Ingo Molnar
  Cc: Zhang, Yanmin, LKML, H. Peter Anvin, Siddha, Suresh B,
	Roland McGrath, Hiroshi Shimamoto, Yinghai Lu

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2226 bytes --]

>-----Original Message-----
>From: linux-kernel-owner@vger.kernel.org
>[mailto:linux-kernel-owner@vger.kernel.org] On Behalf Of Suresh Siddha
>Sent: Tuesday, October 28, 2008 1:27 PM
>To: Ingo Molnar
>Cc: Zhang, Yanmin; LKML; H. Peter Anvin; Siddha, Suresh B;
>Roland McGrath; Hiroshi Shimamoto; Yinghai Lu
>Subject: Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
>
>On Tue, Oct 28, 2008 at 01:03:27AM -0700, Ingo Molnar wrote:
>>
>> * Zhang, Yanmin <yanmin_zhang@linux.intel.com> wrote:
>>
>> > Comparing with 2.6.27, cpu2000 (both float and int) has
>about 13% regression
>> > with 2.6.28-rc1 on my new-model x86-64 machine.
>> >
>> > I bisected down to below patch.
>> >
>> > commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
>> > Merge: d847059... 43603c8...
>> > Author: Ingo Molnar <mingo@elte.hu>
>> > Date:   Sat Oct 11 20:23:20 2008 +0200
>> >
>> >     Merge branch 'x86/unify-cpu-detect' into
>x86-v28-for-linus-phase4-D
>> >
>> >     Conflicts:
>> >         arch/x86/kernel/cpu/common.c
>> >         arch/x86/kernel/signal_64.c
>> >         include/asm-x86/cpufeature.h
>> >
>> >
>> > When I tried to revert it against 2.6.28-rc2, there are
>many conflictions.
>
>Ingo, I will work with Yanmin and report our findings. It is
>interesting to see
>double digit regression on cpu2000 benchmark. My understanding is that
>these benchmarks are not sensitive to signal handling. Also lmbench
>signal handling(lat_sig) has less than 3-4% regression, because of
>added overhead duing signal setup and restore. Context switch
>didn't have
>any noticeable difference, when I measure before.
>

We figured out that this is not related to signals, but to this mismerge here:

>  +#define X86_FEATURE_AMDC1E  (3*32+21) /* AMD C1E detected */
> + #define X86_FEATURE_XTOPOLOGY       (3*32+21) /* cpu topology enum extensions */

I had earlier sent a patch to fix this.
http://marc.info/?l=linux-kernel&m=122341178202930&w=2

But, somehow I don't see this patch either in Linus's git or in tip.

Ingo, hpa: Can you push that patch along?

Thanks,
Venki
[-- undecodable trailing bytes omitted --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-31  0:32     ` Pallipadi, Venkatesh
@ 2008-10-31  1:08       ` H. Peter Anvin
  2008-10-31 10:02         ` Ingo Molnar
  2008-10-31  1:15       ` Zhang, Yanmin
  1 sibling, 1 reply; 9+ messages in thread
From: H. Peter Anvin @ 2008-10-31  1:08 UTC (permalink / raw)
  To: Pallipadi, Venkatesh
  Cc: Siddha, Suresh B, Ingo Molnar, Zhang, Yanmin, LKML,
	Roland McGrath, Hiroshi Shimamoto, Yinghai Lu

Pallipadi, Venkatesh wrote:
> 
> We figured out that this is not related to signals. But to this mismerge here
> 
>>  +#define X86_FEATURE_AMDC1E  (3*32+21) /* AMD C1E detected */
>> + #define X86_FEATURE_XTOPOLOGY       (3*32+21) /* cpu topology enum extensions */
> 
> I had earler sent a patch to fix this.
> http://marc.info/?l=linux-kernel&m=122341178202930&w=2
> 
> But, somehow I don't see this patch either in Linus's git or in tip.
> 
> ingo, hpa: Can you push that patch along.
> 

OK, thought it already had been.  I will investigate later tonight.

	-hpa


^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-31  0:32     ` Pallipadi, Venkatesh
  2008-10-31  1:08       ` H. Peter Anvin
@ 2008-10-31  1:15       ` Zhang, Yanmin
  1 sibling, 0 replies; 9+ messages in thread
From: Zhang, Yanmin @ 2008-10-31  1:15 UTC (permalink / raw)
  To: Pallipadi, Venkatesh
  Cc: Siddha, Suresh B, Ingo Molnar, LKML, H. Peter Anvin,
	Roland McGrath, Hiroshi Shimamoto, Yinghai Lu


On Thu, 2008-10-30 at 17:32 -0700, Pallipadi, Venkatesh wrote:
> >-----Original Message-----
> >From: linux-kernel-owner@vger.kernel.org
> >[mailto:linux-kernel-owner@vger.kernel.org] On Behalf Of Suresh Siddha
> >Sent: Tuesday, October 28, 2008 1:27 PM
> >To: Ingo Molnar
> >Cc: Zhang, Yanmin; LKML; H. Peter Anvin; Siddha, Suresh B;
> >Roland McGrath; Hiroshi Shimamoto; Yinghai Lu
> >Subject: Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
> >
> >On Tue, Oct 28, 2008 at 01:03:27AM -0700, Ingo Molnar wrote:
> >>
> >> * Zhang, Yanmin <yanmin_zhang@linux.intel.com> wrote:
> >>
> >> > Comparing with 2.6.27, cpu2000 (both float and int) has
> >about 13% regression
> >> > with 2.6.28-rc1 on my new-model x86-64 machine.
> >> >
> >> > I bisected down to below patch.
> >> >
> >> > commit 0afe2db21394820d32646a695eccf3fbfe6ab5c7
> >> > Merge: d847059... 43603c8...
> >> > Author: Ingo Molnar <mingo@elte.hu>
> >> > Date:   Sat Oct 11 20:23:20 2008 +0200
> >> >
> >> >     Merge branch 'x86/unify-cpu-detect' into
> >x86-v28-for-linus-phase4-D
> >> >
> >> >     Conflicts:
> >> >         arch/x86/kernel/cpu/common.c
> >> >         arch/x86/kernel/signal_64.c
> >> >         include/asm-x86/cpufeature.h
> >> >
> >> >
> >> > When I tried to revert it against 2.6.28-rc2, there are
> >many conflictions.
> >
> >Ingo, I will work with Yanmin and report our findings. It is
> >interesting to see
> >double digit regression on cpu2000 benchmark. My understanding is that
> >these benchmarks are not sensitive to signal handling. Also lmbench
> >signal handling(lat_sig) has less than 3-4% regression, because of
> >added overhead during signal setup and restore. Context switch
> >didn't have
> >any noticeable difference, when I measure before.
> >
> 
> We figured out that this is not related to signals. But to this mismerge here
> 
> >  +#define X86_FEATURE_AMDC1E  (3*32+21) /* AMD C1E detected */
> > + #define X86_FEATURE_XTOPOLOGY       (3*32+21) /* cpu topology enum extensions */
> 
> I had earlier sent a patch to fix this.
> http://marc.info/?l=linux-kernel&m=122341178202930&w=2
> 
> But, somehow I don’t see this patch either in Linus's git or in tip.
I confirm it does fix the issues. But the patch needs to be ported to 2.6.28-rc2.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-31  1:08       ` H. Peter Anvin
@ 2008-10-31 10:02         ` Ingo Molnar
  2008-10-31 15:53           ` H. Peter Anvin
  0 siblings, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2008-10-31 10:02 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pallipadi, Venkatesh, Siddha, Suresh B, Zhang, Yanmin, LKML,
	Roland McGrath, Hiroshi Shimamoto, Yinghai Lu


* H. Peter Anvin <hpa@zytor.com> wrote:

> Pallipadi, Venkatesh wrote:
> > 
> > We figured out that this is not related to signals. But to this mismerge here
> > 
> >>  +#define X86_FEATURE_AMDC1E  (3*32+21) /* AMD C1E detected */
> >> + #define X86_FEATURE_XTOPOLOGY       (3*32+21) /* cpu topology enum extensions */
> > 
> > I had earlier sent a patch to fix this.
> > http://marc.info/?l=linux-kernel&m=122341178202930&w=2
> > 
> > But, somehow I don’t see this patch either in Linus's git or in tip.
> > 
> > ingo, hpa: Can you push that patch along.
> > 
> 
> OK, thought it already had been.  I will investigate later tonight.

i've picked it up into x86/urgent - see the commit below.

	Ingo

--------------->
>From 2576c9991758e431b73e374f6019d6e1e12a8d36 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue, 7 Oct 2008 13:33:12 -0700
Subject: [PATCH] x86: fix AMDC1E and XTOPOLOGY conflict in cpufeature

Impact: fix xsave slowdown regression

Fix two features from conflicting in feature bits.

Fixes this performance regression:

   Subject: cpu2000(both float and int) 13% regression with 2.6.28-rc1
   http://lkml.org/lkml/2008/10/28/36

Reported-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Bisected-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpufeature.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index f73e95d..cfdf8c2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -91,7 +91,7 @@
 #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
 #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
-#define X86_FEATURE_XTOPOLOGY	(3*32+21) /* cpu topology enum extensions */
+#define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: cpu2000(both float and int) 13% regression with 2.6.28-rc1
  2008-10-31 10:02         ` Ingo Molnar
@ 2008-10-31 15:53           ` H. Peter Anvin
  0 siblings, 0 replies; 9+ messages in thread
From: H. Peter Anvin @ 2008-10-31 15:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pallipadi, Venkatesh, Siddha, Suresh B, Zhang, Yanmin, LKML,
	Roland McGrath, Hiroshi Shimamoto, Yinghai Lu

Ingo Molnar wrote:
>> OK, thought it already had been.  I will investigate later tonight.
> 
> i've picked it up into x86/urgent - see the commit below.
> 

Thank you.  Sorry for not really having been there this week.

	-hpa

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-10-31 15:54 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-28  6:32 cpu2000(both float and int) 13% regression with 2.6.28-rc1 Zhang, Yanmin
2008-10-28  8:03 ` Ingo Molnar
2008-10-28  8:31   ` Zhang, Yanmin
2008-10-28 20:26   ` Suresh Siddha
2008-10-31  0:32     ` Pallipadi, Venkatesh
2008-10-31  1:08       ` H. Peter Anvin
2008-10-31 10:02         ` Ingo Molnar
2008-10-31 15:53           ` H. Peter Anvin
2008-10-31  1:15       ` Zhang, Yanmin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).