linux-kernel.vger.kernel.org archive mirror
* [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
@ 2016-01-16 19:22 Brian Gerst
  2016-01-16 19:36 ` Borislav Petkov
From: Brian Gerst @ 2016-01-16 19:22 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Ingo Molnar, H. Peter Anvin, Denys Vlasenko, Andy Lutomirski,
	Linus Torvalds, Borislav Petkov

Move the code to do the dynamic check to the init text section so that it
is discarded after alternatives have run and a static branch has been
chosen.

A new section is defined to avoid warnings with modpost due to references
to init text from main text, which in this case is legitimate.  All such
references are patched out before init mem is discarded.
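
For reference, the dynamic check that lands in the discarded section
amounts to a plain bit test against boot_cpu_data's capability words.
A minimal C sketch of the equivalent logic (the helper name here is
made up for illustration; the real thing is the inline asm below):

#include <linux/types.h>

/* Mirrors the "testl" emitted into .static_cpu_has: test feature 'bit'
 * in the x86_capability array, i.e. boot_cpu_has(bit). */
static inline bool dynamic_cpu_check(u16 bit, const u32 *caps)
{
	return !!(caps[bit / 32] & (1u << (bit & 31)));
}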

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/cpufeature.h | 33 ++++++++++++++++++++++-----------
 arch/x86/kernel/cpu/common.c      |  6 ------
 arch/x86/kernel/vmlinux.lds.S     |  6 ++++++
 3 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c94..2efbd83 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -412,7 +412,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 
 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
@@ -502,10 +501,10 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		boot_cpu_has(bit)				\
 )
 
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has_safe(u16 bit, __u32 *caps)
 {
 #ifdef CC_HAVE_ASM_GOTO
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -530,17 +529,22 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .static_cpu_has,\"ax\"\n"
+			 "6: testl %2,%3\n"
+			 "   jnz %l[t_yes]\n"
+			 "   jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     "i" (1 << (bit & 31)), "m" (caps[bit/32])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has_safe(bit);
 #else
 		u8 flag;
 		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
+		asm volatile("1: jmp 7f\n"
 			     "2:\n"
 			     ".section .altinstructions,\"a\"\n"
 			     " .long 1b - .\n"		/* src offset */
@@ -572,9 +576,15 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     "5: movb $1,%0\n"
 			     "6:\n"
 			     ".previous\n"
+			     ".section .static_cpu_has,\"ax\"\n"
+			     "7: testl %3,%4\n"
+			     "   setnz %0\n"
+			     "   jmp 2b\n"
+			     ".previous\n"
 			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+			     : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			       "i" (1 << (bit & 31)), "m" (caps[bit/32]));
+		return (flag != 0);
 #endif /* CC_HAVE_ASM_GOTO */
 }
 
@@ -582,7 +592,8 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 (								\
 	__builtin_constant_p(boot_cpu_has(bit)) ?		\
 		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
+		_static_cpu_has_safe(bit,			\
+			 &boot_cpu_data.x86_capability[0])	\
 )
 #else
 /*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de..897c65b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1483,12 +1483,6 @@ void warn_pre_alternatives(void)
 EXPORT_SYMBOL_GPL(warn_pre_alternatives);
 #endif
 
-inline bool __static_cpu_has_safe(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4f19942..4df1467 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -195,6 +195,12 @@ SECTIONS
 	:init
 #endif
 
+	.static_cpu_has : AT(ADDR(.static_cpu_has) - LOAD_OFFSET) {
+		__static_cpu_has_start = .;
+		*(.static_cpu_has)
+		__static_cpu_has_end = .;
+	}
+
 	INIT_DATA_SECTION(16)
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-- 
2.5.0


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-16 19:22 [PATCH] x86: static_cpu_has_safe: discard dynamic check after init Brian Gerst
@ 2016-01-16 19:36 ` Borislav Petkov
  2016-01-16 19:58   ` Brian Gerst
From: Borislav Petkov @ 2016-01-16 19:36 UTC (permalink / raw)
  To: Brian Gerst
  Cc: x86, linux-kernel, Ingo Molnar, H. Peter Anvin, Denys Vlasenko,
	Andy Lutomirski, Linus Torvalds

On Sat, Jan 16, 2016 at 02:22:04PM -0500, Brian Gerst wrote:
> Move the code to do the dynamic check to the init text section so that it
> is discarded after alternatives have run and a static branch has been
> chosen.
> 
> A new section is defined to avoid warnings with modpost due to references
> to init text from main text, which in this case is legitimate.  All such
> references are patched out before init mem is discarded.

And we're doing this because...? Space savings? How much are we talkin'?

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-16 19:36 ` Borislav Petkov
@ 2016-01-16 19:58   ` Brian Gerst
  2016-01-17 10:33     ` Borislav Petkov
From: Brian Gerst @ 2016-01-16 19:58 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	H. Peter Anvin, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Sat, Jan 16, 2016 at 2:36 PM, Borislav Petkov <bp@suse.de> wrote:
> On Sat, Jan 16, 2016 at 02:22:04PM -0500, Brian Gerst wrote:
>> Move the code to do the dynamic check to the init text section so that it
>> is discarded after alternatives have run and a static branch has been
>> chosen.
>>
>> A new section is defined to avoid warnings with modpost due to references
>> to init text from main text, which in this case is legitimate.  All such
>> references are patched out before init mem is discarded.
>
> And we're doing this because...? Space savings? How much are we talkin'?

It saves space.  The only thing left in main text is one jump
instruction (5 bytes).

The old t_dynamic branch looked something like this (26 bytes in
.text, not discarded):
     553:       bf 7d 00 00 00          mov    $0x7d,%edi
     558:       48 89 4d c8             mov    %rcx,-0x38(%rbp)
     55c:       e8 00 00 00 00          callq  561 <__switch_to+0xf1>
                        55d: R_X86_64_PC32      __static_cpu_has_safe-0x4
     561:       84 c0                   test   %al,%al
     563:       48 8b 4d c8             mov    -0x38(%rbp),%rcx
     567:       0f 85 77 01 00 00       jne    6e4 <__switch_to+0x274>

New (21 bytes, discarded after init):
   0:   f7 05 00 00 00 00 00    testl  $0x20000000,0x0(%rip)        # a <.static_cpu_has+0xa>
   7:   00 00 20
                        2: R_X86_64_PC32        boot_cpu_data+0x18
   a:   0f 85 00 00 00 00       jne    10 <.static_cpu_has+0x10>
                        c: R_X86_64_PC32        .text+0x4b9
  10:   e9 00 00 00 00          jmpq   15 <.static_cpu_has+0x15>
                        11: R_X86_64_PC32       .text+0x529

--
Brian Gerst


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-16 19:58   ` Brian Gerst
@ 2016-01-17 10:33     ` Borislav Petkov
  2016-01-18 16:52       ` Brian Gerst
From: Borislav Petkov @ 2016-01-17 10:33 UTC (permalink / raw)
  To: Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	H. Peter Anvin, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Sat, Jan 16, 2016 at 02:58:21PM -0500, Brian Gerst wrote:
> It saves space.  The only thing left in main text is one jump
> instruction (5 bytes).

How do I measure this?

Because with my tailored config here, the only thing I'm seeing is a growth of
text by 730 bytes:

before:
   text    data     bss     dec     hex filename
10926552        3598944 16642048        31167544        1db9438 vmlinux

[    0.056552] Freeing SMP alternatives memory: 24K (ffffffff81dd7000 - ffffffff81ddd000)
[    2.883728] Freeing unused kernel memory: 2904K (ffffffff81b01000 - ffffffff81dd7000)


after:
   text    data     bss     dec     hex filename
10927282        3598944 16642048        31168274        1db9712 vmlinux

[    0.052559] Freeing SMP alternatives memory: 24K (ffffffff81dd7000 - ffffffff81ddd000)
[    3.225318] Freeing unused kernel memory: 2904K (ffffffff81b01000 - ffffffff81dd7000)

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-17 10:33     ` Borislav Petkov
@ 2016-01-18 16:52       ` Brian Gerst
  2016-01-18 17:49         ` Andy Lutomirski
  2016-01-18 18:14         ` Borislav Petkov
From: Brian Gerst @ 2016-01-18 16:52 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	H. Peter Anvin, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Sun, Jan 17, 2016 at 5:33 AM, Borislav Petkov <bp@suse.de> wrote:
> On Sat, Jan 16, 2016 at 02:58:21PM -0500, Brian Gerst wrote:
>> It saves space.  The only thing left in main text is one jump
>> instruction (5 bytes).
>
> How do I measure this?
>
> Because with my tailored config here, the only thing I'm seeing is a growth of
> text by 730 bytes:
>
> before:
>    text    data     bss     dec     hex filename
> 10926552        3598944 16642048        31167544        1db9438 vmlinux
>
> [    0.056552] Freeing SMP alternatives memory: 24K (ffffffff81dd7000 - ffffffff81ddd000)
> [    2.883728] Freeing unused kernel memory: 2904K (ffffffff81b01000 - ffffffff81dd7000)
>
>
> after:
>    text    data     bss     dec     hex filename
> 10927282        3598944 16642048        31168274        1db9712 vmlinux
>
> [    0.052559] Freeing SMP alternatives memory: 24K (ffffffff81dd7000 - ffffffff81ddd000)
> [    3.225318] Freeing unused kernel memory: 2904K (ffffffff81b01000 - ffffffff81dd7000)

It is due to page alignment padding.  It was not enough to lose a
whole page from .text in your case.

The size command includes any section that is marked executable in the
text count, including init text.  If you use readelf -S vmlinux.o
instead you will notice that .text is the same size or smaller, and
.static_cpu_has (which is freed after boot) is the difference.

--
Brian Gerst


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 16:52       ` Brian Gerst
@ 2016-01-18 17:49         ` Andy Lutomirski
  2016-01-18 18:14         ` Borislav Petkov
From: Andy Lutomirski @ 2016-01-18 17:49 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Borislav Petkov, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, H. Peter Anvin,
	Denys Vlasenko, Linus Torvalds

On Mon, Jan 18, 2016 at 8:52 AM, Brian Gerst <brgerst@gmail.com> wrote:
> On Sun, Jan 17, 2016 at 5:33 AM, Borislav Petkov <bp@suse.de> wrote:
>> On Sat, Jan 16, 2016 at 02:58:21PM -0500, Brian Gerst wrote:
>>> It saves space.  The only thing left in main text is one jump
>>> instruction (5 bytes).
>>
>> How do I measure this?
>>
>> Because with my tailored config here, the only thing I'm seeing is a growth of
>> text by 730 bytes:
>>
>> before:
>>    text    data     bss     dec     hex filename
>> 10926552        3598944 16642048        31167544        1db9438 vmlinux
>>
>> [    0.056552] Freeing SMP alternatives memory: 24K (ffffffff81dd7000 - ffffffff81ddd000)
>> [    2.883728] Freeing unused kernel memory: 2904K (ffffffff81b01000 - ffffffff81dd7000)
>>
>>
>> after:
>>    text    data     bss     dec     hex filename
>> 10927282        3598944 16642048        31168274        1db9712 vmlinux
>>
>> [    0.052559] Freeing SMP alternatives memory: 24K (ffffffff81dd7000 - ffffffff81ddd000)
>> [    3.225318] Freeing unused kernel memory: 2904K (ffffffff81b01000 - ffffffff81dd7000)
>
> It is due to page alignment padding.  It was not enough to lose a
> whole page from .text in your case.
>
> The size command includes any section that is marked executable in the
> text count, including init text.  If you use readelf -S vmlinux.o
> instead you will notice that .text is the same size or smaller, and
> .static_cpu_has (which is freed after boot) is the difference.
>

If I'm understanding this correctly, the total non-init overhead from
static_cpu_has_safe with your patch is five bytes.  I'd imagine that
the short-jmp optimization in regular static_cpu_has essentially never
kicks in, which means it will also use five bytes of text.  That makes
me wonder whether we should just make static_cpu_has safe and remove
the distinction.

Also, someone should write a little script to measure kernel size
minus .init stuff.

--Andy


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 16:52       ` Brian Gerst
  2016-01-18 17:49         ` Andy Lutomirski
@ 2016-01-18 18:14         ` Borislav Petkov
  2016-01-18 18:29           ` Andy Lutomirski
  2016-01-18 18:51           ` Borislav Petkov
From: Borislav Petkov @ 2016-01-18 18:14 UTC (permalink / raw)
  To: Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	H. Peter Anvin, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Mon, Jan 18, 2016 at 11:52:34AM -0500, Brian Gerst wrote:
> It is due to page alignment padding.  It was not enough to lose a
> whole page from .text in your case.

So nothing more got freed.

> The size command includes any section that is marked executable in the
> text count, including init text.  If you use readelf -S vmlinux.o
> instead you will notice that .text is the same size or smaller, and
> .static_cpu_has (which is freed after boot) is the difference.

So we're talking about less than a page here?

  [19] .static_cpu_has   PROGBITS         ffffffff81d335d3  011335d3
       00000000000002df  0000000000000000  AX       0     0     1

That's 735 bytes (0x2df). Meh, it doesn't look like it is worth the trouble.

Maybe I should build an allyesconfig.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 18:14         ` Borislav Petkov
@ 2016-01-18 18:29           ` Andy Lutomirski
  2016-01-18 18:39             ` Borislav Petkov
  2016-01-18 18:51           ` Borislav Petkov
From: Andy Lutomirski @ 2016-01-18 18:29 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, H. Peter Anvin, Denys Vlasenko, Linus Torvalds

On Mon, Jan 18, 2016 at 10:14 AM, Borislav Petkov <bp@suse.de> wrote:
> On Mon, Jan 18, 2016 at 11:52:34AM -0500, Brian Gerst wrote:
>> It is due to page alignment padding.  It was not enough to lose a
>> whole page from .text in your case.
>
> So nothing more got freed.
>
>> The size command includes any section that is marked executable in the
>> text count, including init text.  If you use readelf -S vmlinux.o
>> instead you will notice that .text is the same size or smaller, and
>> .static_cpu_has (which is freed after boot) is the difference.
>
> So we're talking about less than a page here?
>
>   [19] .static_cpu_has   PROGBITS         ffffffff81d335d3  011335d3
>        00000000000002df  0000000000000000  AX       0     0     1
>
> That's 735 bytes (0x2df). Meh, it doesn't look like it is worth the trouble.
>

I think that, if we can make static_cpu_has be unconditionally safe as
a result and get rid of warn_pre_alternatives, then it is worth the
trouble.

--Andy


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 18:29           ` Andy Lutomirski
@ 2016-01-18 18:39             ` Borislav Petkov
  2016-01-18 19:45               ` H. Peter Anvin
From: Borislav Petkov @ 2016-01-18 18:39 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, H. Peter Anvin, Denys Vlasenko, Linus Torvalds

On Mon, Jan 18, 2016 at 10:29:24AM -0800, Andy Lutomirski wrote:
> I think that, if we can make static_cpu_has be unconditionally safe as
> a result

Problem with this is the additional .altinstructions entry for
X86_FEATURE_ALWAYS. And sometimes you don't really need to use the _safe
variant when you know you're safe.
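
For context on that cost: each .altinstructions record is 13 bytes per
patch site. A sketch of the entry layout, inferred from the
.long/.word/.byte sequence in the patch's inline asm (an assumption,
not copied from <asm/alternative.h>):

#include <linux/types.h>

/* One record per patch site, mirroring the ".long 1b - .", ".long 0",
 * ".word %P0", ".byte ..." sequence emitted into .altinstructions. */
struct alt_instr_sketch {
	s32 instr_offset;	/* original insn, relative to the record */
	s32 repl_offset;	/* replacement insn; 0 means none */
	u16 cpuid;		/* feature bit to test for */
	u8  instrlen;		/* length of the original insn */
	u8  replacementlen;	/* length of the replacement */
	u8  padlen;		/* padding after the original insn */
} __attribute__((packed));	/* 4+4+2+1+1+1 = 13 bytes */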

> and get rid of warn_pre_alternatives,

That's off by default, behind CONFIG_X86_DEBUG_STATIC_CPU_HAS.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 18:14         ` Borislav Petkov
  2016-01-18 18:29           ` Andy Lutomirski
@ 2016-01-18 18:51           ` Borislav Petkov
  2016-01-19  1:10             ` Borislav Petkov
From: Borislav Petkov @ 2016-01-18 18:51 UTC (permalink / raw)
  To: Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	H. Peter Anvin, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Mon, Jan 18, 2016 at 07:14:57PM +0100, Borislav Petkov wrote:
> So we're talking about less than a page here?
> 
>   [19] .static_cpu_has   PROGBITS         ffffffff81d335d3  011335d3
>        00000000000002df  0000000000000000  AX       0     0     1
> 
> That's 735 bytes (0x2df). Meh, it doesn't look like it is worth the trouble.
> 
> Maybe I should build an allyesconfig.

  [45] .static_cpu_has   PROGBITS         ffffffff97aa655b  16ea655b
       00000000000002df  0000000000000000  AX       0     0     1

Same.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 18:39             ` Borislav Petkov
@ 2016-01-18 19:45               ` H. Peter Anvin
  2016-01-18 23:05                 ` Borislav Petkov
From: H. Peter Anvin @ 2016-01-18 19:45 UTC (permalink / raw)
  To: Borislav Petkov, Andy Lutomirski
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Linus Torvalds

On 01/18/16 10:39, Borislav Petkov wrote:
> On Mon, Jan 18, 2016 at 10:29:24AM -0800, Andy Lutomirski wrote:
>> I think that, if we can make static_cpu_has be unconditionally safe as
>> a result
> 
> Problem with this is the additional .altinstructions entry for
> X86_FEATURE_ALWAYS. And sometimes you don't really need to use the _safe
> variant when you know you're safe.
> 

I think the two-byte optimization is the real issue if there is one at
all.  I don't care about the inittext, and unless I'm misremembering
completely altinstructions also get ejected.

So I don't personally object to killing off the unsafe variant.

	-hpa


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 19:45               ` H. Peter Anvin
@ 2016-01-18 23:05                 ` Borislav Petkov
  2016-01-18 23:13                   ` H. Peter Anvin
From: Borislav Petkov @ 2016-01-18 23:05 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Mon, Jan 18, 2016 at 11:45:18AM -0800, H. Peter Anvin wrote:
> I think the two-byte optimization is the real issue if there is one at
> all.

Well, we have been optimizing the jumps for a couple of releases now;
see recompute_jump(). So we do get the short JMP whenever gcc lays out
the code so that the target is close enough. If the JMP to the dynamic
branch doesn't get nopped out anyway, that is.
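
For reference, the short form is "jmp rel8" (opcode 0xeb, two bytes)
versus "jmp rel32" (opcode 0xe9, five bytes), so it only applies when
the target lies within a signed 8-bit displacement. A sketch of the
reachability test, as an assumption rather than the actual
recompute_jump() code:

#include <linux/types.h>

/* Sketch: can a 5-byte "jmp rel32" shrink to the 2-byte "jmp rel8"?
 * rel8 is measured from the end of the 2-byte instruction. */
static bool short_jmp_reachable(unsigned long insn, unsigned long target)
{
	long disp = (long)(target - (insn + 2));

	return disp >= -128 && disp <= 127;
}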

We have "debug-alternative" to dump exactly what happens during
patching.

> I don't care about the inittext, and unless I'm misremembering
> completely altinstructions also get ejected.

Ah yes, they do.

> So I don't personally object to killing off the unsafe variant.

Ok.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 23:05                 ` Borislav Petkov
@ 2016-01-18 23:13                   ` H. Peter Anvin
  2016-01-18 23:25                     ` Borislav Petkov
From: H. Peter Anvin @ 2016-01-18 23:13 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On January 18, 2016 3:05:54 PM PST, Borislav Petkov <bp@suse.de> wrote:
>On Mon, Jan 18, 2016 at 11:45:18AM -0800, H. Peter Anvin wrote:
>> I think the two-byte optimization is the real issue if there is one at
>> all.
>
>Well, we have been optimizing the jumps for a couple of releases now;
>see recompute_jump(). So we do get the short JMP whenever gcc lays out
>the code so that the target is close enough. If the JMP to the dynamic
>branch doesn't get nopped out anyway, that is.
>
>We have "debug-alternative" to dump exactly what happens during
>patching.
>
>> I don't care about the inittext, and unless I'm misremembering
>> completely altinstructions also get ejected.
>
>Ah yes, they do.
>
>> So I don't personally object to killing off the unsafe variant.
>
>Ok.

The optimization has always been there; the question is how often it actually kicks in.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 23:13                   ` H. Peter Anvin
@ 2016-01-18 23:25                     ` Borislav Petkov
  2016-01-19 13:57                       ` Borislav Petkov
From: Borislav Petkov @ 2016-01-18 23:25 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Mon, Jan 18, 2016 at 03:13:00PM -0800, H. Peter Anvin wrote:
> The optimization has always been there; the question is how often it
> actually kicks in.

Luckily, I have this disassembler tool which dumps the alternatives
sections in a more readable format. I can dump all the static_cpu_has()
call sites tomorrow and we can see what gcc generates.

;-}

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 18:51           ` Borislav Petkov
@ 2016-01-19  1:10             ` Borislav Petkov
  2016-01-19  1:33               ` H. Peter Anvin
From: Borislav Petkov @ 2016-01-19  1:10 UTC (permalink / raw)
  To: Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	H. Peter Anvin, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Mon, Jan 18, 2016 at 07:51:07PM +0100, Borislav Petkov wrote:
>   [45] .static_cpu_has   PROGBITS         ffffffff97aa655b  16ea655b
>        00000000000002df  0000000000000000  AX       0     0     1

Ok, staring at this section alone was wrong. I went and looked at
the .s file and now it clicked: gcc emits the dynamic jump target's
call to __static_cpu_has_safe inline at every call site:

        .loc 4 538 0
        movl    $125, %edi      #,
        call    __static_cpu_has_safe   #
.LBE885:
.LBE886:
.LBE914:
        .loc 1 240 0
        testb   %al, %al        # D.30157
        je      .L150   #,
        jmp     .L151   #
.L152:
.LBB915:
.LBB909:
.LBB905:
.LBB893:
.LBB892:
        .loc 4 538 0
        movl    $154, %edi      #,
        call    __static_cpu_has_safe   #
.LBE892:
.LBE893:
.LBE905:
        .loc 7 431 0
        testb   %al, %al        # D.30157
        jne     .L154   #,

which turn into:

 751:   bf 7d 00 00 00          mov    $0x7d,%edi
 756:   e8 00 00 00 00          callq  75b <fpu__copy+0xab>
 75b:   84 c0                   test   %al,%al
 75d:   74 a3                   je     702 <fpu__copy+0x52>
 75f:   eb 90                   jmp    6f1 <fpu__copy+0x41>
 761:   bf 9a 00 00 00          mov    $0x9a,%edi
 766:   e8 00 00 00 00          callq  76b <fpu__copy+0xbb>
 76b:   84 c0                   test   %al,%al

That's like 28 bytes in this particular case, which is 14 per call site,
on average.

So grepping through my tailored vmlinux, it has 35 entries with
X86_FEATURE_ALWAYS in .altinstructions X 14 = 490 bytes.

So yeah, we probably should do this, allyesconfig should give more
savings.

One thing I'd probably do differently is not call the throwaway section
.static_cpu_has but something like .altinstr_temporary or so and put it
after the replacement insns:

        .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
                *(.altinstr_replacement)
		*(.altinstr_temporary)
        }

so that we know those instructions belong to the alternatives mechanism.
They'll get discarded too, of course.

I could just as well be talking a lot of crap, it is waay too late here.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19  1:10             ` Borislav Petkov
@ 2016-01-19  1:33               ` H. Peter Anvin
  2016-01-19  9:22                 ` Borislav Petkov
From: H. Peter Anvin @ 2016-01-19  1:33 UTC (permalink / raw)
  To: Borislav Petkov, Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On January 18, 2016 5:10:26 PM PST, Borislav Petkov <bp@suse.de> wrote:
>On Mon, Jan 18, 2016 at 07:51:07PM +0100, Borislav Petkov wrote:
>>   [45] .static_cpu_has   PROGBITS         ffffffff97aa655b  16ea655b
>>        00000000000002df  0000000000000000  AX       0     0     1
>
>Ok, staring at this section alone was wrong. I went and looked at
>the .s file and now it clicked: gcc emits the dynamic jump target's
>call to __static_cpu_has_safe inline at every call site:
>
>        .loc 4 538 0
>        movl    $125, %edi      #,
>        call    __static_cpu_has_safe   #
>.LBE885:
>.LBE886:
>.LBE914:
>        .loc 1 240 0
>        testb   %al, %al        # D.30157
>        je      .L150   #,
>        jmp     .L151   #
>.L152:
>.LBB915:
>.LBB909:
>.LBB905:
>.LBB893:
>.LBB892:
>        .loc 4 538 0
>        movl    $154, %edi      #,
>        call    __static_cpu_has_safe   #
>.LBE892:
>.LBE893:
>.LBE905:
>        .loc 7 431 0
>        testb   %al, %al        # D.30157
>        jne     .L154   #,
>
>which turn into:
>
> 751:   bf 7d 00 00 00          mov    $0x7d,%edi
> 756:   e8 00 00 00 00          callq  75b <fpu__copy+0xab>
> 75b:   84 c0                   test   %al,%al
> 75d:   74 a3                   je     702 <fpu__copy+0x52>
> 75f:   eb 90                   jmp    6f1 <fpu__copy+0x41>
> 761:   bf 9a 00 00 00          mov    $0x9a,%edi
> 766:   e8 00 00 00 00          callq  76b <fpu__copy+0xbb>
> 76b:   84 c0                   test   %al,%al
>
>That's like 28 bytes in this particular case, which is 14 per call
>site, on average.
>
>So grepping through my tailored vmlinux, it has 35 entries with
>X86_FEATURE_ALWAYS in .altinstructions X 14 = 490 bytes.
>
>So yeah, we probably should do this, allyesconfig should give more
>savings.
>
>One thing I'd probably do differently is not call the throwaway section
>.static_cpu_has but something like .altinstr_temporary or so and put it
>after the replacement insns:
>
>.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
>                *(.altinstr_replacement)
>		*(.altinstr_temporary)
>        }
>
>so that we know those instructions belong to the alternatives
>mechanism.
>They'll get discarded too, of course.
>
>I could just as well be talking a lot of crap, it is waay too late
>here.

Why the f do we call a subroutine for what amounts to a single bt or test instruction?
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.


* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19  1:33               ` H. Peter Anvin
@ 2016-01-19  9:22                 ` Borislav Petkov
  2016-01-20  4:02                   ` H. Peter Anvin
From: Borislav Petkov @ 2016-01-19  9:22 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Mon, Jan 18, 2016 at 05:33:03PM -0800, H. Peter Anvin wrote:
> Why the f do we call a subroutine for what amounts to a single bt or
> test instruction?

No real reason. You can kick me when you see me next time:

4a90a99c4f80 ("x86: Add a static_cpu_has_safe variant")

:-)

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-18 23:25                     ` Borislav Petkov
@ 2016-01-19 13:57                       ` Borislav Petkov
  2016-01-19 16:23                         ` Borislav Petkov
                                           ` (2 more replies)
From: Borislav Petkov @ 2016-01-19 13:57 UTC (permalink / raw)
  To: H. Peter Anvin, Andy Lutomirski, Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	Denys Vlasenko, Linus Torvalds

On Tue, Jan 19, 2016 at 12:25:47AM +0100, Borislav Petkov wrote:
> Luckily, I have this disassembler tool which dumps the alternatives
> sections in a more readable format. I can dump all the static_cpu_has()
> call sites tomorrow and we can see what gcc generates.

Ok, below is the list of all 12(!) places where static_cpu_has() has
generated a 2-byte JMP, saving three bytes each versus the 5-byte form.
That's a whopping 36 bytes, on an x86_64 allyesconfig!

Patch removing it below. I better look at a 32-bit allyesconfig too
first, though.

old insn VA: 0xffffffff8146b5ac, CPU feat: X86_FEATURE_PCOMMIT, size: 2, padlen: 0
wmb_pmem:
 ffffffff8146b5ac:      eb 02                   jmp ffffffff8146b5b0
repl insn: 0xffffffff8ca7c651, size: 0

old insn VA: 0xffffffff8200dcd6, CPU feat: X86_FEATURE_PCOMMIT, size: 2, padlen: 0
arch_has_wmb_pmem:
 ffffffff8200dcd6:      eb 02                   jmp ffffffff8200dcda
repl insn: 0xffffffff8ca7ebd8, size: 0

old insn VA: 0xffffffff828afe51, CPU feat: X86_FEATURE_PCOMMIT, size: 2, padlen: 0
arch_has_wmb_pmem:
 ffffffff828afe51:      eb 02                   jmp ffffffff828afe55
repl insn: 0xffffffff8ca80f98, size: 0

old insn VA: 0xffffffff81072f77, CPU feat: X86_FEATURE_NRIPS, size: 2, padlen: 0
rdpmc_interception:
 ffffffff81072f77:      eb 4d                   jmp ffffffff81072fc6
repl insn: 0xffffffff8ca79f6b, size: 0

old insn VA: 0xffffffff8107437d, CPU feat: X86_FEATURE_NRIPS, size: 2, padlen: 0
svm_queue_exception:
 ffffffff8107437d:      eb 6f                   jmp ffffffff810743ee
repl insn: 0xffffffff8ca79f9f, size: 0

old insn VA: 0xffffffff8107741b, CPU feat: X86_FEATURE_NRIPS, size: 2, padlen: 0
svm_check_intercept:
 ffffffff8107741b:      eb 67                   jmp ffffffff81077484
repl insn: 0xffffffff8ca79fed, size: 0

old insn VA: 0xffffffff8107741b, CPU feat: X86_FEATURE_NRIPS, size: 2, padlen: 0
svm_check_intercept:
 ffffffff8107741b:      eb 67                   jmp ffffffff81077484
repl insn: 0xffffffff8ca79fed, size: 0

old insn VA: 0xffffffff81075c4f, CPU feat: X86_FEATURE_TSCRATEMSR, size: 2, padlen: 0
svm_hardware_enable:
 ffffffff81075c4f:      eb 57                   jmp ffffffff81075ca8
repl insn: 0xffffffff8ca79fb9, size: 0

old insn VA: 0xffffffff81072c00, CPU feat: X86_FEATURE_DECODEASSISTS, size: 2, padlen: 0
invlpg_interception:
 ffffffff81072c00:      eb 55                   jmp ffffffff81072c57
repl insn: 0xffffffff8ca79f51, size: 0

old insn VA: 0xffffffff8107097c, CPU feat: X86_FEATURE_FLUSHBYASID, size: 2, padlen: 0
svm_flush_tlb:
 ffffffff8107097c:      eb 35                   jmp ffffffff810709b3
repl insn: 0xffffffff8ca79ee9, size: 0

old insn VA: 0xffffffff8108c0d0, CPU feat: X86_BUG_SYSRET_SS_ATTRS, size: 2, padlen: 0
__switch_to:
 ffffffff8108c0d0:      eb 70                   jmp ffffffff8108c142
repl insn: 0xffffffff8ca7a1a7, size: 0

old insn VA: 0xffffffff81075d90, CPU feat: X86_BUG_AMD_TLB_MMATCH, size: 2, padlen: 0
svm_hardware_enable:
 ffffffff81075d90:      eb 7c                   jmp ffffffff81075e0e
repl insn: 0xffffffff8ca79fd3, size: 0

---
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed9..68a2d1f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -350,16 +350,6 @@ config DEBUG_IMR_SELFTEST
 
 	  If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-	bool "Debug alternatives"
-	depends on DEBUG_KERNEL
-	---help---
-	  This option causes additional code to be generated which
-	  fails if static_cpu_has() is used before alternatives have
-	  run.
-
-	  If unsure, say N.
-
 config X86_DEBUG_FPU
 	bool "Debug the x86 FPU code"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0366374..c2d7a97 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -477,7 +477,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 	 * We don't allow syscalls at all from VM86 mode, but we still
 	 * need to check VM, because we might be returning from sys_vm86.
 	 */
-	return static_cpu_has(X86_FEATURE_SEP) &&
+	return static_cpu_has_safe(X86_FEATURE_SEP) &&
 		regs->cs == __USER_CS && regs->ss == __USER_DS &&
 		regs->ip == landing_pad &&
 		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 69f1366..2fb511b 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -114,8 +114,8 @@ GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
 
 #endif /* CONFIG_X86_64 */
 
-#define arch_has_random()	static_cpu_has(X86_FEATURE_RDRAND)
-#define arch_has_random_seed()	static_cpu_has(X86_FEATURE_RDSEED)
+#define arch_has_random()	static_cpu_has_safe(X86_FEATURE_RDRAND)
+#define arch_has_random_seed()	static_cpu_has_safe(X86_FEATURE_RDSEED)
 
 #else
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c94..5fe399a 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -419,89 +419,6 @@ extern bool __static_cpu_has_safe(u16 bit);
  * These are only valid after alternatives have run, but will statically
  * patch the target code for additional performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
@@ -588,7 +505,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 /*
  * gcc 3.x is too stupid to do the static test; fall back to dynamic.
  */
-#define static_cpu_has(bit)		boot_cpu_has(bit)
 #define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
@@ -596,7 +512,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 #define set_cpu_bug(c, bit)		set_cpu_cap(c, (bit))
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
 
-#define static_cpu_has_bug(bit)		static_cpu_has((bit))
 #define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
 
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b..2bd6e47 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -96,7 +96,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
 	if (!current_set_polling_and_test()) {
-		if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
+		if (static_cpu_has_bug_safe(X86_BUG_CLFLUSH_MONITOR)) {
 			mb();
 			clflush((void *)&current_thread_info()->flags);
 			mb();
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 1544fab..5d7abb4 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -142,7 +142,7 @@ static inline bool __arch_has_wmb_pmem(void)
 	 * We require that wmb() be an 'sfence', that is only guaranteed on
 	 * 64-bit builds
 	 */
-	return static_cpu_has(X86_FEATURE_PCOMMIT);
+	return static_cpu_has_safe(X86_FEATURE_PCOMMIT);
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index eaba080..f456616 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -43,7 +43,7 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
 #define virt_spin_lock virt_spin_lock
 static inline bool virt_spin_lock(struct qspinlock *lock)
 {
-	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+	if (!static_cpu_has_safe(X86_FEATURE_HYPERVISOR))
 		return false;
 
 	/*
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 18ca99f..f8a6cfb 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -32,11 +32,11 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   "fpu_exception\t: %s\n"
 		   "cpuid level\t: %d\n"
 		   "wp\t\t: %s\n",
-		   static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
-		   static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
-		   static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
-		   static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
-		   static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+		   static_cpu_has_bug_safe(X86_BUG_FDIV) ? "yes" : "no",
+		   static_cpu_has_bug_safe(X86_BUG_F00F) ? "yes" : "no",
+		   static_cpu_has_bug_safe(X86_BUG_COMA) ? "yes" : "no",
+		   static_cpu_has_safe(X86_FEATURE_FPU) ? "yes" : "no",
+		   static_cpu_has_safe(X86_FEATURE_FPU) ? "yes" : "no",
 		   c->cpuid_level,
 		   c->wp_works_ok ? "yes" : "no");
 }
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 0bc3490..9b0163c 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -275,7 +275,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	fpu__activate_fpstate_read(fpu);
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has_safe(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
 	if (!cpu_has_fxsr)
@@ -306,7 +306,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has_safe(X86_FEATURE_FPU))
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
 	if (!cpu_has_fxsr)
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 31c6a60..05b6ede 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -162,7 +162,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_WRITE, buf, size))
 		return -EACCES;
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has_safe(X86_FEATURE_FPU))
 		return fpregs_soft_get(current, NULL, 0,
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
@@ -267,7 +267,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 
 	fpu__activate_curr(fpu);
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has_safe(X86_FEATURE_FPU))
 		return fpregs_soft_set(current, NULL,
 				       0, sizeof(struct user_i387_ia32_struct),
 				       NULL, buf) != 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b9d99e0..c49a284 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -411,7 +411,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+	if (static_cpu_has_bug_safe(X86_BUG_SYSRET_SS_ATTRS)) {
 		/*
 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
 		 * does not update the cached descriptor.  As a result, if we
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c8eda14..89d5ad7 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -32,7 +32,7 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 
-	if (!static_cpu_has(X86_FEATURE_XSAVE))
+	if (!static_cpu_has_safe(X86_FEATURE_XSAVE))
 		return false;
 
 	best = kvm_find_cpuid_entry(vcpu, 1, 0);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c13a64b..1892bdd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -516,7 +516,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (svm->vmcb->control.next_rip != 0) {
-		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
+		WARN_ON_ONCE(!static_cpu_has_safe(X86_FEATURE_NRIPS));
 		svm->next_rip = svm->vmcb->control.next_rip;
 	}
 
@@ -548,7 +548,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
 		return;
 
-	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
+	if (nr == BP_VECTOR && !static_cpu_has_safe(X86_FEATURE_NRIPS)) {
 		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
 
 		/*
@@ -577,7 +577,7 @@ static void svm_init_erratum_383(void)
 	int err;
 	u64 val;
 
-	if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
+	if (!static_cpu_has_bug_safe(X86_BUG_AMD_TLB_MMATCH))
 		return;
 
 	/* Use _safe variants to not break nested virtualization */
@@ -631,7 +631,7 @@ static int has_svm(void)
 static void svm_hardware_disable(void)
 {
 	/* Make sure we clean up behind us */
-	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
+	if (static_cpu_has_safe(X86_FEATURE_TSCRATEMSR))
 		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
 
 	cpu_svm_disable();
@@ -674,7 +674,7 @@ static int svm_hardware_enable(void)
 
 	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 
-	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+	if (static_cpu_has_safe(X86_FEATURE_TSCRATEMSR)) {
 		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
 		__this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
 	}
@@ -1233,7 +1233,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+	if (static_cpu_has_safe(X86_FEATURE_TSCRATEMSR)) {
 		u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
 		if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
 			__this_cpu_write(current_tsc_ratio, tsc_ratio);
@@ -1241,7 +1241,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		}
 	}
 	/* This assumes that the kernel never uses MSR_TSC_AUX */
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
+	if (static_cpu_has_safe(X86_FEATURE_RDTSCP))
 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 }
 
@@ -2806,7 +2806,7 @@ static int iret_interception(struct vcpu_svm *svm)
 
 static int invlpg_interception(struct vcpu_svm *svm)
 {
-	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
+	if (!static_cpu_has_safe(X86_FEATURE_DECODEASSISTS))
 		return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
 	kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
@@ -2823,7 +2823,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
 {
 	int err;
 
-	if (!static_cpu_has(X86_FEATURE_NRIPS))
+	if (!static_cpu_has_safe(X86_FEATURE_NRIPS))
 		return emulate_on_interception(svm);
 
 	err = kvm_rdpmc(&svm->vcpu);
@@ -2864,7 +2864,7 @@ static int cr_interception(struct vcpu_svm *svm)
 	unsigned long val;
 	int err;
 
-	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
+	if (!static_cpu_has_safe(X86_FEATURE_DECODEASSISTS))
 		return emulate_on_interception(svm);
 
 	if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
@@ -3710,7 +3710,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
+	if (static_cpu_has_safe(X86_FEATURE_FLUSHBYASID))
 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
 	else
 		svm->asid_generation--;
@@ -4282,7 +4282,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	}
 
 	/* TODO: Advertise NRIPS to guest hypervisor unconditionally */
-	if (static_cpu_has(X86_FEATURE_NRIPS))
+	if (static_cpu_has_safe(X86_FEATURE_NRIPS))
 		vmcb->control.next_rip  = info->next_rip;
 	vmcb->control.exit_code = icpt_info.exit_code;
 	vmexit = nested_svm_exit_handled(svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..4bef603 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8993,7 +8993,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	if (cpu_has_secondary_exec_ctrls())
 		vmcs_set_secondary_exec_control(secondary_exec_ctl);
 
-	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
+	if (static_cpu_has_safe(X86_FEATURE_PCOMMIT) && nested) {
 		if (guest_cpuid_has_pcommit(vcpu))
 			vmx->nested.nested_vmx_secondary_ctls_high |=
 				SECONDARY_EXEC_PCOMMIT;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index eef44d9..711e9bc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1033,7 +1033,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 	if (!IS_ENABLED(CONFIG_X86_SMAP))
 		return false;
 
-	if (!static_cpu_has(X86_FEATURE_SMAP))
+	if (!static_cpu_has_safe(X86_FEATURE_SMAP))
 		return false;
 
 	if (error_code & PF_USER)
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 55d38cf..191650f 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -275,7 +275,7 @@ static void do_inject(void)
 	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
 	 * Fam10h and later BKDGs.
 	 */
-	if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+	if (static_cpu_has_safe(X86_FEATURE_AMD_DCM) && b == 4) {
 		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
 		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
 	}
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index f6b79ab..87b0cf4 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -115,7 +115,7 @@ static int __init amd_freq_sensitivity_init(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
 		return -ENODEV;
 
-	if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK))
+	if (!static_cpu_has_safe(X86_FEATURE_PROC_FEEDBACK))
 		return -ENODEV;
 
 	if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val))
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 0b5bf13..aa71612 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1193,7 +1193,7 @@ static int powernowk8_init(void)
 	unsigned int i, supported_cpus = 0;
 	int ret;
 
-	if (static_cpu_has(X86_FEATURE_HW_PSTATE)) {
+	if (static_cpu_has_safe(X86_FEATURE_HW_PSTATE)) {
 		__request_acpi_cpufreq();
 		return -ENODEV;
 	}


-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19 13:57                       ` Borislav Petkov
@ 2016-01-19 16:23                         ` Borislav Petkov
  2016-01-19 23:10                         ` Borislav Petkov
  2016-01-20  4:03                         ` H. Peter Anvin
From: Borislav Petkov @ 2016-01-19 16:23 UTC (permalink / raw)
  To: H. Peter Anvin, Andy Lutomirski, Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	Denys Vlasenko, Linus Torvalds

On Tue, Jan 19, 2016 at 02:57:14PM +0100, Borislav Petkov wrote:
> Patch removing it below. I better look at a 32-bit allyesconfig too
> first, though.

Yap, no 2-byte jumps in the static_cpu_has()-generated code on a 32-bit
allyesconfig build.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19 13:57                       ` Borislav Petkov
  2016-01-19 16:23                         ` Borislav Petkov
@ 2016-01-19 23:10                         ` Borislav Petkov
  2016-01-19 23:26                           ` Andy Lutomirski
  2016-01-20  4:03                         ` H. Peter Anvin
From: Borislav Petkov @ 2016-01-19 23:10 UTC (permalink / raw)
  To: H. Peter Anvin, Andy Lutomirski, Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	Denys Vlasenko, Linus Torvalds

On Tue, Jan 19, 2016 at 02:57:14PM +0100, Borislav Petkov wrote:
> Patch removing it below.

Andy has a point: I should simply drop static_cpu_has and kill the "_safe"
suffix of the remaining variant:

---
 arch/x86/Kconfig.debug               | 10 ----
 arch/x86/include/asm/cpufeature.h    | 99 +++---------------------------------
 arch/x86/include/asm/fpu/internal.h  | 14 ++---
 arch/x86/kernel/apic/apic_numachip.c |  4 +-
 arch/x86/kernel/cpu/common.c         |  4 +-
 arch/x86/kernel/vm86_32.c            |  2 +-
 drivers/cpufreq/intel_pstate.c       |  2 +-
 fs/btrfs/disk-io.c                   |  2 +-
 8 files changed, 21 insertions(+), 116 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed9..68a2d1f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -350,16 +350,6 @@ config DEBUG_IMR_SELFTEST
 
 	  If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-	bool "Debug alternatives"
-	depends on DEBUG_KERNEL
-	---help---
-	  This option causes additional code to be generated which
-	  fails if static_cpu_has() is used before alternatives have
-	  run.
-
-	  If unsure, say N.
-
 config X86_DEBUG_FPU
 	bool "Debug the x86 FPU code"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c94..9219f00 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -406,103 +406,20 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 /*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
  * fast paths and boot_cpu_has() otherwise!
  */
 
 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
+extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These are only valid after alternatives have run, but will statically
  * patch the target code for additional performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
 		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
@@ -536,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 	t_no:
 		return false;
 	t_dynamic:
-		return __static_cpu_has_safe(bit);
+		return __static_cpu_has(bit);
 #else
 		u8 flag;
 		/* Open-coded due to __stringify() in ALTERNATIVE() */
@@ -574,22 +491,21 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     ".previous\n"
 			     : "=qm" (flag)
 			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+		return (flag == 2 ? __static_cpu_has(bit) : flag);
 #endif /* CC_HAVE_ASM_GOTO */
 }
 
-#define static_cpu_has_safe(bit)				\
+#define static_cpu_has(bit)					\
 (								\
 	__builtin_constant_p(boot_cpu_has(bit)) ?		\
 		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
+		_static_cpu_has(bit)				\
 )
 #else
 /*
  * gcc 3.x is too stupid to do the static test; fall back to dynamic.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
@@ -597,7 +513,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
 
 #define static_cpu_has_bug(bit)		static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES		(NCAPINTS * 32)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440d..97022dd 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -58,22 +58,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+	return static_cpu_has(X86_FEATURE_XSAVEOPT);
 }
 
 static __always_inline __pure bool use_xsave(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVE);
+	return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
 static __always_inline __pure bool use_fxsr(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_FXSR);
+	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
 /*
@@ -300,7 +300,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +322,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +460,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
 	 * pending. Clear the x87 state here by setting it to fixed values.
 	 * "m" is a random variable that should be in L1.
 	 */
-	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
 			"emms\n\t"
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c..ab5c2c6 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
 	unsigned long value;
 	unsigned int id = (x >> 24) & 0xff;
 
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
 		id |= (value << 2) & 0xff00;
 	}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 	this_cpu_write(cpu_llc_id, node);
 
 	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, val);
 		nodes = ((val >> 3) & 7) + 1;
 	}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de..a57ec0d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1483,11 +1483,11 @@ void warn_pre_alternatives(void)
 EXPORT_SYMBOL_GPL(warn_pre_alternatives);
 #endif
 
-inline bool __static_cpu_has_safe(u16 bit)
+inline bool __static_cpu_has(u16 bit)
 {
 	return boot_cpu_has(bit);
 }
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
+EXPORT_SYMBOL_GPL(__static_cpu_has);
 
 static void bsp_resume(void)
 {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b85..3dce1ca 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	/* make room for real-mode segments */
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has_safe(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP))
 		tsk->thread.sysenter_cs = 0;
 
 	load_sp0(tss, &tsk->thread);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d47..3a4b39a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void)
 	if (!all_cpu_data)
 		return -ENOMEM;
 
-	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+	if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
 		pr_info("intel_pstate: HWP enabled\n");
 		hwp_active++;
 	}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e99ccd6..87ce612 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -924,7 +924,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
 	if (bio_flags & EXTENT_BIO_TREE_LOG)
 		return 0;
 #ifdef CONFIG_X86
-	if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+	if (static_cpu_has(X86_FEATURE_XMM4_2))
 		return 0;
 #endif
 	return 1;
-- 
1.8.5.6

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19 23:10                         ` Borislav Petkov
@ 2016-01-19 23:26                           ` Andy Lutomirski
  2016-01-19 23:49                             ` Boris Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: Andy Lutomirski @ 2016-01-19 23:26 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: H. Peter Anvin, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Tue, Jan 19, 2016 at 3:10 PM, Borislav Petkov <bp@suse.de> wrote:
> On Tue, Jan 19, 2016 at 02:57:14PM +0100, Borislav Petkov wrote:
>> Patch removing it below.
>
> Andy has a point: I should simply drop static_cpu_has and kill the "_safe"
> suffix of the remaining variant:
>

Is this on top of Brian's patch?  I think we should do both or neither.

--Andy

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19 23:26                           ` Andy Lutomirski
@ 2016-01-19 23:49                             ` Boris Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: Boris Petkov @ 2016-01-19 23:49 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: H. Peter Anvin, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

Andy Lutomirski <luto@amacapital.net> wrote:

>Is this on top of Brian's patch?  I think we should do both or neither.

Of course. I'll prep a branch tomorrow for the build bot to smoke-test.


-- 
Sent from a small device: formatting sux and brevity is inevitable. 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19  9:22                 ` Borislav Petkov
@ 2016-01-20  4:02                   ` H. Peter Anvin
  2016-01-20  4:39                     ` Brian Gerst
  2016-01-20 15:01                     ` Borislav Petkov
  0 siblings, 2 replies; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20  4:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On 01/19/16 01:22, Borislav Petkov wrote:
> On Mon, Jan 18, 2016 at 05:33:03PM -0800, H. Peter Anvin wrote:
>> Why the f do we call a subroutine for what amounts to a single bt or
>> test instruction?
> 
> No real reason. You can kick me when you see me next time:
> 
> 4a90a99c4f80 ("x86: Add a static_cpu_has_safe variant")
> 

So, here is my suggestion:

1. Just get rid of static_cpu_has_safe() and make static_cpu_has() safe.

2. Get rid of the non-asm goto variant and just fall back to dynamic if
asm goto is unavailable.  It doesn't make any sense, really, if it is
supposed to be safe, and by now the asm goto-capable gcc is in wider
use.  (Originally the gcc 3.x fallback to pure dynamic didn't exist,
either.)

3. Put the dynamic test in the .init.text section and inline it:

	.section .init.text,"ax"
	testb %2,%3
	jnz %[t_yes]
	jmp %[t_no]
	.previous

	... "i" (1 << (bit & 7)),
	    "m" (((const char *)boot_cpu_data->x86_capability)[bit >> 3]) ...

(The code would be slightly simpler/cleaner with testl, but that would
unnecessarily create a long immediate, or with btl, but that would be
slower.  We could use CONST_MASK_ADDR() and CONST_MASK() from
asm/bitops.h, but I'm slightly uncomfortable with the idea of leveraging
an interface which is ultimately an internal implementation detail of
bitops.h that might change in the future without people realizing its
implications.)

The only thing we lose is the 2-byte optimization in case we have the
good luck for it to actually work.  At this point I'm thinking it isn't
worth it, and instead that safety trumps it.  It *would* be interesting
to see if there are any call sites where it would actually kick in.
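
Purely to spell out the arithmetic in (3) (a sketch, not kernel code --
cap_test_byte() is a made-up name):

	/* Byte bit>>3 of the capability array, mask 1 << (bit & 7);
	 * this is what the testb above computes. */
	static inline int cap_test_byte(const unsigned char *caps,
					unsigned int bit)
	{
		return (caps[bit >> 3] & (1u << (bit & 7))) != 0;
	}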

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-19 13:57                       ` Borislav Petkov
  2016-01-19 16:23                         ` Borislav Petkov
  2016-01-19 23:10                         ` Borislav Petkov
@ 2016-01-20  4:03                         ` H. Peter Anvin
  2016-01-20 10:33                           ` Borislav Petkov
  2 siblings, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20  4:03 UTC (permalink / raw)
  To: Borislav Petkov, Andy Lutomirski, Brian Gerst
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
	Denys Vlasenko, Linus Torvalds

On 01/19/16 05:57, Borislav Petkov wrote:
> 
> old insn VA: 0xffffffff8108c0d0, CPU feat: X86_BUG_SYSRET_SS_ATTRS, size: 2, padlen: 0
> __switch_to:
>  ffffffff8108c0d0:      eb 70                   jmp ffffffff8108c142
> repl insn: 0xffffffff8ca7a1a7, size: 0
> 

This is the only one I could possibly imagine mattering.  Would it be
possible to get the disassembly here?

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20  4:02                   ` H. Peter Anvin
@ 2016-01-20  4:39                     ` Brian Gerst
  2016-01-20  4:42                       ` H. Peter Anvin
  2016-01-20 15:01                     ` Borislav Petkov
  1 sibling, 1 reply; 66+ messages in thread
From: Brian Gerst @ 2016-01-20  4:39 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Borislav Petkov, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Andy Lutomirski, Linus Torvalds

On Tue, Jan 19, 2016 at 11:02 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 01/19/16 01:22, Borislav Petkov wrote:
>> On Mon, Jan 18, 2016 at 05:33:03PM -0800, H. Peter Anvin wrote:
>>> Why the f do we call a subroutine for what amounts to a single bt or
>>> test instruction?
>>
>> No real reason. You can kick me when you see me next time:
>>
>> 4a90a99c4f80 ("x86: Add a static_cpu_has_safe variant")
>>
>
> So, here is my suggestion:
>
> 1. Just get rid of static_cpu_has_safe() and make static_cpu_has() safe.
>
> 2. Get rid of the non-asm goto variant and just fall back to dynamic if
> asm goto is unavailable.  It doesn't make any sense, really, if it is
> supposed to be safe, and by now the asm goto-capable gcc is in more wide
> use.  (Originally the gcc 3.x fallback to pure dynamic didn't exist,
> either.)
>
> 3. Put the dynamic test in the .init.text section and inline it:
>
>         .section .init.text,"ax"
>         testb %2,%3
>         jnz %[t_yes]
>         jmp %[t_no]
>         .previous
>
>         ... "i" (1 << (bit & 7)),
>             "m" (((const char *)boot_cpu_data->x86_capability)[bit >> 3]) ...

Can't put it in .init.text or else you get:
WARNING: arch/x86/kernel/built-in.o(.text+0x4b9): Section mismatch in
reference from the function __switch_to() to the (unknown reference)
.init.text:(unknown)
The function __switch_to() references
the (unknown reference) __init (unknown).
This is often because __switch_to lacks a __init
annotation or the annotation of (unknown) is wrong.

We want to override that because we know that the reference will be
removed after alternatives run.  That's why I created a new section.
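
Roughly like this (a sketch only -- section name illustrative, the
.altinstructions records that patch the initial JMP away are omitted,
and it assumes a compile-time-constant bit for the "i" constraint):

	static __always_inline bool dyn_check_sketch(u16 bit)
	{
		asm_volatile_goto("1: jmp 6f\n"
			 "2:\n"
			 ".section .static_cpu_has,\"ax\"\n"
			 "6: testb %1, %0\n"
			 "   jnz %l[t_yes]\n"
			 "   jmp %l[t_no]\n"
			 ".previous\n"
			 : : "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]),
			     "i" (1 << (bit & 7))
			 : : t_yes, t_no);
	t_yes:
		return true;
	t_no:
		return false;
	}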

--
Brian Gerst

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20  4:39                     ` Brian Gerst
@ 2016-01-20  4:42                       ` H. Peter Anvin
  2016-01-20 10:50                         ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20  4:42 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Borislav Petkov, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Andy Lutomirski, Linus Torvalds

On 01/19/16 20:39, Brian Gerst wrote:
> 
> Can't put it in .init.text or else you get:
> WARNING: arch/x86/kernel/built-in.o(.text+0x4b9): Section mismatch in
> reference from the function __switch_to() to the (unknown reference)
> .init.text:(unknown)
> The function __switch_to() references
> the (unknown reference) __init (unknown).
> This is often because __switch_to lacks a __init
> annotation or the annotation of (unknown) is wrong.
> 
> We want to override that because we know that the reference will be
> removed after alternatives run.  That's why I created a new section.
> 

Right.  I wish we could tag reference call sites as clean, not sources
or targets.  Sigh.

However, that's not too much of an issue.

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20  4:03                         ` H. Peter Anvin
@ 2016-01-20 10:33                           ` Borislav Petkov
  2016-01-20 10:41                             ` H. Peter Anvin
  0 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-20 10:33 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Tue, Jan 19, 2016 at 08:03:55PM -0800, H. Peter Anvin wrote:
> On 01/19/16 05:57, Borislav Petkov wrote:
> > 
> > old insn VA: 0xffffffff8108c0d0, CPU feat: X86_BUG_SYSRET_SS_ATTRS, size: 2, padlen: 0
> > __switch_to:
> >  ffffffff8108c0d0:      eb 70                   jmp ffffffff8108c142
> > repl insn: 0xffffffff8ca7a1a7, size: 0
> > 
> 
> This is the only one I could possibly imagine mattering.  Would it be
> possible to get the disassembly here?

Sure, I've kept the vmlinux, see below:

ffffffff8108c0d0:       eb 70                   jmp    ffffffff8108c142 <__switch_to+0x838>

jumps to:

ffffffff8108c142:       48 ff 05 b7 08 aa 0b    incq   0xbaa08b7(%rip)

which is something-gcov. In any case, it jumps over the SS fixup code:

        if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {

		...

                unsigned short ss_sel;
                savesegment(ss, ss_sel);
                if (ss_sel != __KERNEL_DS)
                        loadsegment(ss, __KERNEL_DS);
        }

I guess we could fix that with an alternative_call() where @oldinstr is
empty so that on !X86_BUG_SYSRET_SS_ATTRS machines we simply return and
on the others, we do a CALL fixup_ss or so.

We have to pay attention to clobbers, though, similar to the POPCNT fun
in __arch_hweight32().
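
As a first approximation, something like this (fixup_ss being a
hypothetical new out-of-line helper):

	/* Empty oldinstr: unaffected machines execute only the NOP
	 * padding.  The CALL's register clobbers are the part that
	 * needs care. */
	alternative("", "call fixup_ss", X86_BUG_SYSRET_SS_ATTRS);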

ffffffff8108b90a <__switch_to>:
ffffffff8108b90a:       55                      push   %rbp
ffffffff8108b90b:       48 8d 87 c0 2d 00 00    lea    0x2dc0(%rdi),%rax
ffffffff8108b912:       48 c7 c1 80 f6 1d 00    mov    $0x1df680,%rcx
ffffffff8108b919:       48 89 e5                mov    %rsp,%rbp
ffffffff8108b91c:       41 57                   push   %r15
ffffffff8108b91e:       45 31 ff                xor    %r15d,%r15d
ffffffff8108b921:       41 56                   push   %r14

...

ffffffff8108c0c2:       48 ff 05 07 09 aa 0b    incq   0xbaa0907(%rip)        # ffffffff8cb2c9d0 <__gcov0.__switch_to+0xc0>
ffffffff8108c0c9:       48 ff 05 08 09 aa 0b    incq   0xbaa0908(%rip)        # ffffffff8cb2c9d8 <__gcov0.__switch_to+0xc8>
ffffffff8108c0d0:       eb 70                   jmp    ffffffff8108c142 <__switch_to+0x838>
ffffffff8108c0d2:       48 ff 05 07 09 aa 0b    incq   0xbaa0907(%rip)        # ffffffff8cb2c9e0 <__gcov0.__switch_to+0xd0>
ffffffff8108c0d9:       48 ff 05 08 09 aa 0b    incq   0xbaa0908(%rip)        # ffffffff8cb2c9e8 <__gcov0.__switch_to+0xd8>
ffffffff8108c0e0:       48 ff 05 11 09 aa 0b    incq   0xbaa0911(%rip)        # ffffffff8cb2c9f8 <__gcov0.__switch_to+0xe8>
ffffffff8108c0e7:       66 8c d0                mov    %ss,%ax
ffffffff8108c0ea:       66 83 f8 18             cmp    $0x18,%ax
ffffffff8108c0ee:       75 17                   jne    ffffffff8108c107 <__switch_to+0x7fd>
ffffffff8108c0f0:       eb 57                   jmp    ffffffff8108c149 <__switch_to+0x83f>
ffffffff8108c0f2:       48 ff 05 cf 08 aa 0b    incq   0xbaa08cf(%rip)        # ffffffff8cb2c9c8 <__gcov0.__switch_to+0xb8>
ffffffff8108c0f9:       e8 db b0 01 00          callq  ffffffff810a71d9 <warn_pre_alternatives>
ffffffff8108c0fe:       48 ff 05 eb 08 aa 0b    incq   0xbaa08eb(%rip)        # ffffffff8cb2c9f0 <__gcov0.__switch_to+0xe0>
ffffffff8108c105:       eb 3b                   jmp    ffffffff8108c142 <__switch_to+0x838>
ffffffff8108c107:       48 ff 05 fa 08 aa 0b    incq   0xbaa08fa(%rip)        # ffffffff8cb2ca08 <__gcov0.__switch_to+0xf8>
ffffffff8108c10e:       b8 18 00 00 00          mov    $0x18,%eax
ffffffff8108c113:       8e d0                   mov    %eax,%ss
ffffffff8108c115:       48 ff 05 f4 08 aa 0b    incq   0xbaa08f4(%rip)        # ffffffff8cb2ca10 <__gcov0.__switch_to+0x100>
ffffffff8108c11c:       eb 2b                   jmp    ffffffff8108c149 <__switch_to+0x83f>
ffffffff8108c11e:       48 ff 05 9b 02 aa 0b    incq   0xbaa029b(%rip)        # ffffffff8cb2c3c0 <__gcov0.copy_xregs_to_kernel+0x30>
ffffffff8108c125:       e9 91 f9 ff ff          jmpq   ffffffff8108babb <__switch_to+0x1b1>
ffffffff8108c12a:       48 ff 05 bf 02 aa 0b    incq   0xbaa02bf(%rip)        # ffffffff8cb2c3f0 <__gcov0.copy_kernel_to_xregs+0x20>
ffffffff8108c131:       e9 84 fe ff ff          jmpq   ffffffff8108bfba <__switch_to+0x6b0>
ffffffff8108c136:       48 ff 05 43 02 aa 0b    incq   0xbaa0243(%rip)        # ffffffff8cb2c380 <__gcov0.copy_kernel_to_fxregs+0x20>
ffffffff8108c13d:       e9 db fe ff ff          jmpq   ffffffff8108c01d <__switch_to+0x713>
ffffffff8108c142:       48 ff 05 b7 08 aa 0b    incq   0xbaa08b7(%rip)        # ffffffff8cb2ca00 <__gcov0.__switch_to+0xf0>
ffffffff8108c149:       48 83 c4 28             add    $0x28,%rsp
ffffffff8108c14d:       4c 89 e0                mov    %r12,%rax
ffffffff8108c150:       5b                      pop    %rbx
ffffffff8108c151:       41 5c                   pop    %r12
ffffffff8108c153:       41 5d                   pop    %r13
ffffffff8108c155:       41 5e                   pop    %r14
ffffffff8108c157:       41 5f                   pop    %r15
ffffffff8108c159:       5d                      pop    %rbp
ffffffff8108c15a:       c3                      retq


-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 10:33                           ` Borislav Petkov
@ 2016-01-20 10:41                             ` H. Peter Anvin
  2016-01-21 22:14                               ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20 10:41 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On January 20, 2016 2:33:45 AM PST, Borislav Petkov <bp@suse.de> wrote:
>On Tue, Jan 19, 2016 at 08:03:55PM -0800, H. Peter Anvin wrote:
>> On 01/19/16 05:57, Borislav Petkov wrote:
>> > 
>> > old insn VA: 0xffffffff8108c0d0, CPU feat: X86_BUG_SYSRET_SS_ATTRS, size: 2, padlen: 0
>> > __switch_to:
>> >  ffffffff8108c0d0:      eb 70                   jmp ffffffff8108c142
>> > repl insn: 0xffffffff8ca7a1a7, size: 0
>> > 
>> 
>> This is the only one I could possibly imagine mattering.  Would it be
>> possible to get the disassembly here?
>
>Sure, I've kept the vmlinux, see below:
>
>ffffffff8108c0d0:       eb 70                   jmp    ffffffff8108c142
><__switch_to+0x838>
>
>jumps to:
>
>ffffffff8108c142:       48 ff 05 b7 08 aa 0b    incq   0xbaa08b7(%rip)
>
>which is something-gcov. In any case, it jumps over the SS fixup code:
>
>        if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
>
>		...
>
>                unsigned short ss_sel;
>                savesegment(ss, ss_sel);
>                if (ss_sel != __KERNEL_DS)
>                        loadsegment(ss, __KERNEL_DS);
>        }
>
>I guess we could fix that with an alternative_call() where @oldinstr is
>empty so that on !X86_BUG_SYSRET_SS_ATTRS machines we simply return and
>on the others, we do a CALL fixup_ss or so.
>
>We have to pay attention to clobbers, though, similar to the POPCNT fun
>in __arch_hweight32().
>
>ffffffff8108b90a <__switch_to>:
>ffffffff8108b90a:       55                      push   %rbp
>ffffffff8108b90b:       48 8d 87 c0 2d 00 00    lea   
>0x2dc0(%rdi),%rax
>ffffffff8108b912:       48 c7 c1 80 f6 1d 00    mov    $0x1df680,%rcx
>ffffffff8108b919:       48 89 e5                mov    %rsp,%rbp
>ffffffff8108b91c:       41 57                   push   %r15
>ffffffff8108b91e:       45 31 ff                xor    %r15d,%r15d
>ffffffff8108b921:       41 56                   push   %r14
>
>...
>
>ffffffff8108c0c2:       48 ff 05 07 09 aa 0b    incq   0xbaa0907(%rip) 
>      # ffffffff8cb2c9d0 <__gcov0.__switch_to+0xc0>
>ffffffff8108c0c9:       48 ff 05 08 09 aa 0b    incq   0xbaa0908(%rip) 
>      # ffffffff8cb2c9d8 <__gcov0.__switch_to+0xc8>
>ffffffff8108c0d0:       eb 70                   jmp    ffffffff8108c142
><__switch_to+0x838>
>ffffffff8108c0d2:       48 ff 05 07 09 aa 0b    incq   0xbaa0907(%rip) 
>      # ffffffff8cb2c9e0 <__gcov0.__switch_to+0xd0>
>ffffffff8108c0d9:       48 ff 05 08 09 aa 0b    incq   0xbaa0908(%rip) 
>      # ffffffff8cb2c9e8 <__gcov0.__switch_to+0xd8>
>ffffffff8108c0e0:       48 ff 05 11 09 aa 0b    incq   0xbaa0911(%rip) 
>      # ffffffff8cb2c9f8 <__gcov0.__switch_to+0xe8>
>ffffffff8108c0e7:       66 8c d0                mov    %ss,%ax
>ffffffff8108c0ea:       66 83 f8 18             cmp    $0x18,%ax
>ffffffff8108c0ee:       75 17                   jne    ffffffff8108c107
><__switch_to+0x7fd>
>ffffffff8108c0f0:       eb 57                   jmp    ffffffff8108c149
><__switch_to+0x83f>
>ffffffff8108c0f2:       48 ff 05 cf 08 aa 0b    incq   0xbaa08cf(%rip) 
>      # ffffffff8cb2c9c8 <__gcov0.__switch_to+0xb8>
>ffffffff8108c0f9:       e8 db b0 01 00          callq  ffffffff810a71d9
><warn_pre_alternatives>
>ffffffff8108c0fe:       48 ff 05 eb 08 aa 0b    incq   0xbaa08eb(%rip) 
>      # ffffffff8cb2c9f0 <__gcov0.__switch_to+0xe0>
>ffffffff8108c105:       eb 3b                   jmp    ffffffff8108c142
><__switch_to+0x838>
>ffffffff8108c107:       48 ff 05 fa 08 aa 0b    incq   0xbaa08fa(%rip) 
>      # ffffffff8cb2ca08 <__gcov0.__switch_to+0xf8>
>ffffffff8108c10e:       b8 18 00 00 00          mov    $0x18,%eax
>ffffffff8108c113:       8e d0                   mov    %eax,%ss
>ffffffff8108c115:       48 ff 05 f4 08 aa 0b    incq   0xbaa08f4(%rip) 
>      # ffffffff8cb2ca10 <__gcov0.__switch_to+0x100>
>ffffffff8108c11c:       eb 2b                   jmp    ffffffff8108c149
><__switch_to+0x83f>
>ffffffff8108c11e:       48 ff 05 9b 02 aa 0b    incq   0xbaa029b(%rip) 
>      # ffffffff8cb2c3c0 <__gcov0.copy_xregs_to_kernel+0x30>
>ffffffff8108c125:       e9 91 f9 ff ff          jmpq   ffffffff8108babb
><__switch_to+0x1b1>
>ffffffff8108c12a:       48 ff 05 bf 02 aa 0b    incq   0xbaa02bf(%rip) 
>      # ffffffff8cb2c3f0 <__gcov0.copy_kernel_to_xregs+0x20>
>ffffffff8108c131:       e9 84 fe ff ff          jmpq   ffffffff8108bfba
><__switch_to+0x6b0>
>ffffffff8108c136:       48 ff 05 43 02 aa 0b    incq   0xbaa0243(%rip) 
>      # ffffffff8cb2c380 <__gcov0.copy_kernel_to_fxregs+0x20>
>ffffffff8108c13d:       e9 db fe ff ff          jmpq   ffffffff8108c01d
><__switch_to+0x713>
>ffffffff8108c142:       48 ff 05 b7 08 aa 0b    incq   0xbaa08b7(%rip) 
>      # ffffffff8cb2ca00 <__gcov0.__switch_to+0xf0>
>ffffffff8108c149:       48 83 c4 28             add    $0x28,%rsp
>ffffffff8108c14d:       4c 89 e0                mov    %r12,%rax
>ffffffff8108c150:       5b                      pop    %rbx
>ffffffff8108c151:       41 5c                   pop    %r12
>ffffffff8108c153:       41 5d                   pop    %r13
>ffffffff8108c155:       41 5e                   pop    %r14
>ffffffff8108c157:       41 5f                   pop    %r15
>ffffffff8108c159:       5d                      pop    %rbp
>ffffffff8108c15a:       c3                      retq

Ah.  What would be even more of a win would be to rebias static_cpu_has_bug() so that the fallthrough case is the functional one.  Easily done by reversing the labels.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20  4:42                       ` H. Peter Anvin
@ 2016-01-20 10:50                         ` Borislav Petkov
  2016-01-20 10:55                           ` H. Peter Anvin
  0 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-20 10:50 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Tue, Jan 19, 2016 at 08:42:10PM -0800, H. Peter Anvin wrote:
> Right.  I wish we could tag reference call sites as clean, not sources
> or targets.  Sigh.

I was thinking about something like this:

From: Borislav Petkov <bp@suse.de>
Date: Wed, 20 Jan 2016 11:45:36 +0100
Subject: [PATCH] x86/alternatives: Add an auxiliary section

Add .altinstr_aux for additional instructions which will be used before
and/or during patching. All stuff which needs more sophisticated
patching should go there. See next patch.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/vmlinux.lds.S | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf11f562..35868bf529b9 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -244,6 +244,11 @@ SECTIONS
 	 */
 	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 		*(.altinstr_replacement)
+		/*
+		 * Auxiliary section for misc instruction patching tasks. See
+		 * static_cpu_has(), for an example.
+		 */
+		*(.altinstr_aux)
 	}
 
 	/*
-- 
2.3.5

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 10:50                         ` Borislav Petkov
@ 2016-01-20 10:55                           ` H. Peter Anvin
  2016-01-20 11:05                             ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20 10:55 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On January 20, 2016 2:50:18 AM PST, Borislav Petkov <bp@suse.de> wrote:
>On Tue, Jan 19, 2016 at 08:42:10PM -0800, H. Peter Anvin wrote:
>> Right.  I wish we could tag reference call sites as clean, not
>sources
>> or targets.  Sigh.
>
>I was thinking about something like this:
>
>From: Borislav Petkov <bp@suse.de>
>Date: Wed, 20 Jan 2016 11:45:36 +0100
>Subject: [PATCH] x86/alternatives: Add an auxiliary section
>
>Add .altinstr_aux for additional instructions which will be used before
>and/or during patching. All stuff which needs more sophisticated
>patching should go there. See next patch.
>
>Signed-off-by: Borislav Petkov <bp@suse.de>
>---
> arch/x86/kernel/vmlinux.lds.S | 5 +++++
> 1 file changed, 5 insertions(+)
>
>diff --git a/arch/x86/kernel/vmlinux.lds.S
>b/arch/x86/kernel/vmlinux.lds.S
>index 74e4bf11f562..35868bf529b9 100644
>--- a/arch/x86/kernel/vmlinux.lds.S
>+++ b/arch/x86/kernel/vmlinux.lds.S
>@@ -244,6 +244,11 @@ SECTIONS
> 	 */
>	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET)
>{
> 		*(.altinstr_replacement)
>+		/*
>+		 * Auxiliary section for misc instruction patching tasks. See
>+		 * static_cpu_has(), for an example.
>+		 */
>+		*(.altinstr_aux)
> 	}
> 
> 	/*

How about:

section for code used exclusively before alternatives are run.  All references to such code must be patched out by alternatives, normally by using a patch with X86_FEATURE_ALWAYS.

See static_cpu_has() for an example.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 10:55                           ` H. Peter Anvin
@ 2016-01-20 11:05                             ` Borislav Petkov
  2016-01-20 14:48                               ` H. Peter Anvin
  0 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-20 11:05 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Wed, Jan 20, 2016 at 02:55:24AM -0800, H. Peter Anvin wrote:
> How about:
> 
> section for code used exclusively before alternatives are run.  All references to such code must be patched out by alternatives, normally by using a patch with X86_FEATURE_ALWAYS.
> 
> See static_cpu_has() for an example.

Sure.

My thinking was to make it a bit more generic so that, if we decide to do
some different monkey business with the alternatives, we can put stuff in
there too.

But we can always change that later - it's not like it is user-visible.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 11:05                             ` Borislav Petkov
@ 2016-01-20 14:48                               ` H. Peter Anvin
  0 siblings, 0 replies; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20 14:48 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On January 20, 2016 3:05:19 AM PST, Borislav Petkov <bp@suse.de> wrote:
>On Wed, Jan 20, 2016 at 02:55:24AM -0800, H. Peter Anvin wrote:
>> How about:
>> 
>> section for code used exclusively before alternatives are run.  All
>references to such code must be patched out by alternatives, normally
>by using a patch with X86_FEATURE_ALWAYS.
>> 
>> See static_cpu_has() for an example.
>
>Sure.
>
>My thinking was to make it a bit more generic so that, if we decide to
>do some different monkey business with the alternatives, we can put
>stuff in there too.
>
>But we can always change that later - it's not like it is user-visible.

I don't think the verbiage I suggested in any way disagrees with that notion.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20  4:02                   ` H. Peter Anvin
  2016-01-20  4:39                     ` Brian Gerst
@ 2016-01-20 15:01                     ` Borislav Petkov
  2016-01-20 15:09                       ` H. Peter Anvin
  1 sibling, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-20 15:01 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Tue, Jan 19, 2016 at 08:02:01PM -0800, H. Peter Anvin wrote:
> 	... "i" (1 << (bit & 7)),
> 	    "m" (((const char *)boot_cpu_data->x86_capability)[bit >> 3]) ...

Nice!

I was going to do:

		"i" (1 << (bit & 31)),
                "m" (((boot_cpu_data->x86_capability)[bit >> 5]))

:-)

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 15:01                     ` Borislav Petkov
@ 2016-01-20 15:09                       ` H. Peter Anvin
  2016-01-20 16:04                         ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20 15:09 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On 01/20/16 07:01, Borislav Petkov wrote:
> On Tue, Jan 19, 2016 at 08:02:01PM -0800, H. Peter Anvin wrote:
>> 	... "i" (1 << (bit & 7)),
>> 	    "m" (((const char *)boot_cpu_data->x86_capability)[bit >> 3]) ...
> 
> Nice!
> 
> I was going to do:
> 
> 		"i" (1 << (bit & 31)),
>                 "m" (((boot_cpu_data->x86_capability)[bit >> 5]))
> 

But then you're using testl and get long immediates.

(And the parentheses around boot_cpu_data->x86_capability are redundant.)

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 15:09                       ` H. Peter Anvin
@ 2016-01-20 16:04                         ` Borislav Petkov
  2016-01-20 16:16                           ` H. Peter Anvin
  0 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-20 16:04 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On Wed, Jan 20, 2016 at 07:09:39AM -0800, H. Peter Anvin wrote:
> But then you're using testl and get long immediates.
> 
> (And the parentheses around boot_cpu_data->x86_capability are redundant.)

Right.

Ok, below is what builds here. So no SOBs etc.

All this include hell wankery is so that we can use boot_cpu_data in
cpufeature.h. And that's not simple because boot/mkcpustr.c includes it
too so if I carve out struct cpuinfo_x86 to a separate asm/cpuinfo.h
header, it complains because it doesn't see it.

Thus this _ASM_BOOT_MKCPUSTR_ yucky marker to stop arch/x86/boot from
including it.

I'm very open to better ideas. :-)

Other than that, we do:

+                        "6: testb %[bitnum],%[cap_word]\n"
+                        "   jnz %l[t_yes]\n"
+                        "   jmp %l[t_no]\n"
+                        ".previous\n"
+                        : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+                            [bitnum] "i" (1 << (bit & 7)),
+                            [cap_word] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])

and that has the TESTB.
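
To sanity-check the operand arithmetic on a concrete bit (a worked
example only): X86_BUG_SYSRET_SS_ATTRS is bit 16*32+8 = 520, so

	unsigned int bit  = 16*32 + 8;		/* 520 */
	unsigned int byte = bit >> 3;		/* byte 65 of x86_capability */
	unsigned int mask = 1u << (bit & 7);	/* 0x01 */

i.e. the stub ends up as a "testb $0x1" against boot_cpu_data at a fixed
offset.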

Thoughts?

---
 arch/x86/boot/mkcpustr.c          |  5 +++
 arch/x86/include/asm/cpufeature.h | 34 +++++++++++++-----
 arch/x86/include/asm/cpuinfo.h    | 73 +++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/processor.h  | 65 ----------------------------------
 arch/x86/kernel/cpu/common.c      |  6 ----
 arch/x86/kernel/vmlinux.lds.S     |  9 +++--
 6 files changed, 110 insertions(+), 82 deletions(-)
 create mode 100644 arch/x86/include/asm/cpuinfo.h

diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e66a62..c32113a0e3d4 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,6 +17,11 @@
 
 #include "../include/asm/required-features.h"
 #include "../include/asm/disabled-features.h"
+
+/*
+ * Stop cpufeature.h from including cpuinfo.h in kernel proper.
+ */
+#define _ASM_BOOT_MKCPUSTR_
 #include "../include/asm/cpufeature.h"
 #include "../kernel/cpu/capflags.c"
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c9464297..a16cee2376c4 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -15,6 +15,10 @@
 #define NCAPINTS	16	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
+#ifndef _ASM_BOOT_MKCPUSTR_
+#include <asm/cpuinfo.h>
+#endif
+
 /*
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name.  If the string is "",
@@ -412,7 +416,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 
 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
@@ -505,7 +508,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -530,17 +533,23 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6: testb %[bitnum],%[cap_word]\n"
+			 "   jnz %l[t_yes]\n"
+			 "   jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_word] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has_safe(bit);
 #else
 		u8 flag;
 		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
+		asm volatile("1: jmp 7f\n"
 			     "2:\n"
 			     ".section .altinstructions,\"a\"\n"
 			     " .long 1b - .\n"		/* src offset */
@@ -572,9 +581,16 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     "5: movb $1,%0\n"
 			     "6:\n"
 			     ".previous\n"
+			     ".section .altinstr_aux,\"ax\"\n"
+			     "7: testb %[bitnum],%[cap_word]\n"
+			     "   setnz %0\n"
+			     "   jmp 2b\n"
+			     ".previous\n"
 			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+			     : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			       [bitnum] "i" (1 << (bit & 7)),
+			       [cap_word] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]));
+		return (flag != 0);
 #endif /* CC_HAVE_ASM_GOTO */
 }
 
diff --git a/arch/x86/include/asm/cpuinfo.h b/arch/x86/include/asm/cpuinfo.h
new file mode 100644
index 000000000000..f906b538fdb4
--- /dev/null
+++ b/arch/x86/include/asm/cpuinfo.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_X86_CPUINFO_H
+#define _ASM_X86_CPUINFO_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+	__u8			x86;		/* CPU family */
+	__u8			x86_vendor;	/* CPU vendor */
+	__u8			x86_model;
+	__u8			x86_mask;
+#ifdef CONFIG_X86_32
+	char			wp_works_ok;	/* It doesn't on 386's */
+
+	/* Problems on some 486Dx4's and old 386's: */
+	char			rfu;
+	char			pad0;
+	char			pad1;
+#else
+	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
+	int			x86_tlbsize;
+#endif
+	__u8			x86_virt_bits;
+	__u8			x86_phys_bits;
+	/* CPUID returned core id bits: */
+	__u8			x86_coreid_bits;
+	/* Max extended CPUID function supported: */
+	__u32			extended_cpuid_level;
+	/* Maximum supported CPUID level, -1=no CPUID: */
+	int			cpuid_level;
+	__u32			x86_capability[NCAPINTS + NBUGINTS];
+	char			x86_vendor_id[16];
+	char			x86_model_id[64];
+	/* in KB - valid for CPUS which support this call: */
+	int			x86_cache_size;
+	int			x86_cache_alignment;	/* In bytes */
+	/* Cache QoS architectural values: */
+	int			x86_cache_max_rmid;	/* max index */
+	int			x86_cache_occ_scale;	/* scale to bytes */
+	int			x86_power;
+	unsigned long		loops_per_jiffy;
+	/* cpuid returned max cores value: */
+	u16			 x86_max_cores;
+	u16			apicid;
+	u16			initial_apicid;
+	u16			x86_clflush_size;
+	/* number of cores as seen by the OS: */
+	u16			booted_cores;
+	/* Physical processor id: */
+	u16			phys_proc_id;
+	/* Core id: */
+	u16			cpu_core_id;
+	/* Compute unit id */
+	u8			compute_unit_id;
+	/* Index into per_cpu list: */
+	u16			cpu_index;
+	u32			microcode;
+};
+
+/*
+ * capabilities of CPUs
+ */
+extern struct cpuinfo_x86	boot_cpu_data;
+extern struct cpuinfo_x86	new_cpu_data;
+#endif
+
+#endif /* _ASM_X86_CPUINFO_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2d5a50cb61a2..240d8f8d8c1b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -79,65 +79,6 @@ extern u16 __read_mostly tlb_lld_2m[NR_INFO];
 extern u16 __read_mostly tlb_lld_4m[NR_INFO];
 extern u16 __read_mostly tlb_lld_1g[NR_INFO];
 
-/*
- *  CPU type and hardware bug flags. Kept separately for each CPU.
- *  Members of this structure are referenced in head.S, so think twice
- *  before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
-	__u8			x86;		/* CPU family */
-	__u8			x86_vendor;	/* CPU vendor */
-	__u8			x86_model;
-	__u8			x86_mask;
-#ifdef CONFIG_X86_32
-	char			wp_works_ok;	/* It doesn't on 386's */
-
-	/* Problems on some 486Dx4's and old 386's: */
-	char			rfu;
-	char			pad0;
-	char			pad1;
-#else
-	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
-	int			x86_tlbsize;
-#endif
-	__u8			x86_virt_bits;
-	__u8			x86_phys_bits;
-	/* CPUID returned core id bits: */
-	__u8			x86_coreid_bits;
-	/* Max extended CPUID function supported: */
-	__u32			extended_cpuid_level;
-	/* Maximum supported CPUID level, -1=no CPUID: */
-	int			cpuid_level;
-	__u32			x86_capability[NCAPINTS + NBUGINTS];
-	char			x86_vendor_id[16];
-	char			x86_model_id[64];
-	/* in KB - valid for CPUS which support this call: */
-	int			x86_cache_size;
-	int			x86_cache_alignment;	/* In bytes */
-	/* Cache QoS architectural values: */
-	int			x86_cache_max_rmid;	/* max index */
-	int			x86_cache_occ_scale;	/* scale to bytes */
-	int			x86_power;
-	unsigned long		loops_per_jiffy;
-	/* cpuid returned max cores value: */
-	u16			 x86_max_cores;
-	u16			apicid;
-	u16			initial_apicid;
-	u16			x86_clflush_size;
-	/* number of cores as seen by the OS: */
-	u16			booted_cores;
-	/* Physical processor id: */
-	u16			phys_proc_id;
-	/* Core id: */
-	u16			cpu_core_id;
-	/* Compute unit id */
-	u8			compute_unit_id;
-	/* Index into per_cpu list: */
-	u16			cpu_index;
-	u32			microcode;
-};
-
 #define X86_VENDOR_INTEL	0
 #define X86_VENDOR_CYRIX	1
 #define X86_VENDOR_AMD		2
@@ -149,12 +90,6 @@ struct cpuinfo_x86 {
 
 #define X86_VENDOR_UNKNOWN	0xff
 
-/*
- * capabilities of CPUs
- */
-extern struct cpuinfo_x86	boot_cpu_data;
-extern struct cpuinfo_x86	new_cpu_data;
-
 extern struct tss_struct	doublefault_tss;
 extern __u32			cpu_caps_cleared[NCAPINTS];
 extern __u32			cpu_caps_set[NCAPINTS];
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de8f60a..897c65bd3faa 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1483,12 +1483,6 @@ void warn_pre_alternatives(void)
 EXPORT_SYMBOL_GPL(warn_pre_alternatives);
 #endif
 
-inline bool __static_cpu_has_safe(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 35868bf529b9..486dc0e60599 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -244,9 +244,14 @@ SECTIONS
 	 */
 	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 		*(.altinstr_replacement)
+
 		/*
-		 * Auxiliary section for misc instruction patching tasks. See
-		 * static_cpu_has(), for an example.
+		 * Section for code used exclusively before alternatives are
+		 * run. All references to such code must be patched out by
+		 * alternatives, normally by using a patch with
+		 * X86_FEATURE_ALWAYS.
+		 *
+		 * See static_cpu_has() for an example.
 		 */
 		*(.altinstr_aux)
 	}
-- 
2.3.5

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 16:04                         ` Borislav Petkov
@ 2016-01-20 16:16                           ` H. Peter Anvin
  0 siblings, 0 replies; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-20 16:16 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Brian Gerst, the arch/x86 maintainers, Linux Kernel Mailing List,
	Ingo Molnar, Denys Vlasenko, Andy Lutomirski, Linus Torvalds

On 01/20/16 08:04, Borislav Petkov wrote:
> On Wed, Jan 20, 2016 at 07:09:39AM -0800, H. Peter Anvin wrote:
>> But then you're using testl and get long immediates.
>>
>> (And the parentheses around boot_cpu_data->x86_capability are redundant.)
> 
> Right.
> 
> Ok, below is what builds here. So no SOBs etc.
> 
> All this include hell wankery is so that we can use boot_cpu_data in
> cpufeature.h. And that's not simple because boot/mkcpustr.c includes it
> too so if I carve out struct cpuinfo_x86 to a separate asm/cpuinfo.h
> header, it complains because it doesn't see it.
> 
> Thus this _ASM_BOOT_MKCPUSTR_ yucky marker to stop arch/x86/boot from
> including it.
> 
> I'm very open to better ideas. :-)
> 

I think the right answer is to split the macros that define specific CPU
features into a separate file.  It would also make it assembly-safe
which would be useful in some other places.

However, I do also want to observe that we already do have this specific
marker... it is called __KERNEL__.
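
I.e., a sketch of what I mean:

	/* mkcpustr is a host program, built without -D__KERNEL__, so no
	 * bespoke marker is needed: */
	#ifdef __KERNEL__
	#include <asm/cpuinfo.h>
	#endif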

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-20 10:41                             ` H. Peter Anvin
@ 2016-01-21 22:14                               ` Borislav Petkov
  2016-01-21 22:22                                 ` H. Peter Anvin
  0 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-21 22:14 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Wed, Jan 20, 2016 at 02:41:22AM -0800, H. Peter Anvin wrote:
> Ah. What would be even more of a win would be to rebias
> static_cpu_has_bug() so that the fallthrough case is the functional
> one. Easily done by reversing the labels.

By reversing you mean this:


---
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 77c51f4c15b7..49fa56f2b083 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -174,10 +174,10 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
                             [bitnum] "i" (1 << (bit & 7)),
                             [cap_word] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
                         : : t_yes, t_no);
-       t_yes:
-               return true;
        t_no:
                return false;
+       t_yes:
+               return true;
 #else
                return boot_cpu_has(bit);
 #endif /* CC_HAVE_ASM_GOTO */
---

?

In any case, here's what happens with the current patchset:

vmlinux:

ffffffff8100472a:       e9 50 0e de 00          jmpq   ffffffff81de557f <__alt_instructions_end+0x7aa>
ffffffff8100472f:       66 8c d0                mov    %ss,%ax
ffffffff81004732:       66 83 f8 18             cmp    $0x18,%ax
ffffffff81004736:       74 07                   je     ffffffff8100473f <__switch_to+0x2ef>
ffffffff81004738:       b8 18 00 00 00          mov    $0x18,%eax
ffffffff8100473d:       8e d0                   mov    %eax,%ss
ffffffff8100473f:       48 83 c4 18             add    $0x18,%rsp
ffffffff81004743:       4c 89 e0                mov    %r12,%rax
ffffffff81004746:       5b                      pop    %rbx
ffffffff81004747:       41 5c                   pop    %r12
ffffffff81004749:       41 5d                   pop    %r13
ffffffff8100474b:       41 5e                   pop    %r14
ffffffff8100474d:       41 5f                   pop    %r15
ffffffff8100474f:       5d                      pop    %rbp
ffffffff81004750:       c3                      retq

That first JMP above sends us to the dynamic section which is in asm now:

ffffffff81de557f:       f6 05 8f de d1 ff 01    testb  $0x1,-0x2e2171(%rip)        # ffffffff81b03415 <boot_cpu_data+0x55>
ffffffff81de5586:       0f 85 a3 f1 21 ff       jne    ffffffff8100472f <__switch_to+0x2df>
ffffffff81de558c:       e9 ae f1 21 ff          jmpq   ffffffff8100473f <__switch_to+0x2ef>

After X86_FEATURE_ALWAYS patching, that first JMP has become a 2-byte JMP:

[    0.306333] apply_alternatives: feat: 3*32+21, old: (ffffffff8100472a, len: 5), repl: (ffffffff81de4e12, len: 5), pad: 0
[    0.308005] ffffffff8100472a: old_insn: e9 50 0e de 00
[    0.312012] ffffffff81de4e12: rpl_insn: e9 28 f9 21 ff
[    0.318201] recompute_jump: target RIP: ffffffff8100473f, new_displ: 0x15
[    0.320007] recompute_jump: final displ: 0x00000013, JMP 0xffffffff8100473f
[    0.324005] ffffffff8100472a: final_insn: eb 13 0f 1f 00

so basically we jump over the %ss fixup:

ffffffff8100472a:	eb 13 0f 1f 00		jmp    ffffffff8100473f
ffffffff8100472f:       66 8c d0                mov    %ss,%ax
ffffffff81004732:       66 83 f8 18             cmp    $0x18,%ax
ffffffff81004736:       74 07                   je     ffffffff8100473f <__switch_to+0x2ef>
ffffffff81004738:       b8 18 00 00 00          mov    $0x18,%eax
ffffffff8100473d:       8e d0                   mov    %eax,%ss
ffffffff8100473f:       48 83 c4 18             add    $0x18,%rsp		<----
ffffffff81004743:       4c 89 e0                mov    %r12,%rax
ffffffff81004746:       5b                      pop    %rbx
ffffffff81004747:       41 5c                   pop    %r12
ffffffff81004749:       41 5d                   pop    %r13
ffffffff8100474b:       41 5e                   pop    %r14
ffffffff8100474d:       41 5f                   pop    %r15
ffffffff8100474f:       5d                      pop    %rbp
ffffffff81004750:       c3                      retq


After X86_BUG_SYSRET_SS_ATTRS patching:

[    0.330367] apply_alternatives: feat: 16*32+8, old: (ffffffff8100472a, len: 5), repl: (ffffffff81de3996, len: 0), pad: 0
[    0.332005] ffffffff8100472a: old_insn: eb 13 0f 1f 00
[    0.338332] ffffffff8100472a: final_insn: 0f 1f 44 00 00

ffffffff8100472a:	0f 1f 44 00 00          nop
ffffffff8100472f:       66 8c d0                mov    %ss,%ax
ffffffff81004732:       66 83 f8 18             cmp    $0x18,%ax
ffffffff81004736:       74 07                   je     ffffffff8100473f <__switch_to+0x2ef>
ffffffff81004738:       b8 18 00 00 00          mov    $0x18,%eax
ffffffff8100473d:       8e d0                   mov    %eax,%ss
ffffffff8100473f:       48 83 c4 18             add    $0x18,%rsp
ffffffff81004743:       4c 89 e0                mov    %r12,%rax
ffffffff81004746:       5b                      pop    %rbx
ffffffff81004747:       41 5c                   pop    %r12
ffffffff81004749:       41 5d                   pop    %r13
ffffffff8100474b:       41 5e                   pop    %r14
ffffffff8100474d:       41 5f                   pop    %r15
ffffffff8100474f:       5d                      pop    %rbp
ffffffff81004750:       c3                      retq

So the penalty for the !X86_BUG_SYSRET_SS_ATTRS CPUs is a 2-byte JMP. Do
we care?

In the case we do, we could do this:

	JMP ss_fixup
ret:
	RET			# return prev_p;
ss_fixup:
	<fixup SS>
	jmp ret

and the !X86_BUG_SYSRET_SS_ATTRS CPUs would overwrite that
"JMP ss_fixup" with a NOP and they're fine. However, the
X86_BUG_SYSRET_SS_ATTRS CPUs will have to do two jumps, one to the fixup
code and one back to RET.

Now, how about I convert

                unsigned short ss_sel;
                savesegment(ss, ss_sel);
                if (ss_sel != __KERNEL_DS)
                        loadsegment(ss, __KERNEL_DS);

into asm and into an alternative()?
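
Roughly like this (a sketch only -- constraint and clobber handling is
exactly the hairy part, as with the POPCNT case):

	asm volatile(ALTERNATIVE(
		"",				/* oldinstr: nothing */
		"movw %%ss, %%ax\n\t"
		"cmpw %[ks], %%ax\n\t"
		"je 1f\n\t"
		"movl %[ks], %%eax\n\t"
		"movl %%eax, %%ss\n"
		"1:",
		X86_BUG_SYSRET_SS_ATTRS)
		: : [ks] "i" (__KERNEL_DS) : "ax");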

Then, the !X86_BUG_SYSRET_SS_ATTRS CPUs will trade off that JMP with a
bunch of NOPs which will pollute I$.

Hmmm.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-21 22:14                               ` Borislav Petkov
@ 2016-01-21 22:22                                 ` H. Peter Anvin
  2016-01-21 22:56                                   ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-21 22:22 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On January 21, 2016 2:14:42 PM PST, Borislav Petkov <bp@suse.de> wrote:
>On Wed, Jan 20, 2016 at 02:41:22AM -0800, H. Peter Anvin wrote:
>> Ah. What would be even more of a win would be to rebias
>> static_cpu_has_bug() so that the fallthrough case is the functional
>> one. Easily done by reversing the labels.
>
>By reversing you mean this:
>
>
>---
>diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
>index 77c51f4c15b7..49fa56f2b083 100644
>--- a/arch/x86/include/asm/cpufeature.h
>+++ b/arch/x86/include/asm/cpufeature.h
>@@ -174,10 +174,10 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
>                             [bitnum] "i" (1 << (bit & 7)),
>                             [cap_word] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
>                         : : t_yes, t_no);
>-       t_yes:
>-               return true;
>        t_no:
>                return false;
>+       t_yes:
>+               return true;
> #else
>                return boot_cpu_has(bit);
> #endif /* CC_HAVE_ASM_GOTO */
>---
>
>?
>
>[ ... dumps snipped ... ]
>
>So the penalty for the !X86_BUG_SYSRET_SS_ATTRS CPUs is a 2-byte JMP.
>Do we care?
>
>In the case we do, we could do this:
>
>	JMP ss_fixup
>ret:
>	RET return prev_p;
>ss_fixup:
>	<fixup SS>
>	jmp ret
>
>and the !X86_BUG_SYSRET_SS_ATTRS CPUs would overwrite that
>"JMP ss_fixup" with a NOP and they're fine. However, the
>X86_BUG_SYSRET_SS_ATTRS CPUs will have to do two jumps, one to the
>fixup code and one back to RET.
>
>Now, how about I convert
>
>                unsigned short ss_sel;
>                savesegment(ss, ss_sel);
>                if (ss_sel != __KERNEL_DS)
>                        loadsegment(ss, __KERNEL_DS);
>
>into asm and into an alternative()?
>
>Then, the !X86_BUG_SYSRET_SS_ATTRS CPUs will trade that JMP for a
>bunch of NOPs, which will pollute the I$.
>
>Hmmm.

Yes, having t_no as the fallthrough case ought to move the yes code out of line.

The current code probably pollutes the I$ too.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-21 22:22                                 ` H. Peter Anvin
@ 2016-01-21 22:56                                   ` Borislav Petkov
  2016-01-21 23:36                                     ` H. Peter Anvin
  2016-01-21 23:37                                     ` H. Peter Anvin
  0 siblings, 2 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-21 22:56 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Thu, Jan 21, 2016 at 02:22:28PM -0800, H. Peter Anvin wrote:
> Yes, having t_no as the fallthrough case ought to move the yes code
> out of line.

Dunno, maybe I'm doing something wrong:

I have this change:

---
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7f09de998c93..f9833fcb8fcb 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -175,10 +175,10 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
                             [bitnum] "i" (1 << (bit & 7)),
                             [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
                         : : t_yes, t_no);
-       t_yes:
-               return true;
        t_no:
                return false;
+       t_yes:
+               return true;
 }
 
 #define static_cpu_has(bit)                                    \
---

and the resulting code even looks wrong (or my brain is fried for today
- one of the two).

vmlinux:

ffffffff810046ae:       e9 cc 0e de 00          jmpq   ffffffff81de557f <__alt_instructions_end+0x7aa>
ffffffff810046b3:       48 83 c4 18             add    $0x18,%rsp
ffffffff810046b7:       4c 89 e0                mov    %r12,%rax
ffffffff810046ba:       5b                      pop    %rbx
ffffffff810046bb:       41 5c                   pop    %r12
ffffffff810046bd:       41 5d                   pop    %r13
ffffffff810046bf:       41 5e                   pop    %r14
ffffffff810046c1:       41 5f                   pop    %r15
ffffffff810046c3:       5d                      pop    %rbp
ffffffff810046c4:       c3                      retq

dynamic branch:

ffffffff81de557f:       f6 05 8f de d1 ff 01    testb  $0x1,-0x2e2171(%rip)        # ffffffff81b03415 <boot_cpu_data+0x55>
ffffffff81de5586:       0f 85 f6 f1 21 ff       jne    ffffffff81004782 <__switch_to+0x332>
ffffffff81de558c:       e9 22 f1 21 ff          jmpq   ffffffff810046b3 <__switch_to+0x263>

after X86_FEATURE_ALWAYS patching:

[    0.288007] apply_alternatives: feat: 3*32+21, old: (ffffffff810046ae, len: 5), repl: (ffffffff81de4dff, len: 5), pad: 0
[    0.292004] ffffffff810046ae: old_insn: e9 cc 0e de 00
[    0.300013] ffffffff81de4dff: rpl_insn: e9 af f8 21 ff
[    0.308006] recompute_jump: target RIP: ffffffff810046b3, new_displ: 0x5
[    0.312006] recompute_jump: final displ: 0x00000003, JMP 0xffffffff810046b3
[    0.316006] ffffffff810046ae: final_insn: eb 03 0f 1f 00

ffffffff810046ae:       eb 03 0f 1f 00		jmp    ffffffff810046b3 ---
ffffffff810046b3:       48 83 c4 18             add    $0x18,%rsp	<--
ffffffff810046b7:       4c 89 e0                mov    %r12,%rax
ffffffff810046ba:       5b                      pop    %rbx
ffffffff810046bb:       41 5c                   pop    %r12
ffffffff810046bd:       41 5d                   pop    %r13
ffffffff810046bf:       41 5e                   pop    %r14
ffffffff810046c1:       41 5f                   pop    %r15
ffffffff810046c3:       5d                      pop    %rbp
ffffffff810046c4:       c3                      retq

so this is silly: we're basically jumping to right after the JMP
instruction itself. That will be the case on !X86_BUG_SYSRET_SS_ATTRS
CPUs: still a two-byte, and now even a useless, JMP.

The right thing to do would be simply to generate a NOP.
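
I.e., teach recompute_jump() to notice that case - an untested sketch,
with the parameter/field names from memory:

---
	/*
	 * If the recomputed JMP would land on the instruction right
	 * after the patch site, it is useless: NOP out the whole thing.
	 */
	if (target == orig_insn + a->instrlen) {
		add_nops(insnbuf, a->instrlen);
		return;
	}
---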

On X86_BUG_SYSRET_SS_ATTRS CPUs:

[    0.322014] apply_alternatives: feat: 16*32+8, old: (ffffffff810046ae, len: 5), repl: (ffffffff81de3962, len: 0), pad: 0
[    0.324005] ffffffff810046ae: old_insn: eb 03 0f 1f 00
[    0.332006] ffffffff810046ae: final_insn: 0f 1f 44 00 00

ffffffff810046ae:       0f 1f 44 00 00		nop
ffffffff810046b3:       48 83 c4 18             add    $0x18,%rsp
ffffffff810046b7:       4c 89 e0                mov    %r12,%rax
ffffffff810046ba:       5b                      pop    %rbx
ffffffff810046bb:       41 5c                   pop    %r12
ffffffff810046bd:       41 5d                   pop    %r13
ffffffff810046bf:       41 5e                   pop    %r14
ffffffff810046c1:       41 5f                   pop    %r15
ffffffff810046c3:       5d                      pop    %rbp
ffffffff810046c4:       c3                      retq

which is actually even wrong!

What it should've done is

	jne    ffffffff81004782

as the dynamic code did. At that address we have the ss fixup:

ffffffff81004782:       66 8c d0                mov    %ss,%ax
ffffffff81004785:       66 83 f8 18             cmp    $0x18,%ax
ffffffff81004789:       0f 84 24 ff ff ff       je     ffffffff810046b3 <__switch_to+0x263>
ffffffff8100478f:       b8 18 00 00 00          mov    $0x18,%eax
ffffffff81004794:       8e d0                   mov    %eax,%ss
ffffffff81004796:       e9 18 ff ff ff          jmpq   ffffffff810046b3 <__switch_to+0x263>

with the jump back to the ret code. Which means, the
X86_BUG_SYSRET_SS_ATTRS CPUs get to do a forward and a backward JMP. So
even if it did the right thing, it would be two JMPs.

Meh.

I need to think about something better.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-21 22:56                                   ` Borislav Petkov
@ 2016-01-21 23:36                                     ` H. Peter Anvin
  2016-01-21 23:37                                     ` H. Peter Anvin
  1 sibling, 0 replies; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-21 23:36 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On 01/21/16 14:56, Borislav Petkov wrote:
> 
> so this is silly: we're basically jumping after the JMP instruction
> itself. So that will be the case on !X86_BUG_SYSRET_SS_ATTRS CPUs.
> Still a two-byte and now even a useless JMP.
> 
> The right thing to do would be to generate a NOP simply.
> 

OK, so gcc isn't as clever as I thought.

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-21 22:56                                   ` Borislav Petkov
  2016-01-21 23:36                                     ` H. Peter Anvin
@ 2016-01-21 23:37                                     ` H. Peter Anvin
  2016-01-22 10:32                                       ` Borislav Petkov
  1 sibling, 1 reply; 66+ messages in thread
From: H. Peter Anvin @ 2016-01-21 23:37 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

Maybe a label attribute would help, I don't know.
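
Something like this, say - gcc does accept hot/cold attributes on C
labels, though whether it actually changes the block placement here is
anyone's guess (assuming the t_yes/fixup side is the one we want out of
line):

---
	t_yes: __attribute__((cold));
		return true;
	t_no:
		return false;
---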

	-hpa

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86: static_cpu_has_safe: discard dynamic check after init
  2016-01-21 23:37                                     ` H. Peter Anvin
@ 2016-01-22 10:32                                       ` Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-22 10:32 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Brian Gerst, the arch/x86 maintainers,
	Linux Kernel Mailing List, Ingo Molnar, Denys Vlasenko,
	Linus Torvalds

On Thu, Jan 21, 2016 at 03:37:16PM -0800, H. Peter Anvin wrote:
> Maybe a label attribute would help, I don't know.

Here's another version which works, not really better though:

Change is this:

---
+       asm_volatile_goto(ALTERNATIVE("", "jmp %l[t_fixup_ss]",
+                                     X86_BUG_SYSRET_SS_ATTRS)
+                       : : : : t_fixup_ss);
+
+       return prev_p;
+
+t_fixup_ss:

	<snip comment>

+       savesegment(ss, ss_sel);
+       if (ss_sel != __KERNEL_DS)
+               loadsegment(ss, __KERNEL_DS);
 
        return prev_p;
---

with two "return prev_p" with the hope that gcc won't generate a second
JMP back to the frame restore and ret code. But, nah, it does.

vmlinux:

ffffffff8100472a:       90                      nop
ffffffff8100472b:       90                      nop
ffffffff8100472c:       90                      nop
ffffffff8100472d:       90                      nop
ffffffff8100472e:       90                      nop
ffffffff8100472f:       48 83 c4 18             add    $0x18,%rsp
ffffffff81004733:       4c 89 e0                mov    %r12,%rax
ffffffff81004736:       5b                      pop    %rbx
ffffffff81004737:       41 5c                   pop    %r12
ffffffff81004739:       41 5d                   pop    %r13
ffffffff8100473b:       41 5e                   pop    %r14
ffffffff8100473d:       41 5f                   pop    %r15
ffffffff8100473f:       5d                      pop    %rbp
ffffffff81004740:       c3                      retq

after patching on an X86_BUG_SYSRET_SS_ATTRS CPU:

[    0.264007] apply_alternatives: feat: 16*32+8, old: (ffffffff8100472a, len: 5), repl: (ffffffff81de4e05, len: 5), pad: 5
[    0.268005] ffffffff8100472a: old_insn: 90 90 90 90 90
[    0.273510] ffffffff81de4e05: rpl_insn: e9 68 f9 21 ff
[    0.277496] recompute_jump: target RIP: ffffffff81004772, new_displ: 0x48
[    0.280005] recompute_jump: final displ: 0x00000046, JMP 0xffffffff81004772
[    0.283159] ffffffff8100472a: final_insn: eb 46 0f 1f 00


ffffffff8100472a:       eb 46 0f 1f 00		jmp ffffffff81004772
ffffffff8100472f:       48 83 c4 18             add    $0x18,%rsp
ffffffff81004733:       4c 89 e0                mov    %r12,%rax
ffffffff81004736:       5b                      pop    %rbx
ffffffff81004737:       41 5c                   pop    %r12
ffffffff81004739:       41 5d                   pop    %r13
ffffffff8100473b:       41 5e                   pop    %r14
ffffffff8100473d:       41 5f                   pop    %r15
ffffffff8100473f:       5d                      pop    %rbp
ffffffff81004740:       c3                      retq

so a two-byte JMP jumping to:

ffffffff81004772:       66 8c d0                mov    %ss,%ax
ffffffff81004775:       66 83 f8 18             cmp    $0x18,%ax
ffffffff81004779:       74 b4                   je     ffffffff8100472f <__switch_to+0x2df>
ffffffff8100477b:       b8 18 00 00 00          mov    $0x18,%eax
ffffffff81004780:       8e d0                   mov    %eax,%ss
ffffffff81004782:       eb ab                   jmp    ffffffff8100472f <__switch_to+0x2df>

which does the fixup and jumps back to ...472f, which restores the frame
and returns.

I wish I could tell gcc not to jump back but to duplicate the function
return here too, as we don't care about code size in that case.

And it's not like it is really better on !X86_BUG_SYSRET_SS_ATTRS CPUs -
there we have the five single-byte padding NOPs merely being converted
to one optimized 5-byte NOP:

[    0.293164] ffffffff8100472a: [0:5) optimized NOPs: 0f 1f 44 00 00

I need to talk to my gcc guy... :)

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [PATCH] x86/head_64.S: do not use temporary register to check alignment
@ 2016-01-23  6:50 Alexander Kuleshov
  2016-01-26  9:31 ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: Alexander Kuleshov @ 2016-01-23  6:50 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Borislav Petkov,
	Denys Vlasenko, Andrey Ryabinin, x86, linux-kernel,
	Alexander Kuleshov

We are using a temporary %rax register while checking the kernel load
address alignment. We can get rid of it: testl computes the AND of its
two operands and only sets the flags, discarding the result, so it does
not change the value of the %rbp register.

Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Suggested-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/kernel/head_64.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e8..7c21029 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -76,9 +76,7 @@ startup_64:
 	subq	$_text - __START_KERNEL_map, %rbp
 
 	/* Is the address not 2M aligned? */
-	movq	%rbp, %rax
-	andl	$~PMD_PAGE_MASK, %eax
-	testl	%eax, %eax
+	testl	$~PMD_PAGE_MASK, %ebp
 	jnz	bad_address
 
 	/*
-- 
2.7.0.25.gfc10eb5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] x86/head_64.S: do not use temporary register to check alignment
  2016-01-23  6:50 [PATCH] x86/head_64.S: do not use temporary register to check alignment Alexander Kuleshov
@ 2016-01-26  9:31 ` Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26  9:31 UTC (permalink / raw)
  To: Alexander Kuleshov
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Denys Vlasenko, Andrey Ryabinin, x86, linux-kernel

On Sat, Jan 23, 2016 at 12:50:28PM +0600, Alexander Kuleshov wrote:
> We are using a temporary %rax register while checking the kernel load
> address alignment. We can get rid of it: testl computes the AND of its
> two operands and only sets the flags, discarding the result, so it does
> not change the value of the %rbp register.
> 
> Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
> Suggested-by: Brian Gerst <brgerst@gmail.com>
> ---
>  arch/x86/kernel/head_64.S | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index ffdc0e8..7c21029 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -76,9 +76,7 @@ startup_64:
>  	subq	$_text - __START_KERNEL_map, %rbp
>  
>  	/* Is the address not 2M aligned? */
> -	movq	%rbp, %rax
> -	andl	$~PMD_PAGE_MASK, %eax
> -	testl	%eax, %eax
> +	testl	$~PMD_PAGE_MASK, %ebp
>  	jnz	bad_address
>  
>  	/*
> --

Applied, thanks.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [PATCH 00/10] tip-queue 2016-01-26, rest
@ 2016-01-26 21:12 Borislav Petkov
  2016-01-26 21:12 ` [PATCH 01/10] x86/asm: Add condition codes clobber to memory barrier macros Borislav Petkov
                   ` (9 more replies)
  0 siblings, 10 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Borislav Petkov <bp@suse.de>

Hi,

this is the rest of the series today. The main one is the cpufeature
cleanup. I merged the AVIC patch from tip:x86/cpu so that the rest
applies cleanly. The rest are simplifications/cleanups.

Please apply,
thanks.

Alexander Kuleshov (1):
  x86/head_64: Simplify kernel load address alignment check

Borislav Petkov (5):
  x86/cpufeature: Carve out X86_FEATURE_*
  x86/cpufeature: Replace the old static_cpu_has() with safe variant
  x86/cpufeature: Get rid of the non-asm goto variant
  x86/alternatives: Add an auxiliary section
  x86/vdso: Use static_cpu_has()

Brian Gerst (1):
  x86/alternatives: Discard dynamic check after init

Michael S. Tsirkin (3):
  x86/asm: Add condition codes clobber to memory barrier macros
  x86/asm: Drop a comment left over from X86_OOSTORE
  x86/asm: Tweak the comment about wmb() use for IO

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [PATCH 01/10] x86/asm: Add condition codes clobber to memory barrier macros
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-26 21:12 ` [PATCH 02/10] x86/asm: Drop a comment left over from X86_OOSTORE Borislav Petkov
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: "Michael S. Tsirkin" <mst@redhat.com>

ADDL clobbers flags (such as CF), but barrier.h didn't tell gcc about
that. Historically, gcc hasn't needed the clobber on x86 because it
always considers the flags clobbered anyway. We are probably missing
the "cc" clobber in a *lot* of places for this reason.

But even if not necessary, it's probably a good thing to add for
documentation, and in case gcc semantics ever change.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: virtualization <virtualization@lists.linux-foundation.org>
Link: http://lkml.kernel.org/r/1452715911-12067-2-git-send-email-mst@redhat.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/barrier.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0681d2532527..5bce7865b623 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -15,9 +15,12 @@
  * Some non-Intel clones support out of order store. wmb() ceases to be a
  * nop for these.
  */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+				      X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
 #else
 #define mb() 	asm volatile("mfence":::"memory")
 #define rmb()	asm volatile("lfence":::"memory")
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 02/10] x86/asm: Drop a comment left over from X86_OOSTORE
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
  2016-01-26 21:12 ` [PATCH 01/10] x86/asm: Add condition codes clobber to memory barrier macros Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-26 21:12 ` [PATCH 03/10] x86/asm: Tweak the comment about wmb() use for IO Borislav Petkov
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: "Michael S. Tsirkin" <mst@redhat.com>

The comment about wmb() being a non-NOP to deal with non-Intel CPUs is
a leftover from before commit

  09df7c4c8097 ("x86: Remove CONFIG_X86_OOSTORE").

It makes no sense now: in particular, wmb() is not a NOP even on regular
Intel CPUs, because of weird use-cases, e.g. dealing with WC memory.

Drop this comment.
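
For the record, the kind of WC use-case meant above, as a made-up driver
sketch (not part of this patch; the doorbell register and the mappings
are hypothetical):

---
	/* Fill a write-combining mapped buffer, then ring the doorbell. */
	memcpy_toio(wc_buf, data, len);
	wmb();		/* drain the WC buffers before the doorbell write */
	writel(1, mmio_base + DOORBELL);
---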

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: virtualization <virtualization@lists.linux-foundation.org>
Link: http://lkml.kernel.org/r/1452715911-12067-3-git-send-email-mst@redhat.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/barrier.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 5bce7865b623..d2aa66a3a4b5 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -11,10 +11,6 @@
  */
 
 #ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
 #define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
 				      X86_FEATURE_XMM2) ::: "memory", "cc")
 #define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 03/10] x86/asm: Tweak the comment about wmb() use for IO
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
  2016-01-26 21:12 ` [PATCH 01/10] x86/asm: Add condition codes clobber to memory barrier macros Borislav Petkov
  2016-01-26 21:12 ` [PATCH 02/10] x86/asm: Drop a comment left over from X86_OOSTORE Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-26 21:12 ` [PATCH 04/10] x86/cpufeature: Carve out X86_FEATURE_* Borislav Petkov
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: "Michael S. Tsirkin" <mst@redhat.com>

On x86, we *do* still use the non-nop rmb()/wmb() for IO barriers, but
even that is generally questionable.

Leave them around for historical reasons, unless somebody can point to a
case where they care about the performance. Tweak the comment so people
don't think they are strictly required in all cases.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: virtualization <virtualization@lists.linux-foundation.org>
Link: http://lkml.kernel.org/r/1452715911-12067-4-git-send-email-mst@redhat.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/barrier.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index d2aa66a3a4b5..4f95b2affd88 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -6,8 +6,8 @@
 
 /*
  * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
+ *
+ * And yes, this might be required on UP too when we're talking to devices.
  */
 
 #ifdef CONFIG_X86_32
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 04/10] x86/cpufeature: Carve out X86_FEATURE_*
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (2 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 03/10] x86/asm: Tweak the comment about wmb() use for IO Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-30 13:18   ` [tip:x86/asm] " tip-bot for Borislav Petkov
  2016-01-26 21:12 ` [PATCH 05/10] x86/cpufeature: Replace the old static_cpu_has() with safe variant Borislav Petkov
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Borislav Petkov <bp@suse.de>

Move them to a separate header and have the following dependency:

x86/cpufeatures.h <- x86/processor.h <- x86/cpufeature.h

This makes it easier to use the header in asm code, without having to
include the whole of cpufeature.h and add guards for asm.

Signed-off-by: Borislav Petkov <bp@suse.de>
Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
---
 Documentation/kernel-parameters.txt      |   2 +-
 arch/x86/boot/cpuflags.h                 |   2 +-
 arch/x86/boot/mkcpustr.c                 |   2 +-
 arch/x86/crypto/crc32-pclmul_glue.c      |   2 +-
 arch/x86/crypto/crc32c-intel_glue.c      |   2 +-
 arch/x86/crypto/crct10dif-pclmul_glue.c  |   2 +-
 arch/x86/entry/common.c                  |   1 +
 arch/x86/entry/entry_32.S                |   2 +-
 arch/x86/entry/vdso/vdso32-setup.c       |   1 -
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +-
 arch/x86/entry/vdso/vma.c                |   1 +
 arch/x86/include/asm/alternative.h       |   6 -
 arch/x86/include/asm/apic.h              |   1 -
 arch/x86/include/asm/arch_hweight.h      |   2 +
 arch/x86/include/asm/cmpxchg.h           |   1 +
 arch/x86/include/asm/cpufeature.h        | 284 +-----------------------------
 arch/x86/include/asm/cpufeatures.h       | 288 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/fpu/internal.h      |   1 +
 arch/x86/include/asm/irq_work.h          |   2 +-
 arch/x86/include/asm/mwait.h             |   2 +
 arch/x86/include/asm/processor.h         |   3 +-
 arch/x86/include/asm/smap.h              |   2 +-
 arch/x86/include/asm/smp.h               |   1 -
 arch/x86/include/asm/thread_info.h       |   2 +-
 arch/x86/include/asm/tlbflush.h          |   1 +
 arch/x86/include/asm/uaccess_64.h        |   2 +-
 arch/x86/kernel/cpu/Makefile             |   2 +-
 arch/x86/kernel/cpu/centaur.c            |   2 +-
 arch/x86/kernel/cpu/cyrix.c              |   1 +
 arch/x86/kernel/cpu/intel.c              |   2 +-
 arch/x86/kernel/cpu/intel_cacheinfo.c    |   2 +-
 arch/x86/kernel/cpu/match.c              |   2 +-
 arch/x86/kernel/cpu/mkcapflags.sh        |   6 +-
 arch/x86/kernel/cpu/mtrr/main.c          |   2 +-
 arch/x86/kernel/cpu/transmeta.c          |   2 +-
 arch/x86/kernel/e820.c                   |   1 +
 arch/x86/kernel/head_32.S                |   2 +-
 arch/x86/kernel/hpet.c                   |   1 +
 arch/x86/kernel/msr.c                    |   2 +-
 arch/x86/kernel/verify_cpu.S             |   2 +-
 arch/x86/lib/clear_page_64.S             |   2 +-
 arch/x86/lib/copy_page_64.S              |   2 +-
 arch/x86/lib/copy_user_64.S              |   2 +-
 arch/x86/lib/memcpy_64.S                 |   2 +-
 arch/x86/lib/memmove_64.S                |   2 +-
 arch/x86/lib/memset_64.S                 |   2 +-
 arch/x86/mm/setup_nx.c                   |   1 +
 arch/x86/oprofile/op_model_amd.c         |   1 -
 arch/x86/um/asm/barrier.h                |   2 +-
 lib/atomic64_test.c                      |   2 +-
 50 files changed, 336 insertions(+), 328 deletions(-)
 create mode 100644 arch/x86/include/asm/cpufeatures.h

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b7d44871effc..07c661ae8596 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -661,7 +661,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	clearcpuid=BITNUM [X86]
 			Disable CPUID feature X for the kernel. See
-			arch/x86/include/asm/cpufeature.h for the valid bit
+			arch/x86/include/asm/cpufeatures.h for the valid bit
 			numbers. Note the Linux specific bits are not necessarily
 			stable over kernel options, but the vendor specific
 			ones should be.
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697e51e4..4cb404fd45ce 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -1,7 +1,7 @@
 #ifndef BOOT_CPUFLAGS_H
 #define BOOT_CPUFLAGS_H
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/processor-flags.h>
 
 struct cpu_features {
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e66a62..f72498dc90d2 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,7 +17,7 @@
 
 #include "../include/asm/required-features.h"
 #include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c86a54..27226df3f7d8 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -33,7 +33,7 @@
 #include <linux/crc32.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e9871693f24..0857b1a1de3b 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -30,7 +30,7 @@
 #include <linux/kernel.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/internal.h>
 
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc97a311..cd4df9322501 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -30,7 +30,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 
 asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 03663740c866..02e3d89bb137 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
 #include <asm/traps.h>
 #include <asm/vdso.h>
 #include <asm/uaccess.h>
+#include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 77d8c5112900..4c5228352744 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
 #include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a9ae4b..7853b53959cd 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/mm_types.h>
 
-#include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/vdso.h>
 
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d9297074b..0109ac6cb79c 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -3,7 +3,7 @@
 */
 
 #include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8f69e264ac4..5471ac362147 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,6 +20,7 @@
 #include <asm/page.h>
 #include <asm/hpet.h>
 #include <asm/desc.h>
+#include <asm/cpufeature.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85bbb8ff..99afb665a004 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	".popsection"
 
 /*
- * This must be included *after* the definition of ALTERNATIVE due to
- * <asm/arch_hweight.h>
- */
-#include <asm/cpufeature.h>
-
-/*
  * Alternative instructions for different CPU types or capabilities.
  *
  * This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6f3da2..0899cfc8dfe8 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -6,7 +6,6 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/processor.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1ef709..02e799fa43d1 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_HWEIGHT_H
 #define _ASM_X86_HWEIGHT_H
 
+#include <asm/cpufeatures.h>
+
 #ifdef CONFIG_64BIT
 /* popcnt %edi, %eax -- redundant REX prefix for alignment */
 #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841eddfe..9733361fed6f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
 #define ASM_X86_CMPXCHG_H
 
 #include <linux/compiler.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
 /*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bbf166e805be..3cce9f3c5cb1 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -1,289 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
-#define NCAPINTS	16	/* N 32-bit words worth of info */
-#define NBUGINTS	1	/* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name.  If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
-/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x)		(NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include <asm/processor.h>
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..0ceb6adc8a48
--- /dev/null
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,288 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS	16	/* N 32-bit words worth of info */
+#define NBUGINTS	1	/* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name.  If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
+					  /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
+/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x)		(NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440df63f1..d01199def781 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -17,6 +17,7 @@
 #include <asm/user.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
 
 /*
  * High level FPU state handling functions:
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8e248b..d0afb05c84fc 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_IRQ_WORK_H
 #define _ASM_IRQ_WORK_H
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 static inline bool arch_irq_work_has_interrupt(void)
 {
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b5e5aa..0deeb2d26df7 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -3,6 +3,8 @@
 
 #include <linux/sched.h>
 
+#include <asm/cpufeature.h>
+
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2d5a50cb61a2..491a3d9dbb15 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -13,7 +13,7 @@ struct vm86;
 #include <asm/types.h>
 #include <uapi/asm/sigcontext.h>
 #include <asm/current.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/pgtable_types.h>
 #include <asm/percpu.h>
@@ -24,7 +24,6 @@ struct vm86;
 #include <asm/fpu/types.h>
 
 #include <linux/personality.h>
-#include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <linux/math64.h>
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665ebd17bb..db333300bd4b 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -15,7 +15,7 @@
 
 #include <linux/stringify.h>
 #include <asm/nops.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 
 /* "Raw" instruction opcodes */
 #define __ASM_CLAC	.byte 0x0f,0x01,0xca
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf0727623b..20a3de5cb3b0 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,6 @@
 #endif
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
-#include <asm/cpufeature.h>
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b551028740..c0778fcab06d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -49,7 +49,7 @@
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
 struct thread_info {
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029405a3..0bb31cb8c73b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,6 +5,7 @@
 #include <linux/sched.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f2f9b39b274a..d83a55b95a48 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,7 +8,7 @@
 #include <linux/errno.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 
 /*
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 58031303e304..faa7b5204129 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -64,7 +64,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
 
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
 
 targets += capflags.c
 $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6e483c..6608c03c2126 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e79637..15e47c1cd412 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <asm/pci-direct.h>
 #include <asm/tsc.h>
+#include <asm/cpufeature.h>
 
 #include "cpu.h"
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648bc1a0a..9299e3bdfad6 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
 #include <linux/module.h>
 #include <linux/uaccess.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
 #include <asm/bugs.h>
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c52388cf4..341449c49f34 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
 #include <linux/sysfs.h>
 #include <linux/pci.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d487ea..fbb5e90557a5 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
 #include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710a5b23..6988c74409a8 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
 #
 
 IN=$1
@@ -49,8 +49,8 @@ dump_array()
 trap 'rm "$OUT"' EXIT
 
 (
-	echo "#ifndef _ASM_X86_CPUFEATURE_H"
-	echo "#include <asm/cpufeature.h>"
+	echo "#ifndef _ASM_X86_CPUFEATURES_H"
+	echo "#include <asm/cpufeatures.h>"
 	echo "#endif"
 	echo ""
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 5c3d149ee91c..74f1d90f9c29 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
 #include <linux/smp.h>
 #include <linux/syscore_ops.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 252da7aceca6..a19a663282b5 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 #include "cpu.h"
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 569c1e4f96fe..b3c2a697820a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
+#include <asm/cpufeature.h>
 
 /*
  * The e820 map is the map that gets modified e.g. with command line parameters
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae24b6d2..af1112980dd4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
 #include <asm/setup.h>
 #include <asm/processor-flags.h>
 #include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include <asm/bootparam.h>
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5cd5d0..be0ebbb6d1d1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
 #include <linux/pm.h>
 #include <linux/io.h>
 
+#include <asm/cpufeature.h>
 #include <asm/irqdomain.h>
 #include <asm/fixmap.h>
 #include <asm/hpet.h>
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616f93f1..7f3550acde1b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 
 static struct class *msr_class;
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35ee4bc..014ea59aa153 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
  * 	appropriately. Either display a message or halt.
  */
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
 verify_cpu:
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a2fe51b00cce..65be7cfaf947 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,5 @@
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 009f98216b7e..24ef1c2104d4 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,7 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 982ce34f4a9b..fba343062055 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -10,7 +10,7 @@
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 16698bba87de..a0de849435ad 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index ca2afdd6d98e..90ce01bee00c 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,7 @@
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 #undef memmove
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2661fad05827..c9c81227ea37 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 .weak memset
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eacb3321..f65a33f505b6 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -4,6 +4,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/proto.h>
+#include <asm/cpufeature.h>
 
 static int disable_nx;
 
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 50d86c0e9ba4..660a83c8287b 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -24,7 +24,6 @@
 #include <asm/nmi.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
-#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 755481f14d90..764ac2fc53fe 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -3,7 +3,7 @@
 
 #include <asm/asm.h>
 #include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cmpxchg.h>
 #include <asm/nops.h>
 
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index d62de8bf022d..123481814320 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -17,7 +17,7 @@
 #include <linux/atomic.h>
 
 #ifdef CONFIG_X86
-#include <asm/processor.h>	/* for boot_cpu_has below */
+#include <asm/cpufeature.h>	/* for boot_cpu_has below */
 #endif
 
 #define TEST(bit, op, c_op, val)				\
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 05/10] x86/cpufeature: Replace the old static_cpu_has() with safe variant
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (3 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 04/10] x86/cpufeature: Carve out X86_FEATURE_* Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-30 13:19   ` [tip:x86/asm] " tip-bot for Borislav Petkov
  2016-01-26 21:12 ` [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant Borislav Petkov
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Borislav Petkov <bp@suse.de>

So the old one didn't work properly before alternatives had run. And it
was supposed to provide an optimized JMP, on the assumption that the
offset it is jumping to fits within a signed byte so that a two-byte
JMP can be used.
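
For reference, the two JMP encodings in question look like this (an
illustrative sketch of the x86 instruction encoding, with made-up label
names, not code from the patch):

	jmp	near_label	/* EB rel8:  2 bytes, reach -128..+127 */
	jmp	far_label	/* E9 rel32: 5 bytes, full 32-bit offset */

so the best case saves three bytes per patched site.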

So I did an x86_64 allyesconfig build and dumped all sites where
static_cpu_has() was used. The optimization amounted to a grand total of
12(!) places where static_cpu_has() had generated a 2-byte JMP, which
saved us a whopping 36 bytes!

This clearly is not worth the trouble, so we can remove it. The only
place where the optimization might count - in __switch_to() - we will
handle differently. But that's not the subject of this patch.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/Kconfig.debug               |  10 ----
 arch/x86/include/asm/cpufeature.h    | 100 +++--------------------------------
 arch/x86/include/asm/fpu/internal.h  |  14 ++---
 arch/x86/kernel/apic/apic_numachip.c |   4 +-
 arch/x86/kernel/cpu/common.c         |  12 +----
 arch/x86/kernel/vm86_32.c            |   2 +-
 drivers/cpufreq/intel_pstate.c       |   2 +-
 fs/btrfs/disk-io.c                   |   2 +-
 8 files changed, 21 insertions(+), 125 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed97a8a2..68a2d1f0a683 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -350,16 +350,6 @@ config DEBUG_IMR_SELFTEST
 
 	  If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-	bool "Debug alternatives"
-	depends on DEBUG_KERNEL
-	---help---
-	  This option causes additional code to be generated which
-	  fails if static_cpu_has() is used before alternatives have
-	  run.
-
-	  If unsure, say N.
-
 config X86_DEBUG_FPU
 	bool "Debug the x86 FPU code"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3cce9f3c5cb1..a261cf2e7907 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -125,103 +125,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 /*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
  * fast paths and boot_cpu_has() otherwise!
  */
 
 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
+extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These are only valid after alternatives have run, but will statically
  * patch the target code for additional performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
 		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
@@ -255,7 +171,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 	t_no:
 		return false;
 	t_dynamic:
-		return __static_cpu_has_safe(bit);
+		return __static_cpu_has(bit);
 #else
 		u8 flag;
 		/* Open-coded due to __stringify() in ALTERNATIVE() */
@@ -293,22 +209,21 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     ".previous\n"
 			     : "=qm" (flag)
 			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+		return (flag == 2 ? __static_cpu_has(bit) : flag);
 #endif /* CC_HAVE_ASM_GOTO */
 }
 
-#define static_cpu_has_safe(bit)				\
+#define static_cpu_has(bit)					\
 (								\
 	__builtin_constant_p(boot_cpu_has(bit)) ?		\
 		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
+		_static_cpu_has(bit)				\
 )
 #else
 /*
  * gcc 3.x is too stupid to do the static test; fall back to dynamic.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
@@ -316,7 +231,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
 
 #define static_cpu_has_bug(bit)		static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES		(NCAPINTS * 32)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index d01199def781..c2e46eb96b6d 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -59,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+	return static_cpu_has(X86_FEATURE_XSAVEOPT);
 }
 
 static __always_inline __pure bool use_xsave(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVE);
+	return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
 static __always_inline __pure bool use_fxsr(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_FXSR);
+	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
 /*
@@ -301,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -323,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -461,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
 	 * pending. Clear the x87 state here by setting it to fixed values.
 	 * "m" is a random variable that should be in L1.
 	 */
-	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
 			"emms\n\t"
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6ec49..ab5c2c685a3c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
 	unsigned long value;
 	unsigned int id = (x >> 24) & 0xff;
 
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
 		id |= (value << 2) & 0xff00;
 	}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 	this_cpu_write(cpu_llc_id, node);
 
 	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, val);
 		nodes = ((val >> 3) & 7) + 1;
 	}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de8f60a..ee499817f3f5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1475,19 +1475,11 @@ void cpu_init(void)
 }
 #endif
 
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
-	WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
+inline bool __static_cpu_has(u16 bit)
 {
 	return boot_cpu_has(bit);
 }
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
+EXPORT_SYMBOL_GPL(__static_cpu_has);
 
 static void bsp_resume(void)
 {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 483231ebbb0b..e6a1edc2f958 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -358,7 +358,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	/* make room for real-mode segments */
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has_safe(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP))
 		tsk->thread.sysenter_cs = 0;
 
 	load_sp0(tss, &tsk->thread);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d477e32d..3a4b39afc0ab 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void)
 	if (!all_cpu_data)
 		return -ENOMEM;
 
-	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+	if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
 		pr_info("intel_pstate: HWP enabled\n");
 		hwp_active++;
 	}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 42a378a4eefb..6baedf3d806f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -923,7 +923,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
 	if (bio_flags & EXTENT_BIO_TREE_LOG)
 		return 0;
 #ifdef CONFIG_X86
-	if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+	if (static_cpu_has(X86_FEATURE_XMM4_2))
 		return 0;
 #endif
 	return 1;
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (4 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 05/10] x86/cpufeature: Replace the old static_cpu_has() with safe variant Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-27  3:36   ` Brian Gerst
  2016-01-26 21:12 ` [PATCH 07/10] x86/alternatives: Add an auxiliary section Borislav Petkov
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Borislav Petkov <bp@suse.de>

I can simply quote hpa from the mail:

"Get rid of the non-asm goto variant and just fall back to dynamic if
asm goto is unavailable. It doesn't make any sense, really, if it is
supposed to be safe, and by now the asm goto-capable gcc is in more wide
use. (Originally the gcc 3.x fallback to pure dynamic didn't exist,
either.)"

Boy, am I lazy.

Clean up the whole CC_HAVE_ASM_GOTO ifdeffery too, while at it.
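
For context, this is the asm goto construct everything hinges on: inline
asm that can jump straight to C labels. A minimal sketch (hypothetical
feature test, not the kernel macro):

	static __always_inline bool have_feature(void)
	{
		/* In the real macro, alternatives would patch this JMP out. */
		asm goto("1: jmp %l[t_no]\n"
			 : : : : t_no);
		return true;
	t_no:
		return false;
	}

Compilers without asm goto simply take the boot_cpu_has() path instead.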

Signed-off-by: Borislav Petkov <bp@suse.de>
Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/cpufeature.h | 49 ++++-----------------------------------
 1 file changed, 5 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a261cf2e7907..d48bf024f335 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -129,17 +129,16 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  * fast paths and boot_cpu_has() otherwise!
  */
 
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+#if CC_HAVE_ASM_GOTO && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-#ifdef CC_HAVE_ASM_GOTO
 		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
@@ -172,45 +171,6 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 		return false;
 	t_dynamic:
 		return __static_cpu_has(bit);
-#else
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 3f - .\n"		/* repl offset */
-			     " .word %P2\n"		/* always replace */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $0,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 5f - .\n"		/* repl offset */
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 4b - 3b\n"		/* src len */
-			     " .byte 6f - 5f\n"		/* repl len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "5: movb $1,%0\n"
-			     "6:\n"
-			     ".previous\n"
-			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
 }
 
 #define static_cpu_has(bit)					\
@@ -221,7 +181,8 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 )
 #else
 /*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
 #endif
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 07/10] x86/alternatives: Add an auxiliary section
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (5 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-30 13:19   ` [tip:x86/asm] " tip-bot for Borislav Petkov
  2016-01-26 21:12 ` [PATCH 08/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Borislav Petkov <bp@suse.de>

Add .altinstr_aux, a section for additional instructions which will be
used before and/or during patching. Anything which needs more
sophisticated patching should go there. See the next patch.
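
The section plumbing, in miniature (a do-nothing sketch; the real user is
static_cpu_has() in the next patch):

	asm volatile("1: jmp 6f\n"	/* in real use, patched out by alternatives */
		     "2:\n"
		     ".section .altinstr_aux,\"ax\"\n"
		     /* code used only before patching goes at label 6: */
		     "6: jmp 2b\n"
		     ".previous\n");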

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/vmlinux.lds.S | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf11f562..92dc211c11db 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -195,6 +195,17 @@ SECTIONS
 	:init
 #endif
 
+	/*
+	 * Section for code used exclusively before alternatives are run. All
+	 * references to such code must be patched out by alternatives, normally
+	 * by using X86_FEATURE_ALWAYS CPU feature bit.
+	 *
+	 * See static_cpu_has() for an example.
+	 */
+	.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+		*(.altinstr_aux)
+	}
+
 	INIT_DATA_SECTION(16)
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 08/10] x86/alternatives: Discard dynamic check after init
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (6 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 07/10] x86/alternatives: Add an auxiliary section Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-26 21:12 ` [PATCH 09/10] x86/vdso: Use static_cpu_has() Borislav Petkov
  2016-01-26 21:12 ` [PATCH 10/10] x86/head_64: Simplify kernel load address alignment check Borislav Petkov
  9 siblings, 0 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Brian Gerst <brgerst@gmail.com>

Move the code to do the dynamic check to the altinstr_aux section so
that it is discarded after alternatives have run and a static branch has
been chosen.

This way we're changing the dynamic branch from C code to assembly,
which makes it *substantially* smaller while avoiding a completely
unnecessary call to an out-of-line function.

Boris: change it to do TESTB, as hpa suggests.
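
For clarity, what the TESTB boils down to, written as C (an illustrative
equivalent, not the actual code):

	/* Is feature 'bit' set in the capability bitmap? */
	static inline bool cap_bit_set(const u8 *caps, u16 bit)
	{
		return caps[bit >> 3] & (1 << (bit & 7));
	}

i.e. one byte-sized memory test against boot_cpu_data.x86_capability,
performed only until alternatives patch the jump away.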

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/1452972124-7380-1-git-send-email-brgerst@gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/cpufeature.h | 19 ++++++++++++-------
 arch/x86/kernel/cpu/common.c      |  6 ------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d48bf024f335..cab88ff86caf 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -130,8 +130,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  */
 
 #if CC_HAVE_ASM_GOTO && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern bool __static_cpu_has(u16 bit);
-
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These will statically patch the target code for additional
@@ -139,7 +137,7 @@ extern bool __static_cpu_has(u16 bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -164,13 +162,20 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6:\n"
+			 " testb %[bitnum],%[cap_byte]\n"
+			 " jnz %l[t_yes]\n"
+			 " jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has(bit);
 }
 
 #define static_cpu_has(bit)					\
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ee499817f3f5..079d83fc6488 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1475,12 +1475,6 @@ void cpu_init(void)
 }
 #endif
 
-inline bool __static_cpu_has(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 09/10] x86/vdso: Use static_cpu_has()
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (7 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 08/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-30 13:20   ` [tip:x86/asm] " tip-bot for Borislav Petkov
  2016-01-26 21:12 ` [PATCH 10/10] x86/head_64: Simplify kernel load address alignment check Borislav Petkov
  9 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Borislav Petkov <bp@suse.de>

... and simplify and speed up a tad.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/entry/vdso/vma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 5471ac362147..6b46648588d8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -255,7 +255,7 @@ static void vgetcpu_cpu_init(void *arg)
 #ifdef CONFIG_NUMA
 	node = cpu_to_node(cpu);
 #endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		write_rdtscp_aux((node << 12) | cpu);
 
 	/*
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 10/10] x86/head_64: Simplify kernel load address alignment check
  2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
                   ` (8 preceding siblings ...)
  2016-01-26 21:12 ` [PATCH 09/10] x86/vdso: Use static_cpu_has() Borislav Petkov
@ 2016-01-26 21:12 ` Borislav Petkov
  2016-01-30 13:20   ` [tip:x86/boot] x86/boot: " tip-bot for Alexander Kuleshov
  9 siblings, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-26 21:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

From: Alexander Kuleshov <kuleshovmail@gmail.com>

We are using %rax as a temporary register to check the kernel address
alignment. We don't really have to, since the TEST instruction does not
clobber the destination operand.
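
TEST performs the AND purely for the flags and writes nothing back, so
the mask can be applied to %ebp in place:

	testl	$~PMD_PAGE_MASK, %ebp	/* sets ZF; %ebp is unchanged */
	jnz	bad_address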

Suggested-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Cc: Alexander Popov <alpopov@ptsecurity.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453531828-19291-1-git-send-email-kuleshovmail@gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/head_64.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860390..7c21029cb733 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -76,9 +76,7 @@ startup_64:
 	subq	$_text - __START_KERNEL_map, %rbp
 
 	/* Is the address not 2M aligned? */
-	movq	%rbp, %rax
-	andl	$~PMD_PAGE_MASK, %eax
-	testl	%eax, %eax
+	testl	$~PMD_PAGE_MASK, %ebp
 	jnz	bad_address
 
 	/*
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant
  2016-01-26 21:12 ` [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant Borislav Petkov
@ 2016-01-27  3:36   ` Brian Gerst
  2016-01-27  8:41     ` Borislav Petkov
  0 siblings, 1 reply; 66+ messages in thread
From: Brian Gerst @ 2016-01-27  3:36 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Ingo Molnar, LKML

On Tue, Jan 26, 2016 at 4:12 PM, Borislav Petkov <bp@alien8.de> wrote:
> From: Borislav Petkov <bp@suse.de>
>
> I can simply quote hpa from the mail:
>
> "Get rid of the non-asm goto variant and just fall back to dynamic if
> asm goto is unavailable. It doesn't make any sense, really, if it is
> supposed to be safe, and by now the asm goto-capable gcc is in more wide
> use. (Originally the gcc 3.x fallback to pure dynamic didn't exist,
> either.)"
>
> Boy, am I lazy.
>
> Clean up the whole CC_HAVE_ASM_GOTO ifdeffery too, while at it.
>
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
> ---
>  arch/x86/include/asm/cpufeature.h | 49 ++++-----------------------------------
>  1 file changed, 5 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index a261cf2e7907..d48bf024f335 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -129,17 +129,16 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
>   * fast paths and boot_cpu_has() otherwise!
>   */
>
> -#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
> +#if CC_HAVE_ASM_GOTO && defined(CONFIG_X86_FAST_FEATURE_TESTS)

This should be:
#if defined(CC_HAVE_ASM_GOTO) && ...

--
Brian Gerst

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant
  2016-01-27  3:36   ` Brian Gerst
@ 2016-01-27  8:41     ` Borislav Petkov
  2016-01-27  8:43       ` [PATCH -v1.1 " Borislav Petkov
  2016-01-27  8:45       ` [PATCH -v1.1 8/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
  0 siblings, 2 replies; 66+ messages in thread
From: Borislav Petkov @ 2016-01-27  8:41 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Ingo Molnar, LKML

On Tue, Jan 26, 2016 at 10:36:21PM -0500, Brian Gerst wrote:
> This should be:
> #if defined(CC_HAVE_ASM_GOTO) && ...

Good catch, thanks!

I thought about it while typing, and then gcc didn't complain, so I
forgot again. :-\
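
For the record, a standalone sketch (not kernel code) of why gcc stays
quiet here: in an #if expression the preprocessor replaces any
identifier that is not a defined macro with 0, and the kernel passes
-DCC_HAVE_ASM_GOTO (which defines it to 1) when gcc supports asm goto,
so the broken test happened to evaluate correctly either way. Building
with -Wundef is what would have flagged it; defined() states the
intent explicitly.

    /* Try: cc -Wundef demo.c  vs.  cc -DCC_HAVE_ASM_GOTO -Wundef demo.c */
    #include <stdio.h>

    #if CC_HAVE_ASM_GOTO            /* undefined -> quietly 0; -Wundef warns */
    #define VARIANT "asm goto path"
    #else
    #define VARIANT "dynamic fallback"
    #endif

    int main(void)
    {
        puts(VARIANT);
        return 0;
    }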

Ingo, I'm sending the patches which changed as a reply to this message.

Thanks.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [PATCH -v1.1 06/10] x86/cpufeature: Get rid of the non-asm goto variant
  2016-01-27  8:41     ` Borislav Petkov
@ 2016-01-27  8:43       ` Borislav Petkov
  2016-01-30 13:19         ` [tip:x86/asm] " tip-bot for Borislav Petkov
  2016-01-27  8:45       ` [PATCH -v1.1 8/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
  1 sibling, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-27  8:43 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Brian Gerst, LKML

From: Borislav Petkov <bp@suse.de>
Date: Wed, 20 Jan 2016 19:14:03 +0100
Subject: [PATCH 1/4] x86/cpufeature: Get rid of the non-asm goto variant

I can simply quote hpa from the mail:

"Get rid of the non-asm goto variant and just fall back to dynamic if
asm goto is unavailable. It doesn't make any sense, really, if it is
supposed to be safe, and by now the asm goto-capable gcc is in more wide
use. (Originally the gcc 3.x fallback to pure dynamic didn't exist,
either.)"

Boy, am I lazy.

Clean up the whole CC_HAVE_ASM_GOTO ifdeffery too, while at it.
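
For readers new to the construct, a minimal standalone sketch of the
asm goto pattern the remaining variant is built on (simplified, not
the kernel macro): the compiler treats the inline asm as possibly
branching to a C label, so the fast path needs no output register and
no flag test.

    #include <stdio.h>

    static inline __attribute__((always_inline)) int have_feature(void)
    {
        /* In the kernel, alternatives patching NOPs this jmp out. */
        asm goto("jmp %l[t_no]" : : : : t_no);
        return 1;
    t_no:
        return 0;
    }

    int main(void)
    {
        /* Prints 0: nothing patched the jmp away in this toy. */
        printf("feature: %d\n", have_feature());
        return 0;
    }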

Signed-off-by: Borislav Petkov <bp@suse.de>
Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/cpufeature.h | 49 ++++-----------------------------------
 1 file changed, 5 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a261cf2e7907..9048c1bbc519 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -129,17 +129,16 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  * fast paths and boot_cpu_has() otherwise!
  */
 
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-#ifdef CC_HAVE_ASM_GOTO
 		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
@@ -172,45 +171,6 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 		return false;
 	t_dynamic:
 		return __static_cpu_has(bit);
-#else
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 3f - .\n"		/* repl offset */
-			     " .word %P2\n"		/* always replace */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $0,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 5f - .\n"		/* repl offset */
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 4b - 3b\n"		/* src len */
-			     " .byte 6f - 5f\n"		/* repl len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "5: movb $1,%0\n"
-			     "6:\n"
-			     ".previous\n"
-			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
 }
 
 #define static_cpu_has(bit)					\
@@ -221,7 +181,8 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 )
 #else
 /*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
 #endif
-- 
2.3.5

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH -v1.1 8/10] x86/alternatives: Discard dynamic check after init
  2016-01-27  8:41     ` Borislav Petkov
  2016-01-27  8:43       ` [PATCH -v1.1 " Borislav Petkov
@ 2016-01-27  8:45       ` Borislav Petkov
  2016-01-30 13:20         ` [tip:x86/asm] " tip-bot for Brian Gerst
  1 sibling, 1 reply; 66+ messages in thread
From: Borislav Petkov @ 2016-01-27  8:45 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Brian Gerst, LKML

From: Brian Gerst <brgerst@gmail.com>
Date: Sat, 16 Jan 2016 14:22:04 -0500
Subject: [PATCH -v1.1 8/10] x86/alternatives: Discard dynamic check after init

Move the code to do the dynamic check to the altinstr_aux section so
that it is discarded after alternatives have run and a static branch has
been chosen.

This way we're changing the dynamic branch from C code to assembly,
which makes it *substantially* smaller while avoiding a completely
unnecessary call to an out-of-line function.

Boris: change it to do TESTB, as hpa suggests.
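
A standalone sketch (assumed helper names, not kernel code) of the
byte-addressing trick behind the TESTB: on little-endian x86, bit
'bit' of a u32 capability array is bit (bit & 7) of byte (bit >> 3),
which is exactly what the one-byte immediate in the asm encodes.

    #include <assert.h>
    #include <stdint.h>

    static int test_bit_wordwise(const uint32_t *caps, unsigned int bit)
    {
        return (caps[bit / 32] >> (bit % 32)) & 1;
    }

    static int test_bit_bytewise(const uint32_t *caps, unsigned int bit)
    {
        const unsigned char *p = (const unsigned char *)caps;

        /* What "testb %[bitnum],%[cap_byte]" computes. */
        return (p[bit >> 3] >> (bit & 7)) & 1;
    }

    int main(void)
    {
        uint32_t caps[2] = { 0x08000010u, 0x80000001u };
        unsigned int bit;

        for (bit = 0; bit < 64; bit++)
            assert(test_bit_wordwise(caps, bit) ==
                   test_bit_bytewise(caps, bit));
        return 0;
    }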

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/1452972124-7380-1-git-send-email-brgerst@gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/cpufeature.h | 19 ++++++++++++-------
 arch/x86/kernel/cpu/common.c      |  6 ------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 9048c1bbc519..9fba7a5dd24a 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -130,8 +130,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  */
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern bool __static_cpu_has(u16 bit);
-
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These will statically patch the target code for additional
@@ -139,7 +137,7 @@ extern bool __static_cpu_has(u16 bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -164,13 +162,20 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6:\n"
+			 " testb %[bitnum],%[cap_byte]\n"
+			 " jnz %l[t_yes]\n"
+			 " jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has(bit);
 }
 
 #define static_cpu_has(bit)					\
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ee499817f3f5..079d83fc6488 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1475,12 +1475,6 @@ void cpu_init(void)
 }
 #endif
 
-inline bool __static_cpu_has(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)
-- 
2.3.5


-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [tip:x86/asm] x86/cpufeature: Carve out X86_FEATURE_*
  2016-01-26 21:12 ` [PATCH 04/10] x86/cpufeature: Carve out X86_FEATURE_* Borislav Petkov
@ 2016-01-30 13:18   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Borislav Petkov @ 2016-01-30 13:18 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dvlasenk, brgerst, bp, linux-kernel, tglx, peterz, bp, luto, hpa,
	torvalds, mingo

Commit-ID:  cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b
Gitweb:     http://git.kernel.org/tip/cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Tue, 26 Jan 2016 22:12:04 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:17 +0100

x86/cpufeature: Carve out X86_FEATURE_*

Move them to a separate header and have the following
dependency:

  x86/cpufeatures.h <- x86/processor.h <- x86/cpufeature.h

This makes it easier to use the header in asm code, without having to
include the whole of cpufeature.h and add guards for asm.

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453842730-28463-5-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kernel-parameters.txt                |   2 +-
 arch/x86/boot/cpuflags.h                           |   2 +-
 arch/x86/boot/mkcpustr.c                           |   2 +-
 arch/x86/crypto/crc32-pclmul_glue.c                |   2 +-
 arch/x86/crypto/crc32c-intel_glue.c                |   2 +-
 arch/x86/crypto/crct10dif-pclmul_glue.c            |   2 +-
 arch/x86/entry/common.c                            |   1 +
 arch/x86/entry/entry_32.S                          |   2 +-
 arch/x86/entry/vdso/vdso32-setup.c                 |   1 -
 arch/x86/entry/vdso/vdso32/system_call.S           |   2 +-
 arch/x86/entry/vdso/vma.c                          |   1 +
 arch/x86/include/asm/alternative.h                 |   6 -
 arch/x86/include/asm/apic.h                        |   1 -
 arch/x86/include/asm/arch_hweight.h                |   2 +
 arch/x86/include/asm/cmpxchg.h                     |   1 +
 arch/x86/include/asm/cpufeature.h                  | 284 +----------------
 .../include/asm/{cpufeature.h => cpufeatures.h}    | 336 +--------------------
 arch/x86/include/asm/fpu/internal.h                |   1 +
 arch/x86/include/asm/irq_work.h                    |   2 +-
 arch/x86/include/asm/mwait.h                       |   2 +
 arch/x86/include/asm/processor.h                   |   3 +-
 arch/x86/include/asm/smap.h                        |   2 +-
 arch/x86/include/asm/smp.h                         |   1 -
 arch/x86/include/asm/thread_info.h                 |   2 +-
 arch/x86/include/asm/tlbflush.h                    |   1 +
 arch/x86/include/asm/uaccess_64.h                  |   2 +-
 arch/x86/kernel/cpu/Makefile                       |   2 +-
 arch/x86/kernel/cpu/centaur.c                      |   2 +-
 arch/x86/kernel/cpu/cyrix.c                        |   1 +
 arch/x86/kernel/cpu/intel.c                        |   2 +-
 arch/x86/kernel/cpu/intel_cacheinfo.c              |   2 +-
 arch/x86/kernel/cpu/match.c                        |   2 +-
 arch/x86/kernel/cpu/mkcapflags.sh                  |   6 +-
 arch/x86/kernel/cpu/mtrr/main.c                    |   2 +-
 arch/x86/kernel/cpu/transmeta.c                    |   2 +-
 arch/x86/kernel/e820.c                             |   1 +
 arch/x86/kernel/head_32.S                          |   2 +-
 arch/x86/kernel/hpet.c                             |   1 +
 arch/x86/kernel/msr.c                              |   2 +-
 arch/x86/kernel/verify_cpu.S                       |   2 +-
 arch/x86/lib/clear_page_64.S                       |   2 +-
 arch/x86/lib/copy_page_64.S                        |   2 +-
 arch/x86/lib/copy_user_64.S                        |   2 +-
 arch/x86/lib/memcpy_64.S                           |   2 +-
 arch/x86/lib/memmove_64.S                          |   2 +-
 arch/x86/lib/memset_64.S                           |   2 +-
 arch/x86/mm/setup_nx.c                             |   1 +
 arch/x86/oprofile/op_model_amd.c                   |   1 -
 arch/x86/um/asm/barrier.h                          |   2 +-
 lib/atomic64_test.c                                |   2 +-
 50 files changed, 54 insertions(+), 658 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 87d40a7..c0c6253 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	clearcpuid=BITNUM [X86]
 			Disable CPUID feature X for the kernel. See
-			arch/x86/include/asm/cpufeature.h for the valid bit
+			arch/x86/include/asm/cpufeatures.h for the valid bit
 			numbers. Note the Linux specific bits are not necessarily
 			stable over kernel options, but the vendor specific
 			ones should be.
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697..4cb404f 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -1,7 +1,7 @@
 #ifndef BOOT_CPUFLAGS_H
 #define BOOT_CPUFLAGS_H
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/processor-flags.h>
 
 struct cpu_features {
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e..f72498d 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,7 +17,7 @@
 
 #include "../include/asm/required-features.h"
 #include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c..27226df 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -33,7 +33,7 @@
 #include <linux/crc32.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e98716..0857b1a 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -30,7 +30,7 @@
 #include <linux/kernel.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/internal.h>
 
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc9..cd4df93 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -30,7 +30,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 
 asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 75175f9..c6ab2eb 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
 #include <asm/traps.h>
 #include <asm/vdso.h>
 #include <asm/uaccess.h>
+#include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 77d8c51..4c52283 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
 #include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a..7853b53 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/mm_types.h>
 
-#include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/vdso.h>
 
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d929..0109ac6 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -3,7 +3,7 @@
 */
 
 #include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 7c912fe..429d54d 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,6 +20,7 @@
 #include <asm/page.h>
 #include <asm/hpet.h>
 #include <asm/desc.h>
+#include <asm/cpufeature.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85b..99afb66 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	".popsection"
 
 /*
- * This must be included *after* the definition of ALTERNATIVE due to
- * <asm/arch_hweight.h>
- */
-#include <asm/cpufeature.h>
-
-/*
  * Alternative instructions for different CPU types or capabilities.
  *
  * This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6..0899cfc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -6,7 +6,6 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/processor.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1..02e799f 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_HWEIGHT_H
 #define _ASM_X86_HWEIGHT_H
 
+#include <asm/cpufeatures.h>
+
 #ifdef CONFIG_64BIT
 /* popcnt %edi, %eax -- redundant REX prefix for alignment */
 #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841..9733361 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
 #define ASM_X86_CMPXCHG_H
 
 #include <linux/compiler.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
 /*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bbf166e..3cce9f3 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -1,289 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
-#define NCAPINTS	16	/* N 32-bit words worth of info */
-#define NBUGINTS	1	/* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name.  If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
-/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x)		(NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include <asm/processor.h>
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeatures.h
similarity index 59%
copy from arch/x86/include/asm/cpufeature.h
copy to arch/x86/include/asm/cpufeatures.h
index bbf166e..0ceb6ad 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -1,8 +1,5 @@
-/*
- * Defines x86 CPU feature bits
- */
-#ifndef _ASM_X86_CPUFEATURE_H
-#define _ASM_X86_CPUFEATURE_H
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
 
 #ifndef _ASM_X86_REQUIRED_FEATURES_H
 #include <asm/required-features.h>
@@ -12,6 +9,9 @@
 #include <asm/disabled-features.h>
 #endif
 
+/*
+ * Defines x86 CPU feature bits
+ */
 #define NCAPINTS	16	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
@@ -285,328 +285,4 @@
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-
-#include <asm/asm.h>
-#include <linux/bitops.h>
-
-enum cpuid_leafs
-{
-	CPUID_1_EDX		= 0,
-	CPUID_8000_0001_EDX,
-	CPUID_8086_0001_EDX,
-	CPUID_LNX_1,
-	CPUID_1_ECX,
-	CPUID_C000_0001_EDX,
-	CPUID_8000_0001_ECX,
-	CPUID_LNX_2,
-	CPUID_LNX_3,
-	CPUID_7_0_EBX,
-	CPUID_D_1_EAX,
-	CPUID_F_0_EDX,
-	CPUID_F_1_EDX,
-	CPUID_8000_0008_EBX,
-	CPUID_6_EAX,
-	CPUID_8000_000A_EDX,
-};
-
-#ifdef CONFIG_X86_FEATURE_NAMES
-extern const char * const x86_cap_flags[NCAPINTS*32];
-extern const char * const x86_power_flags[32];
-#define X86_CAP_FMT "%s"
-#define x86_cap_flag(flag) x86_cap_flags[flag]
-#else
-#define X86_CAP_FMT "%d:%d"
-#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
-#endif
-
-/*
- * In order to save room, we index into this array by doing
- * X86_BUG_<name> - NCAPINTS*32.
- */
-extern const char * const x86_bug_flags[NBUGINTS*32];
-
-#define test_cpu_cap(c, bit)						\
-	 test_bit(bit, (unsigned long *)((c)->x86_capability))
-
-#define REQUIRED_MASK_BIT_SET(bit)					\
-	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
-	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
-	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
-	   (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) ||	\
-	   (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) ||	\
-	   (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) ||	\
-	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
-	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||	\
-	   (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||	\
-	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
-
-#define DISABLED_MASK_BIT_SET(bit)					\
-	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0)) ||	\
-	   (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1)) ||	\
-	   (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2)) ||	\
-	   (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3)) ||	\
-	   (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4)) ||	\
-	   (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5)) ||	\
-	   (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6)) ||	\
-	   (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7)) ||	\
-	   (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8)) ||	\
-	   (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) )
-
-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
-	 test_cpu_cap(c, bit))
-
-#define this_cpu_has(bit)						\
-	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
-	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
-
-/*
- * This macro is for detection of features which need kernel
- * infrastructure to be used.  It may *not* directly test the CPU
- * itself.  Use the cpu_has() family if you want true runtime
- * testing of CPU features, like in hypervisor code where you are
- * supporting a possible guest feature where host support for it
- * is not relevant.
- */
-#define cpu_feature_enabled(bit)	\
-	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 :	\
-	 cpu_has(&boot_cpu_data, bit))
-
-#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
-
-#define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
-	clear_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
-#define setup_force_cpu_cap(bit) do { \
-	set_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, (unsigned long *)cpu_caps_set);	\
-} while (0)
-
-#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_pse		boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_fxsr		boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_aes		boot_cpu_has(X86_FEATURE_AES)
-#define cpu_has_avx		boot_cpu_has(X86_FEATURE_AVX)
-#define cpu_has_avx2		boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
-#define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-#define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
-#define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaves		boot_cpu_has(X86_FEATURE_XSAVES)
-#define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
-#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
-/*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
- * fast paths and boot_cpu_has() otherwise!
- */
-
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
-
-/*
- * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
- */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
-			 "2:\n"
-			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-			         "((5f-4f) - (2b-1b)),0x90\n"
-			 "3:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 4f - .\n"		/* repl offset */
-			 " .word %P1\n"			/* always replace */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 5f - 4f\n"		/* repl len */
-			 " .byte 3b - 2b\n"		/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_replacement,\"ax\"\n"
-			 "4: jmp %l[t_no]\n"
-			 "5:\n"
-			 ".previous\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 0\n"			/* no replacement */
-			 " .word %P0\n"			/* feature bit */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 0\n"			/* repl len */
-			 " .byte 0\n"			/* pad len */
-			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
-		return true;
-	t_no:
-		return false;
-	t_dynamic:
-		return __static_cpu_has_safe(bit);
-#else
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 3f - .\n"		/* repl offset */
-			     " .word %P2\n"		/* always replace */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $0,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 5f - .\n"		/* repl offset */
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 4b - 3b\n"		/* src len */
-			     " .byte 6f - 5f\n"		/* repl len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "5: movb $1,%0\n"
-			     "6:\n"
-			     ".previous\n"
-			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has_safe(bit)				\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
-)
-#else
-/*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
- */
-#define static_cpu_has(bit)		boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
-#endif
-
-#define cpu_has_bug(c, bit)		cpu_has(c, (bit))
-#define set_cpu_bug(c, bit)		set_cpu_cap(c, (bit))
-#define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
-
-#define static_cpu_has_bug(bit)		static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
-#define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
-
-#define MAX_CPU_FEATURES		(NCAPINTS * 32)
-#define cpu_have_feature		boot_cpu_has
-
-#define CPU_FEATURE_TYPEFMT		"x86,ven%04Xfam%04Xmod%04X"
-#define CPU_FEATURE_TYPEVAL		boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
-					boot_cpu_data.x86_model
-
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
-#endif /* _ASM_X86_CPUFEATURE_H */
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440d..d01199d 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -17,6 +17,7 @@
 #include <asm/user.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
 
 /*
  * High level FPU state handling functions:
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8..d0afb05 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_IRQ_WORK_H
 #define _ASM_IRQ_WORK_H
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 static inline bool arch_irq_work_has_interrupt(void)
 {
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b..0deeb2d 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -3,6 +3,8 @@
 
 #include <linux/sched.h>
 
+#include <asm/cpufeature.h>
+
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2d5a50c..491a3d9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -13,7 +13,7 @@ struct vm86;
 #include <asm/types.h>
 #include <uapi/asm/sigcontext.h>
 #include <asm/current.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/pgtable_types.h>
 #include <asm/percpu.h>
@@ -24,7 +24,6 @@ struct vm86;
 #include <asm/fpu/types.h>
 
 #include <linux/personality.h>
-#include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <linux/math64.h>
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665eb..db33330 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -15,7 +15,7 @@
 
 #include <linux/stringify.h>
 #include <asm/nops.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 
 /* "Raw" instruction opcodes */
 #define __ASM_CLAC	.byte 0x0f,0x01,0xca
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf072..20a3de5 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,6 @@
 #endif
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
-#include <asm/cpufeature.h>
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b5510..c0778fc 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -49,7 +49,7 @@
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
 struct thread_info {
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029..0bb31cb 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,6 +5,7 @@
 #include <linux/sched.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c..3076986 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,7 +8,7 @@
 #include <linux/errno.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 
 /*
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5803130..faa7b52 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -64,7 +64,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
 
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
 
 targets += capflags.c
 $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6..6608c03 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e..15e47c1 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <asm/pci-direct.h>
 #include <asm/tsc.h>
+#include <asm/cpufeature.h>
 
 #include "cpu.h"
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648b..9299e3b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
 #include <linux/module.h>
 #include <linux/uaccess.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
 #include <asm/bugs.h>
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c523..341449c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
 #include <linux/sysfs.h>
 #include <linux/pci.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d..fbb5e90 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
 #include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710..6988c74 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
 #
 
 IN=$1
@@ -49,8 +49,8 @@ dump_array()
 trap 'rm "$OUT"' EXIT
 
 (
-	echo "#ifndef _ASM_X86_CPUFEATURE_H"
-	echo "#include <asm/cpufeature.h>"
+	echo "#ifndef _ASM_X86_CPUFEATURES_H"
+	echo "#include <asm/cpufeatures.h>"
 	echo "#endif"
 	echo ""
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 5c3d149..74f1d90 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
 #include <linux/smp.h>
 #include <linux/syscore_ops.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 252da7a..a19a663 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 #include "cpu.h"
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 569c1e4..b3c2a69 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
+#include <asm/cpufeature.h>
 
 /*
  * The e820 map is the map that gets modified e.g. with command line parameters
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae2..af11129 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
 #include <asm/setup.h>
 #include <asm/processor-flags.h>
 #include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include <asm/bootparam.h>
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5..be0ebbb 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
 #include <linux/pm.h>
 #include <linux/io.h>
 
+#include <asm/cpufeature.h>
 #include <asm/irqdomain.h>
 #include <asm/fixmap.h>
 #include <asm/hpet.h>
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616..7f3550a 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 
 static struct class *msr_class;
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35..014ea59 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
  * 	appropriately. Either display a message or halt.
  */
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
 verify_cpu:
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a2fe51b..65be7cf 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,5 @@
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 009f982..24ef1c2 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,7 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 982ce34..fba3430 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -10,7 +10,7 @@
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 16698bb..a0de849 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index ca2afdd..90ce01b 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,7 @@
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 #undef memmove
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2661fad..c9c8122 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 .weak memset
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eac..f65a33f 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -4,6 +4,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/proto.h>
+#include <asm/cpufeature.h>
 
 static int disable_nx;
 
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 50d86c0..660a83c 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -24,7 +24,6 @@
 #include <asm/nmi.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
-#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 174781a..00c3190 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -3,7 +3,7 @@
 
 #include <asm/asm.h>
 #include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cmpxchg.h>
 #include <asm/nops.h>
 
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index d62de8b..1234818 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -17,7 +17,7 @@
 #include <linux/atomic.h>
 
 #ifdef CONFIG_X86
-#include <asm/processor.h>	/* for boot_cpu_has below */
+#include <asm/cpufeature.h>	/* for boot_cpu_has below */
 #endif
 
 #define TEST(bit, op, c_op, val)				\

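For context on the split the hunks above implement: after this series,
<asm/cpufeatures.h> holds only bare preprocessor constants (and is
therefore safe to include from .S files), while <asm/cpufeature.h>
layers the C-only test machinery on top of it. A self-contained
userspace model of that layering - simplified, illustrative names, not
the real kernel headers:

	#include <stdio.h>

	/* the "cpufeatures" layer: bare constants, asm-safe */
	#define X86_FEATURE_FPU		(0*32 + 0)
	#define X86_FEATURE_XMM4_2	(4*32 + 20)

	/* the "cpufeature" layer on top: C-only test helpers */
	static unsigned int caps[10];
	#define cpu_has(bit)	((caps[(bit) / 32] >> ((bit) % 32)) & 1u)

	int main(void)
	{
		caps[0] |= 1u << 0;	/* pretend the FPU bit is set */
		printf("fpu=%u sse4.2=%u\n",
		       cpu_has(X86_FEATURE_FPU),
		       cpu_has(X86_FEATURE_XMM4_2));
		return 0;
	}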

* [tip:x86/asm] x86/cpufeature: Replace the old static_cpu_has() with safe variant
  2016-01-26 21:12 ` [PATCH 05/10] x86/cpufeature: Replace the old static_cpu_has() with safe variant Borislav Petkov
@ 2016-01-30 13:19   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Borislav Petkov @ 2016-01-30 13:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, torvalds, peterz, dvlasenk, hpa, linux-kernel, bp, mingo,
	brgerst, luto, tglx

Commit-ID:  bc696ca05f5a8927329ec276a892341e006b00ba
Gitweb:     http://git.kernel.org/tip/bc696ca05f5a8927329ec276a892341e006b00ba
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Tue, 26 Jan 2016 22:12:05 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:18 +0100

x86/cpufeature: Replace the old static_cpu_has() with safe variant

So the old static_cpu_has() didn't work properly before
alternatives had run. And it was supposed to provide an optimized
JMP because the assumption was that the jump target is within a
signed-byte offset and can thus use the two-byte (rel8) JMP
encoding instead of the five-byte (rel32) one.

So I did an x86_64 allyesconfig build and dumped all possible
sites where static_cpu_has() was used. The optimization amounted
to all of 12(!) places where static_cpu_has() had generated a
2-byte JMP, which saved us a whopping 12 x 3 = 36 bytes!

This clearly is not worth the trouble so we can remove it. The
only place where the optimization might count - in __switch_to()
- we will handle differently. But that's not the subject of this
patch.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453842730-28463-6-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig.debug               |  10 ----
 arch/x86/include/asm/cpufeature.h    | 100 +++--------------------------------
 arch/x86/include/asm/fpu/internal.h  |  14 ++---
 arch/x86/kernel/apic/apic_numachip.c |   4 +-
 arch/x86/kernel/cpu/common.c         |  12 +----
 arch/x86/kernel/vm86_32.c            |   2 +-
 drivers/cpufreq/intel_pstate.c       |   2 +-
 fs/btrfs/disk-io.c                   |   2 +-
 8 files changed, 21 insertions(+), 125 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed9..68a2d1f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -350,16 +350,6 @@ config DEBUG_IMR_SELFTEST
 
 	  If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-	bool "Debug alternatives"
-	depends on DEBUG_KERNEL
-	---help---
-	  This option causes additional code to be generated which
-	  fails if static_cpu_has() is used before alternatives have
-	  run.
-
-	  If unsure, say N.
-
 config X86_DEBUG_FPU
 	bool "Debug the x86 FPU code"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3cce9f3..a261cf2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -125,103 +125,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 /*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
  * fast paths and boot_cpu_has() otherwise!
  */
 
 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
+extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These are only valid after alternatives have run, but will statically
  * patch the target code for additional performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
 		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
@@ -255,7 +171,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 	t_no:
 		return false;
 	t_dynamic:
-		return __static_cpu_has_safe(bit);
+		return __static_cpu_has(bit);
 #else
 		u8 flag;
 		/* Open-coded due to __stringify() in ALTERNATIVE() */
@@ -293,22 +209,21 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     ".previous\n"
 			     : "=qm" (flag)
 			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+		return (flag == 2 ? __static_cpu_has(bit) : flag);
 #endif /* CC_HAVE_ASM_GOTO */
 }
 
-#define static_cpu_has_safe(bit)				\
+#define static_cpu_has(bit)					\
 (								\
 	__builtin_constant_p(boot_cpu_has(bit)) ?		\
 		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
+		_static_cpu_has(bit)				\
 )
 #else
 /*
  * gcc 3.x is too stupid to do the static test; fall back to dynamic.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
@@ -316,7 +231,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
 
 #define static_cpu_has_bug(bit)		static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES		(NCAPINTS * 32)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index d01199d..c2e46eb 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -59,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+	return static_cpu_has(X86_FEATURE_XSAVEOPT);
 }
 
 static __always_inline __pure bool use_xsave(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVE);
+	return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
 static __always_inline __pure bool use_fxsr(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_FXSR);
+	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
 /*
@@ -301,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -323,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -461,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
 	 * pending. Clear the x87 state here by setting it to fixed values.
 	 * "m" is a random variable that should be in L1.
 	 */
-	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
 			"emms\n\t"
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c..ab5c2c6 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
 	unsigned long value;
 	unsigned int id = (x >> 24) & 0xff;
 
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
 		id |= (value << 2) & 0xff00;
 	}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 	this_cpu_write(cpu_llc_id, node);
 
 	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, val);
 		nodes = ((val >> 3) & 7) + 1;
 	}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de..ee49981 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1475,19 +1475,11 @@ void cpu_init(void)
 }
 #endif
 
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
-	WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
+inline bool __static_cpu_has(u16 bit)
 {
 	return boot_cpu_has(bit);
 }
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
+EXPORT_SYMBOL_GPL(__static_cpu_has);
 
 static void bsp_resume(void)
 {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b85..3dce1ca 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	/* make room for real-mode segments */
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has_safe(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP))
 		tsk->thread.sysenter_cs = 0;
 
 	load_sp0(tss, &tsk->thread);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d47..3a4b39a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void)
 	if (!all_cpu_data)
 		return -ENOMEM;
 
-	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+	if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
 		pr_info("intel_pstate: HWP enabled\n");
 		hwp_active++;
 	}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd08e29..d928649 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -930,7 +930,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
 	if (bio_flags & EXTENT_BIO_TREE_LOG)
 		return 0;
 #ifdef CONFIG_X86
-	if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+	if (static_cpu_has(X86_FEATURE_XMM4_2))
 		return 0;
 #endif
 	return 1;


* [tip:x86/asm] x86/cpufeature: Get rid of the non-asm goto variant
  2016-01-27  8:43       ` [PATCH -v1.1 " Borislav Petkov
@ 2016-01-30 13:19         ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Borislav Petkov @ 2016-01-30 13:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, brgerst, hpa, bp, linux-kernel, bp, mingo, dvlasenk, luto,
	torvalds, peterz

Commit-ID:  a362bf9f5e7dd659b96d01382da7b855f4e5a7a1
Gitweb:     http://git.kernel.org/tip/a362bf9f5e7dd659b96d01382da7b855f4e5a7a1
Author:     Borislav Petkov <bp@alien8.de>
AuthorDate: Wed, 27 Jan 2016 09:43:25 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:19 +0100

x86/cpufeature: Get rid of the non-asm goto variant

I can simply quote hpa from the mail:

  "Get rid of the non-asm goto variant and just fall back to
   dynamic if asm goto is unavailable. It doesn't make any sense,
   really, if it is supposed to be safe, and by now the asm
   goto-capable gcc is in more wide use. (Originally the gcc 3.x
   fallback to pure dynamic didn't exist, either.)"

Boy, am I lazy.

Clean up the whole CC_HAVE_ASM_GOTO ifdeffery too, while at it.

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160127084325.GB30712@pd.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/cpufeature.h | 49 ++++-----------------------------------
 1 file changed, 5 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a261cf2..9048c1b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -129,17 +129,16 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  * fast paths and boot_cpu_has() otherwise!
  */
 
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-#ifdef CC_HAVE_ASM_GOTO
 		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
@@ -172,45 +171,6 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 		return false;
 	t_dynamic:
 		return __static_cpu_has(bit);
-#else
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 3f - .\n"		/* repl offset */
-			     " .word %P2\n"		/* always replace */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $0,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 5f - .\n"		/* repl offset */
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 4b - 3b\n"		/* src len */
-			     " .byte 6f - 5f\n"		/* repl len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "5: movb $1,%0\n"
-			     "6:\n"
-			     ".previous\n"
-			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
 }
 
 #define static_cpu_has(bit)					\
@@ -221,7 +181,8 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 )
 #else
 /*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
 #endif

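Whether asm goto is available is decided at build time. A minimal
probe translation unit in the spirit of that detection (a sketch; the
kernel's actual probe is not shown in this thread): it compiles only
with an asm goto-capable compiler, so a build system can key something
like CC_HAVE_ASM_GOTO off the result:

	int main(void)
	{
		asm goto("jmp %l[done]" : : : : done);
		return 1;
	done:
		return 0;
	}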

* [tip:x86/asm] x86/alternatives: Add an auxiliary section
  2016-01-26 21:12 ` [PATCH 07/10] x86/alternatives: Add an auxiliary section Borislav Petkov
@ 2016-01-30 13:19   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Borislav Petkov @ 2016-01-30 13:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dvlasenk, linux-kernel, mingo, brgerst, tglx, hpa, bp, bp, luto,
	peterz, torvalds

Commit-ID:  337e4cc84021212a87b04b77b65cccc49304909e
Gitweb:     http://git.kernel.org/tip/337e4cc84021212a87b04b77b65cccc49304909e
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Tue, 26 Jan 2016 22:12:07 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:20 +0100

x86/alternatives: Add an auxiliary section

Add .altinstr_aux for additional instructions which will be used
before and/or during patching. Anything that needs more
sophisticated patching should go there. See the next patch.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453842730-28463-8-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/vmlinux.lds.S | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf1..92dc211 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -195,6 +195,17 @@ SECTIONS
 	:init
 #endif
 
+	/*
+	 * Section for code used exclusively before alternatives are run. All
+	 * references to such code must be patched out by alternatives, normally
+	 * by using X86_FEATURE_ALWAYS CPU feature bit.
+	 *
+	 * See static_cpu_has() for an example.
+	 */
+	.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+		*(.altinstr_aux)
+	}
+
 	INIT_DATA_SECTION(16)
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {

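The mechanism itself is ordinary section routing: code ends up in
.altinstr_aux either via __attribute__((section(...))) or, as the next
patch does, via a .section directive inside inline asm. A hedged
userspace sketch (aux_probe is a made-up name; there is no init-time
discard here, that part comes from the kernel's linker script
placement):

	/* verify placement with: objdump -t a.out | grep aux_probe */
	__attribute__((section(".altinstr_aux"), used))
	static int aux_probe(void)
	{
		return 1;
	}

	int main(void)
	{
		return !aux_probe();
	}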

* [tip:x86/asm] x86/alternatives: Discard dynamic check after init
  2016-01-27  8:45       ` [PATCH -v1.1 8/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
@ 2016-01-30 13:20         ` tip-bot for Brian Gerst
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Brian Gerst @ 2016-01-30 13:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dvlasenk, hpa, tglx, akpm, linux-kernel, brgerst, kristen,
	boris.ostrovsky, prarit, luto, dyoung, luto, ross.zwisler, bp,
	torvalds, labbott, bp, mingo, peterz

Commit-ID:  2476f2fa20568bd5d9e09cd35bcd73e99a6f4cc6
Gitweb:     http://git.kernel.org/tip/2476f2fa20568bd5d9e09cd35bcd73e99a6f4cc6
Author:     Brian Gerst <brgerst@gmail.com>
AuthorDate: Wed, 27 Jan 2016 09:45:25 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:22 +0100

x86/alternatives: Discard dynamic check after init

Move the code to do the dynamic check to the altinstr_aux
section so that it is discarded after alternatives have run and
a static branch has been chosen.

This way we're changing the dynamic branch from C code to
assembly, which makes it *substantially* smaller while avoiding a
completely unnecessary call to an out-of-line function.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
[ Changed it to do TESTB, as hpa suggested. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1452972124-7380-1-git-send-email-brgerst@gmail.com
Link: http://lkml.kernel.org/r/20160127084525.GC30712@pd.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/cpufeature.h | 19 ++++++++++++-------
 arch/x86/kernel/cpu/common.c      |  6 ------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 9048c1b..9fba7a5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -130,8 +130,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
  */
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern bool __static_cpu_has(u16 bit);
-
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These will statically patch the target code for additional
@@ -139,7 +137,7 @@ extern bool __static_cpu_has(u16 bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -164,13 +162,20 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6:\n"
+			 " testb %[bitnum],%[cap_byte]\n"
+			 " jnz %l[t_yes]\n"
+			 " jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has(bit);
 }
 
 #define static_cpu_has(bit)					\
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ee49981..079d83f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1475,12 +1475,6 @@ void cpu_init(void)
 }
 #endif
 
-inline bool __static_cpu_has(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)

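The TESTB-on-the-capability-bitmap pattern above is easy to exercise in
isolation. A self-contained userspace sketch - build with gcc -O2 so
the "i" constraints fold to immediates after inlining; fake_capability
and test_bit_asm are illustrative names, not kernel interfaces:

	#include <stdbool.h>
	#include <stdio.h>

	static unsigned int fake_capability[2] = { 0x9, 0x0 };	/* bits 0, 3 */

	static __attribute__((always_inline)) inline bool test_bit_asm(unsigned short bit)
	{
		asm goto("testb %[bitnum], %[cap_byte]\n\t"
			 "jnz %l[t_yes]\n\t"
			 "jmp %l[t_no]"
			 : : [bitnum] "i" (1 << (bit & 7)),
			     [cap_byte] "m" (((const char *)fake_capability)[bit >> 3])
			 : : t_yes, t_no);
	t_yes:
		return true;
	t_no:
		return false;
	}

	int main(void)
	{
		/* prints "bit 3: 1, bit 5: 0" */
		printf("bit 3: %d, bit 5: %d\n", test_bit_asm(3), test_bit_asm(5));
		return 0;
	}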

* [tip:x86/asm] x86/vdso: Use static_cpu_has()
  2016-01-26 21:12 ` [PATCH 09/10] x86/vdso: Use static_cpu_has() Borislav Petkov
@ 2016-01-30 13:20   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Borislav Petkov @ 2016-01-30 13:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: torvalds, luto, peterz, bp, brgerst, dvlasenk, tglx,
	linux-kernel, mingo, bp, hpa

Commit-ID:  8c725306993198f845038dc9e45a1267099867a6
Gitweb:     http://git.kernel.org/tip/8c725306993198f845038dc9e45a1267099867a6
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Tue, 26 Jan 2016 22:12:09 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:23 +0100

x86/vdso: Use static_cpu_has()

... and simplify and speed up a tad.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453842730-28463-10-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/vdso/vma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 429d54d..10f7045 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -285,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
 #ifdef CONFIG_NUMA
 	node = cpu_to_node(cpu);
 #endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		write_rdtscp_aux((node << 12) | cpu);
 
 	/*

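The aux value programmed above can be read back from user space with
RDTSCP. A small sketch of the decode, assuming a CPU with RDTSCP and a
kernel that filled IA32_TSC_AUX with this (node << 12) | cpu layout:

	#include <stdio.h>
	#include <x86intrin.h>

	int main(void)
	{
		unsigned int aux;

		(void)__rdtscp(&aux);	/* discard the TSC, keep IA32_TSC_AUX */
		printf("cpu %u, node %u\n", aux & 0xfff, aux >> 12);
		return 0;
	}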

* [tip:x86/boot] x86/boot: Simplify kernel load address alignment check
  2016-01-26 21:12 ` [PATCH 10/10] x86/head_64: Simplify kernel load address alignment check Borislav Petkov
@ 2016-01-30 13:20   ` tip-bot for Alexander Kuleshov
  0 siblings, 0 replies; 66+ messages in thread
From: tip-bot for Alexander Kuleshov @ 2016-01-30 13:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: ryabinin.a.a, mingo, alpopov, linux-kernel, luto, luto, brgerst,
	bp, dvlasenk, peterz, kuleshovmail, torvalds, tglx, hpa, bp

Commit-ID:  a4733143085d6c782ac1e6c85778655b6bac1d4e
Gitweb:     http://git.kernel.org/tip/a4733143085d6c782ac1e6c85778655b6bac1d4e
Author:     Alexander Kuleshov <kuleshovmail@gmail.com>
AuthorDate: Tue, 26 Jan 2016 22:12:10 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 30 Jan 2016 11:22:48 +0100

x86/boot: Simplify kernel load address alignment check

We are using %rax as a temporary register to check the kernel
load address alignment. We don't really have to, since the TEST
instruction does not clobber its destination operand - it only
sets the flags.

Suggested-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Alexander Popov <alpopov@ptsecurity.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453531828-19291-1-git-send-email-kuleshovmail@gmail.com
Link: http://lkml.kernel.org/r/1453842730-28463-11-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/head_64.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e8..7c21029 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -76,9 +76,7 @@ startup_64:
 	subq	$_text - __START_KERNEL_map, %rbp
 
 	/* Is the address not 2M aligned? */
-	movq	%rbp, %rax
-	andl	$~PMD_PAGE_MASK, %eax
-	testl	%eax, %eax
+	testl	$~PMD_PAGE_MASK, %ebp
 	jnz	bad_address
 
 	/*

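The predicate itself is just a mask test: a 2 MiB-aligned address has
no bits set below the large-page boundary. The same check in C, as a
sketch (the constants mirror the kernel's 2 MiB page geometry but are
hardcoded here):

	#include <stdint.h>
	#include <stdio.h>

	#define PMD_PAGE_SIZE	(1UL << 21)		/* 2 MiB */
	#define PMD_PAGE_MASK	(~(PMD_PAGE_SIZE - 1))

	/* nonzero iff the load address is not 2 MiB aligned */
	static int badly_aligned(uint64_t phys_base)
	{
		return (phys_base & ~PMD_PAGE_MASK) != 0; /* what TESTL computes */
	}

	int main(void)
	{
		/* prints "0 1" */
		printf("%d %d\n",
		       badly_aligned(0x200000), badly_aligned(0x200001));
		return 0;
	}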


Thread overview: 66+ messages
2016-01-26 21:12 [PATCH 00/10] tip-queue 2016-01-26, rest Borislav Petkov
2016-01-26 21:12 ` [PATCH 01/10] x86/asm: Add condition codes clobber to memory barrier macros Borislav Petkov
2016-01-26 21:12 ` [PATCH 02/10] x86/asm: Drop a comment left over from X86_OOSTORE Borislav Petkov
2016-01-26 21:12 ` [PATCH 03/10] x86/asm: Tweak the comment about wmb() use for IO Borislav Petkov
2016-01-26 21:12 ` [PATCH 04/10] x86/cpufeature: Carve out X86_FEATURE_* Borislav Petkov
2016-01-30 13:18   ` [tip:x86/asm] " tip-bot for Borislav Petkov
2016-01-26 21:12 ` [PATCH 05/10] x86/cpufeature: Replace the old static_cpu_has() with safe variant Borislav Petkov
2016-01-30 13:19   ` [tip:x86/asm] " tip-bot for Borislav Petkov
2016-01-26 21:12 ` [PATCH 06/10] x86/cpufeature: Get rid of the non-asm goto variant Borislav Petkov
2016-01-27  3:36   ` Brian Gerst
2016-01-27  8:41     ` Borislav Petkov
2016-01-27  8:43       ` [PATCH -v1.1 " Borislav Petkov
2016-01-30 13:19         ` [tip:x86/asm] " tip-bot for Borislav Petkov
2016-01-27  8:45       ` [PATCH -v1.1 8/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
2016-01-30 13:20         ` [tip:x86/asm] " tip-bot for Brian Gerst
2016-01-26 21:12 ` [PATCH 07/10] x86/alternatives: Add an auxiliary section Borislav Petkov
2016-01-30 13:19   ` [tip:x86/asm] " tip-bot for Borislav Petkov
2016-01-26 21:12 ` [PATCH 08/10] x86/alternatives: Discard dynamic check after init Borislav Petkov
2016-01-26 21:12 ` [PATCH 09/10] x86/vdso: Use static_cpu_has() Borislav Petkov
2016-01-30 13:20   ` [tip:x86/asm] " tip-bot for Borislav Petkov
2016-01-26 21:12 ` [PATCH 10/10] x86/head_64: Simplify kernel load address alignment check Borislav Petkov
2016-01-30 13:20   ` [tip:x86/boot] x86/boot: " tip-bot for Alexander Kuleshov
  -- strict thread matches above, loose matches on Subject: below --
2016-01-23  6:50 [PATCH] x86/head_64.S: do not use temporary register to check alignment Alexander Kuleshov
2016-01-26  9:31 ` Borislav Petkov
2016-01-16 19:22 [PATCH] x86: static_cpu_has_safe: discard dynamic check after init Brian Gerst
2016-01-16 19:36 ` Borislav Petkov
2016-01-16 19:58   ` Brian Gerst
2016-01-17 10:33     ` Borislav Petkov
2016-01-18 16:52       ` Brian Gerst
2016-01-18 17:49         ` Andy Lutomirski
2016-01-18 18:14         ` Borislav Petkov
2016-01-18 18:29           ` Andy Lutomirski
2016-01-18 18:39             ` Borislav Petkov
2016-01-18 19:45               ` H. Peter Anvin
2016-01-18 23:05                 ` Borislav Petkov
2016-01-18 23:13                   ` H. Peter Anvin
2016-01-18 23:25                     ` Borislav Petkov
2016-01-19 13:57                       ` Borislav Petkov
2016-01-19 16:23                         ` Borislav Petkov
2016-01-19 23:10                         ` Borislav Petkov
2016-01-19 23:26                           ` Andy Lutomirski
2016-01-19 23:49                             ` Boris Petkov
2016-01-20  4:03                         ` H. Peter Anvin
2016-01-20 10:33                           ` Borislav Petkov
2016-01-20 10:41                             ` H. Peter Anvin
2016-01-21 22:14                               ` Borislav Petkov
2016-01-21 22:22                                 ` H. Peter Anvin
2016-01-21 22:56                                   ` Borislav Petkov
2016-01-21 23:36                                     ` H. Peter Anvin
2016-01-21 23:37                                     ` H. Peter Anvin
2016-01-22 10:32                                       ` Borislav Petkov
2016-01-18 18:51           ` Borislav Petkov
2016-01-19  1:10             ` Borislav Petkov
2016-01-19  1:33               ` H. Peter Anvin
2016-01-19  9:22                 ` Borislav Petkov
2016-01-20  4:02                   ` H. Peter Anvin
2016-01-20  4:39                     ` Brian Gerst
2016-01-20  4:42                       ` H. Peter Anvin
2016-01-20 10:50                         ` Borislav Petkov
2016-01-20 10:55                           ` H. Peter Anvin
2016-01-20 11:05                             ` Borislav Petkov
2016-01-20 14:48                               ` H. Peter Anvin
2016-01-20 15:01                     ` Borislav Petkov
2016-01-20 15:09                       ` H. Peter Anvin
2016-01-20 16:04                         ` Borislav Petkov
2016-01-20 16:16                           ` H. Peter Anvin
