From: Borislav Petkov <bp@suse.de>
To: x86-ml <x86@kernel.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Brian Gerst <brgerst@gmail.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Dmitry Vyukov <dvyukov@google.com>,
	Andi Kleen <andi@firstfloor.org>,
	zengzhaoxiu@163.com, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Kees Cook <keescook@chromium.org>,
	Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>,
	Andy Lutomirski <luto@amacapital.net>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH -v2] x86/hweight: Get rid of the special calling convention
Date: Tue, 10 May 2016 18:53:18 +0200	[thread overview]
Message-ID: <20160510165318.GD28520@pd.tnic> (raw)
In-Reply-To: <20160505140446.GE534@pd.tnic>

From: Borislav Petkov <bp@suse.de>
Date: Wed, 4 May 2016 18:52:09 +0200
Subject: [PATCH -v2] x86/hweight: Get rid of the special calling convention

People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench
into kcov, LTO, etc., experimentation.

And it's not like we absolutely need it, so let's get rid of it and
streamline it a bit. I had to do some carving out of facilities so that
the include hell doesn't swallow me.

We still need to hardcode the POPCNT opcode bytes and the register
operands because some old gas versions we still support do not know
about POPCNT.

Signed-off-by: Borislav Petkov <bp@suse.de>
---

-v2: Revert to the old spelled-out POPCNT insn bytes.
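
For illustration only (not part of the patch): the hardcoded bytes are
just the machine encoding of POPCNT with fixed register operands, so
they can be checked with a trivial user-space sketch like the one below.
It assumes a 64-bit POPCNT-capable CPU; the "D" constraint pins the
input into %rdi and "=a" reads the result from %rax, mirroring REG_IN
and REG_OUT in the hunk further down. The file and function names are
made up.

/* popcnt_check.c - hypothetical sketch, not kernel code */
#include <stdio.h>

static unsigned long hw64(unsigned long w)
{
	unsigned long res;

	/* same bytes as POPCNT64 below: popcnt %rdi, %rax */
	asm volatile(".byte 0xf3,0x48,0x0f,0xb8,0xc7"
		     : "=a" (res)
		     : "D" (w)
		     : "cc");
	return res;
}

int main(void)
{
	printf("%lu\n", hw64(0xff00ff00ff00ff00UL));	/* prints 32 */
	return 0;
}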

 arch/x86/Kconfig                      |   5 --
 arch/x86/include/asm/arch_hweight.h   |  38 +++++------
 arch/x86/include/asm/cpufeature.h     | 112 +-------------------------------
 arch/x86/include/asm/cpuinfo.h        |  65 +++++++++++++++++++
 arch/x86/include/asm/processor.h      |  63 +-----------------
 arch/x86/include/asm/static_cpu_has.h | 116 ++++++++++++++++++++++++++++++++++
 lib/Makefile                          |   5 --
 7 files changed, 204 insertions(+), 200 deletions(-)
 create mode 100644 arch/x86/include/asm/cpuinfo.h
 create mode 100644 arch/x86/include/asm/static_cpu_has.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7bb15747fea2..79e0bcd61cb1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -292,11 +292,6 @@ config X86_32_LAZY_GS
 	def_bool y
 	depends on X86_32 && !CC_STACKPROTECTOR
 
-config ARCH_HWEIGHT_CFLAGS
-	string
-	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
-	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799fa43d1..324f5fb30392 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -2,10 +2,11 @@
 #define _ASM_X86_HWEIGHT_H
 
 #include <asm/cpufeatures.h>
+#include <asm/static_cpu_has.h>
 
 #ifdef CONFIG_64BIT
-/* popcnt %edi, %eax -- redundant REX prefix for alignment */
-#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %edi, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
 /* popcnt %rdi, %rax */
 #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
 #define REG_IN "D"
@@ -17,21 +18,19 @@
 #define REG_OUT "a"
 #endif
 
-/*
- * __sw_hweightXX are called from within the alternatives below
- * and callee-clobbered registers need to be taken care of. See
- * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
- * compiler switches.
- */
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
-	unsigned int res = 0;
+	unsigned int res;
 
-	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
-		     : "="REG_OUT (res)
-		     : REG_IN (w));
+	if (likely(static_cpu_has(X86_FEATURE_POPCNT))) {
+		/* popcnt %eax, %eax */
+		asm volatile(POPCNT32
+				: "="REG_OUT (res)
+				: REG_IN (w));
 
-	return res;
+		return res;
+	}
+	return __sw_hweight32(w);
 }
 
 static inline unsigned int __arch_hweight16(unsigned int w)
@@ -53,13 +52,16 @@ static inline unsigned long __arch_hweight64(__u64 w)
 #else
 static __always_inline unsigned long __arch_hweight64(__u64 w)
 {
-	unsigned long res = 0;
+	unsigned long res;
 
-	asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
-		     : "="REG_OUT (res)
-		     : REG_IN (w));
+	if (likely(static_cpu_has(X86_FEATURE_POPCNT))) {
+		asm volatile(POPCNT64
+				: "="REG_OUT (res)
+				: REG_IN (w));
 
-	return res;
+		return res;
+	}
+	return __sw_hweight64(w);
 }
 #endif /* CONFIG_X86_32 */
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 07c942d84662..9a70b12ae8df 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -6,6 +6,8 @@
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
 #include <asm/asm.h>
+#include <asm/static_cpu_has.h>
+
 #include <linux/bitops.h>
 
 enum cpuid_leafs
@@ -45,51 +47,6 @@ extern const char * const x86_power_flags[32];
  */
 extern const char * const x86_bug_flags[NBUGINTS*32];
 
-#define test_cpu_cap(c, bit)						\
-	 test_bit(bit, (unsigned long *)((c)->x86_capability))
-
-#define REQUIRED_MASK_BIT_SET(bit)					\
-	 ( (((bit)>>5)==0  && (1UL<<((bit)&31) & REQUIRED_MASK0 )) ||	\
-	   (((bit)>>5)==1  && (1UL<<((bit)&31) & REQUIRED_MASK1 )) ||	\
-	   (((bit)>>5)==2  && (1UL<<((bit)&31) & REQUIRED_MASK2 )) ||	\
-	   (((bit)>>5)==3  && (1UL<<((bit)&31) & REQUIRED_MASK3 )) ||	\
-	   (((bit)>>5)==4  && (1UL<<((bit)&31) & REQUIRED_MASK4 )) ||	\
-	   (((bit)>>5)==5  && (1UL<<((bit)&31) & REQUIRED_MASK5 )) ||	\
-	   (((bit)>>5)==6  && (1UL<<((bit)&31) & REQUIRED_MASK6 )) ||	\
-	   (((bit)>>5)==7  && (1UL<<((bit)&31) & REQUIRED_MASK7 )) ||	\
-	   (((bit)>>5)==8  && (1UL<<((bit)&31) & REQUIRED_MASK8 )) ||	\
-	   (((bit)>>5)==9  && (1UL<<((bit)&31) & REQUIRED_MASK9 )) ||	\
-	   (((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) ||	\
-	   (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) ||	\
-	   (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) ||	\
-	   (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
-
-#define DISABLED_MASK_BIT_SET(bit)					\
-	 ( (((bit)>>5)==0  && (1UL<<((bit)&31) & DISABLED_MASK0 )) ||	\
-	   (((bit)>>5)==1  && (1UL<<((bit)&31) & DISABLED_MASK1 )) ||	\
-	   (((bit)>>5)==2  && (1UL<<((bit)&31) & DISABLED_MASK2 )) ||	\
-	   (((bit)>>5)==3  && (1UL<<((bit)&31) & DISABLED_MASK3 )) ||	\
-	   (((bit)>>5)==4  && (1UL<<((bit)&31) & DISABLED_MASK4 )) ||	\
-	   (((bit)>>5)==5  && (1UL<<((bit)&31) & DISABLED_MASK5 )) ||	\
-	   (((bit)>>5)==6  && (1UL<<((bit)&31) & DISABLED_MASK6 )) ||	\
-	   (((bit)>>5)==7  && (1UL<<((bit)&31) & DISABLED_MASK7 )) ||	\
-	   (((bit)>>5)==8  && (1UL<<((bit)&31) & DISABLED_MASK8 )) ||	\
-	   (((bit)>>5)==9  && (1UL<<((bit)&31) & DISABLED_MASK9 )) ||	\
-	   (((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) ||	\
-	   (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) ||	\
-	   (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) ||	\
-	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) ||	\
-	   (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
-
-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
-	 test_cpu_cap(c, bit))
-
 #define this_cpu_has(bit)						\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
 	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
@@ -105,8 +62,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_feature_enabled(bit)	\
 	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
 
-#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
-
 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
 #define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
 #define setup_clear_cpu_cap(bit) do { \
@@ -118,69 +73,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-/*
- * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These will statically patch the target code for additional
- * performance.
- */
-static __always_inline __pure bool _static_cpu_has(u16 bit)
-{
-		asm_volatile_goto("1: jmp 6f\n"
-			 "2:\n"
-			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-			         "((5f-4f) - (2b-1b)),0x90\n"
-			 "3:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 4f - .\n"		/* repl offset */
-			 " .word %P1\n"			/* always replace */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 5f - 4f\n"		/* repl len */
-			 " .byte 3b - 2b\n"		/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_replacement,\"ax\"\n"
-			 "4: jmp %l[t_no]\n"
-			 "5:\n"
-			 ".previous\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 0\n"			/* no replacement */
-			 " .word %P0\n"			/* feature bit */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 0\n"			/* repl len */
-			 " .byte 0\n"			/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_aux,\"ax\"\n"
-			 "6:\n"
-			 " testb %[bitnum],%[cap_byte]\n"
-			 " jnz %l[t_yes]\n"
-			 " jmp %l[t_no]\n"
-			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
-			     [bitnum] "i" (1 << (bit & 7)),
-			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
-			 : : t_yes, t_no);
-	t_yes:
-		return true;
-	t_no:
-		return false;
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-		_static_cpu_has(bit)				\
-)
-#else
-/*
- * Fall back to dynamic for gcc versions which don't support asm goto. Should be
- * a minority now anyway.
- */
-#define static_cpu_has(bit)		boot_cpu_has(bit)
-#endif
-
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
 #define set_cpu_bug(c, bit)		set_cpu_cap(c, (bit))
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
diff --git a/arch/x86/include/asm/cpuinfo.h b/arch/x86/include/asm/cpuinfo.h
new file mode 100644
index 000000000000..a6632044f199
--- /dev/null
+++ b/arch/x86/include/asm/cpuinfo.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_X86_CPUINFO_H_
+#define _ASM_X86_CPUINFO_H_
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+struct cpuinfo_x86 {
+	__u8			x86;		/* CPU family */
+	__u8			x86_vendor;	/* CPU vendor */
+	__u8			x86_model;
+	__u8			x86_mask;
+#ifdef CONFIG_X86_32
+	char			wp_works_ok;	/* It doesn't on 386's */
+
+	/* Problems on some 486Dx4's and old 386's: */
+	char			rfu;
+	char			pad0;
+	char			pad1;
+#else
+	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
+	int			x86_tlbsize;
+#endif
+	__u8			x86_virt_bits;
+	__u8			x86_phys_bits;
+	/* CPUID returned core id bits: */
+	__u8			x86_coreid_bits;
+	/* Max extended CPUID function supported: */
+	__u32			extended_cpuid_level;
+	/* Maximum supported CPUID level, -1=no CPUID: */
+	int			cpuid_level;
+	__u32			x86_capability[NCAPINTS + NBUGINTS];
+	char			x86_vendor_id[16];
+	char			x86_model_id[64];
+	/* in KB - valid for CPUS which support this call: */
+	int			x86_cache_size;
+	int			x86_cache_alignment;	/* In bytes */
+	/* Cache QoS architectural values: */
+	int			x86_cache_max_rmid;	/* max index */
+	int			x86_cache_occ_scale;	/* scale to bytes */
+	int			x86_power;
+	unsigned long		loops_per_jiffy;
+	/* cpuid returned max cores value: */
+	u16			 x86_max_cores;
+	u16			apicid;
+	u16			initial_apicid;
+	u16			x86_clflush_size;
+	/* number of cores as seen by the OS: */
+	u16			booted_cores;
+	/* Physical processor id: */
+	u16			phys_proc_id;
+	/* Logical processor id: */
+	u16			logical_proc_id;
+	/* Core id: */
+	u16			cpu_core_id;
+	/* Index into per_cpu list: */
+	u16			cpu_index;
+	u32			microcode;
+};
+
+extern struct cpuinfo_x86	boot_cpu_data;
+extern struct cpuinfo_x86	new_cpu_data;
+
+#endif /* _ASM_X86_CPUINFO_H_ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3cc5d3..6f6555b20e3d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -22,6 +22,7 @@ struct vm86;
 #include <asm/nops.h>
 #include <asm/special_insns.h>
 #include <asm/fpu/types.h>
+#include <asm/cpuinfo.h>
 
 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -78,65 +79,6 @@ extern u16 __read_mostly tlb_lld_2m[NR_INFO];
 extern u16 __read_mostly tlb_lld_4m[NR_INFO];
 extern u16 __read_mostly tlb_lld_1g[NR_INFO];
 
-/*
- *  CPU type and hardware bug flags. Kept separately for each CPU.
- *  Members of this structure are referenced in head.S, so think twice
- *  before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
-	__u8			x86;		/* CPU family */
-	__u8			x86_vendor;	/* CPU vendor */
-	__u8			x86_model;
-	__u8			x86_mask;
-#ifdef CONFIG_X86_32
-	char			wp_works_ok;	/* It doesn't on 386's */
-
-	/* Problems on some 486Dx4's and old 386's: */
-	char			rfu;
-	char			pad0;
-	char			pad1;
-#else
-	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
-	int			x86_tlbsize;
-#endif
-	__u8			x86_virt_bits;
-	__u8			x86_phys_bits;
-	/* CPUID returned core id bits: */
-	__u8			x86_coreid_bits;
-	/* Max extended CPUID function supported: */
-	__u32			extended_cpuid_level;
-	/* Maximum supported CPUID level, -1=no CPUID: */
-	int			cpuid_level;
-	__u32			x86_capability[NCAPINTS + NBUGINTS];
-	char			x86_vendor_id[16];
-	char			x86_model_id[64];
-	/* in KB - valid for CPUS which support this call: */
-	int			x86_cache_size;
-	int			x86_cache_alignment;	/* In bytes */
-	/* Cache QoS architectural values: */
-	int			x86_cache_max_rmid;	/* max index */
-	int			x86_cache_occ_scale;	/* scale to bytes */
-	int			x86_power;
-	unsigned long		loops_per_jiffy;
-	/* cpuid returned max cores value: */
-	u16			 x86_max_cores;
-	u16			apicid;
-	u16			initial_apicid;
-	u16			x86_clflush_size;
-	/* number of cores as seen by the OS: */
-	u16			booted_cores;
-	/* Physical processor id: */
-	u16			phys_proc_id;
-	/* Logical processor id: */
-	u16			logical_proc_id;
-	/* Core id: */
-	u16			cpu_core_id;
-	/* Index into per_cpu list: */
-	u16			cpu_index;
-	u32			microcode;
-};
-
 #define X86_VENDOR_INTEL	0
 #define X86_VENDOR_CYRIX	1
 #define X86_VENDOR_AMD		2
@@ -151,9 +93,6 @@ struct cpuinfo_x86 {
 /*
  * capabilities of CPUs
  */
-extern struct cpuinfo_x86	boot_cpu_data;
-extern struct cpuinfo_x86	new_cpu_data;
-
 extern struct tss_struct	doublefault_tss;
 extern __u32			cpu_caps_cleared[NCAPINTS];
 extern __u32			cpu_caps_set[NCAPINTS];
diff --git a/arch/x86/include/asm/static_cpu_has.h b/arch/x86/include/asm/static_cpu_has.h
new file mode 100644
index 000000000000..648ada0c7ffe
--- /dev/null
+++ b/arch/x86/include/asm/static_cpu_has.h
@@ -0,0 +1,116 @@
+#ifndef _ASM_X86_STATIC_CPU_HAS_H
+#define _ASM_X86_STATIC_CPU_HAS_H
+
+#include <asm/cpuinfo.h>
+
+#define test_cpu_cap(c, bit)						\
+	 test_bit(bit, (unsigned long *)((c)->x86_capability))
+
+#define REQUIRED_MASK_BIT_SET(bit)					\
+	 ( (((bit)>>5)==0  && (1UL<<((bit)&31) & REQUIRED_MASK0 )) ||	\
+	   (((bit)>>5)==1  && (1UL<<((bit)&31) & REQUIRED_MASK1 )) ||	\
+	   (((bit)>>5)==2  && (1UL<<((bit)&31) & REQUIRED_MASK2 )) ||	\
+	   (((bit)>>5)==3  && (1UL<<((bit)&31) & REQUIRED_MASK3 )) ||	\
+	   (((bit)>>5)==4  && (1UL<<((bit)&31) & REQUIRED_MASK4 )) ||	\
+	   (((bit)>>5)==5  && (1UL<<((bit)&31) & REQUIRED_MASK5 )) ||	\
+	   (((bit)>>5)==6  && (1UL<<((bit)&31) & REQUIRED_MASK6 )) ||	\
+	   (((bit)>>5)==7  && (1UL<<((bit)&31) & REQUIRED_MASK7 )) ||	\
+	   (((bit)>>5)==8  && (1UL<<((bit)&31) & REQUIRED_MASK8 )) ||	\
+	   (((bit)>>5)==9  && (1UL<<((bit)&31) & REQUIRED_MASK9 )) ||	\
+	   (((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) ||	\
+	   (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) ||	\
+	   (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) ||	\
+	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) ||	\
+	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) ||	\
+	   (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) ||	\
+	   (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
+
+#define DISABLED_MASK_BIT_SET(bit)					\
+	 ( (((bit)>>5)==0  && (1UL<<((bit)&31) & DISABLED_MASK0 )) ||	\
+	   (((bit)>>5)==1  && (1UL<<((bit)&31) & DISABLED_MASK1 )) ||	\
+	   (((bit)>>5)==2  && (1UL<<((bit)&31) & DISABLED_MASK2 )) ||	\
+	   (((bit)>>5)==3  && (1UL<<((bit)&31) & DISABLED_MASK3 )) ||	\
+	   (((bit)>>5)==4  && (1UL<<((bit)&31) & DISABLED_MASK4 )) ||	\
+	   (((bit)>>5)==5  && (1UL<<((bit)&31) & DISABLED_MASK5 )) ||	\
+	   (((bit)>>5)==6  && (1UL<<((bit)&31) & DISABLED_MASK6 )) ||	\
+	   (((bit)>>5)==7  && (1UL<<((bit)&31) & DISABLED_MASK7 )) ||	\
+	   (((bit)>>5)==8  && (1UL<<((bit)&31) & DISABLED_MASK8 )) ||	\
+	   (((bit)>>5)==9  && (1UL<<((bit)&31) & DISABLED_MASK9 )) ||	\
+	   (((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) ||	\
+	   (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) ||	\
+	   (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) ||	\
+	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) ||	\
+	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) ||	\
+	   (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) ||	\
+	   (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
+
+#define cpu_has(c, bit)							\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
+	 test_cpu_cap(c, bit))
+
+#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
+
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+/*
+ * Static testing of CPU features.  Used the same as boot_cpu_has().
+ * These will statically patch the target code for additional
+ * performance.
+ */
+static __always_inline __pure bool _static_cpu_has(u16 bit)
+{
+		asm_volatile_goto("1: jmp 6f\n"
+			 "2:\n"
+			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			         "((5f-4f) - (2b-1b)),0x90\n"
+			 "3:\n"
+			 ".section .altinstructions,\"a\"\n"
+			 " .long 1b - .\n"		/* src offset */
+			 " .long 4f - .\n"		/* repl offset */
+			 " .word %P1\n"			/* always replace */
+			 " .byte 3b - 1b\n"		/* src len */
+			 " .byte 5f - 4f\n"		/* repl len */
+			 " .byte 3b - 2b\n"		/* pad len */
+			 ".previous\n"
+			 ".section .altinstr_replacement,\"ax\"\n"
+			 "4: jmp %l[t_no]\n"
+			 "5:\n"
+			 ".previous\n"
+			 ".section .altinstructions,\"a\"\n"
+			 " .long 1b - .\n"		/* src offset */
+			 " .long 0\n"			/* no replacement */
+			 " .word %P0\n"			/* feature bit */
+			 " .byte 3b - 1b\n"		/* src len */
+			 " .byte 0\n"			/* repl len */
+			 " .byte 0\n"			/* pad len */
+			 ".previous\n"
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6:\n"
+			 " testb %[bitnum],%[cap_byte]\n"
+			 " jnz %l[t_yes]\n"
+			 " jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
+		return true;
+	t_no:
+		return false;
+}
+
+#define static_cpu_has(bit)					\
+(								\
+	__builtin_constant_p(boot_cpu_has(bit)) ?		\
+		boot_cpu_has(bit) :				\
+		_static_cpu_has(bit)				\
+)
+#else
+/*
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
+ */
+#define static_cpu_has(bit)		boot_cpu_has(bit)
+#endif
+
+#endif /* _ASM_X86_STATIC_CPU_HAS_H */
diff --git a/lib/Makefile b/lib/Makefile
index a65e9a861535..55ad20701dc0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
 KCOV_INSTRUMENT_list_debug.o := n
 KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -72,8 +69,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
-- 
2.7.3


Thread overview: 104+ messages
2016-04-05  2:06 [PATCH V2 01/30] bitops: add parity functions Zeng Zhaoxiu
2016-04-05  4:23 ` [PATCH V2 02/30] Include generic parity.h in some architectures' bitops.h Zeng Zhaoxiu
2016-04-05  4:23 ` Zeng Zhaoxiu
2016-04-05  4:23   ` Zeng Zhaoxiu
2016-04-05  4:23   ` Zeng Zhaoxiu
2016-04-05  4:23   ` Zeng Zhaoxiu
2016-04-05  4:23   ` Zeng Zhaoxiu
2016-04-05  4:23   ` Zeng Zhaoxiu
2016-04-06  8:41   ` [PATCH v2 " zengzhaoxiu
2016-04-06  8:41   ` zengzhaoxiu
2016-04-06  8:41     ` zengzhaoxiu at 163.com
2016-04-06  8:41     ` zengzhaoxiu
2016-04-06  8:41     ` zengzhaoxiu
2016-04-06  8:41     ` zengzhaoxiu
2016-04-11 17:31     ` Alexey Brodkin
2016-04-11 17:31       ` Alexey Brodkin
2016-04-11 17:31       ` Alexey Brodkin
2016-04-11 17:31       ` Alexey Brodkin
2016-04-05 19:04 ` [PATCH V2 01/30] bitops: add parity functions Sam Ravnborg
2016-04-06  5:33   ` Zeng Zhaoxiu
2016-04-06  8:24     ` Sam Ravnborg
2016-04-06  8:22   ` [PATCH v2 " zengzhaoxiu
2016-04-06  8:46 ` [PATCH v2 03/30] Add alpha-specific " zengzhaoxiu
2016-04-06  8:53 ` [PATCH v2 04/30] Add blackfin-specific " zengzhaoxiu
2016-04-06  8:57 ` [PATCH v2 05/30] Add ia64-specific " zengzhaoxiu
2016-04-06  8:57   ` zengzhaoxiu
2016-04-06  8:59 ` [PATCH v2 06/30] Add mips-specific " zengzhaoxiu
2016-04-06 10:23   ` zengzhaoxiu
2016-04-06  9:03 ` [PATCH v2 07/30] Add powerpc-specific " zengzhaoxiu
2016-04-06  9:07 ` [PATCH v2 08/30] Add sparc-specific " zengzhaoxiu
2016-04-06  9:07   ` zengzhaoxiu
2016-04-06 16:37   ` Josip Rodin
2016-04-06 18:44   ` Sam Ravnborg
2016-04-06 18:44     ` Sam Ravnborg
2016-04-07  3:56     ` Zeng Zhaoxiu
2016-04-07  3:56       ` Zeng Zhaoxiu
2016-04-06  9:08 ` [PATCH v2 09/30] Add tile-specific " zengzhaoxiu
2016-04-06 13:27   ` Chris Metcalf
2016-04-07  3:55     ` Zeng Zhaoxiu
2016-04-06  9:14 ` [PATCH v2 10/30] Add x86-specific " zengzhaoxiu
2016-04-06 10:13   ` Borislav Petkov
2016-04-06 10:37     ` One Thousand Gnomes
2016-04-06 10:53       ` Borislav Petkov
2016-04-07  3:55         ` Zeng Zhaoxiu
2016-04-07  9:39           ` Borislav Petkov
2016-04-11  2:43       ` Zeng Zhaoxiu
2016-04-15  2:11         ` Borislav Petkov
2016-04-07  3:55     ` Zeng Zhaoxiu
2016-04-07  9:41       ` Borislav Petkov
2016-04-06 19:45   ` Andi Kleen
2016-04-07  3:56     ` Zeng Zhaoxiu
2016-04-07  6:31     ` Dmitry Vyukov
2016-04-07  9:43       ` Borislav Petkov
2016-05-04 18:46         ` [RFC PATCH] x86/hweight: Get rid of the special calling convention Borislav Petkov
2016-05-04 19:31           ` Brian Gerst
2016-05-04 19:33             ` H. Peter Anvin
2016-05-04 19:41               ` Borislav Petkov
2016-05-04 19:49                 ` H. Peter Anvin
2016-05-04 20:22                   ` Borislav Petkov
2016-05-04 20:51                     ` H. Peter Anvin
2016-05-04 21:09                     ` Andi Kleen
2016-05-05 13:02                     ` Denys Vlasenko
2016-05-05 14:04                       ` Borislav Petkov
2016-05-10 16:53                         ` Borislav Petkov [this message]
2016-05-10 17:23                           ` [PATCH -v2] " Peter Zijlstra
2016-05-10 19:02                             ` Borislav Petkov
2016-05-10 19:03                             ` H. Peter Anvin
2016-05-10 19:10                               ` Borislav Petkov
2016-05-10 22:30                                 ` H. Peter Anvin
2016-05-11  4:11                                   ` Borislav Petkov
2016-05-11 11:15                                     ` Brian Gerst
2016-05-11 11:24                                       ` Peter Zijlstra
2016-05-11 12:47                                         ` Borislav Petkov
2016-05-12  4:54                                         ` H. Peter Anvin
2016-05-12 11:57                                           ` Borislav Petkov
2016-05-12 12:14                                             ` Peter Zijlstra
2016-05-12 13:09                                               ` Borislav Petkov
2016-05-18 10:38                                                 ` Borislav Petkov
2016-04-07 14:10     ` [PATCH v2 10/30] Add x86-specific parity functions One Thousand Gnomes
2016-04-06  9:27 ` [PATCH v2 11/30] sunrpc: use parity8 zengzhaoxiu
2016-04-06  9:30 ` [PATCH v2 12/30] mips: use parity functions in cerr-sb1.c zengzhaoxiu
2016-04-06  9:36 ` [PATCH v2 13/30] bch: use parity32 zengzhaoxiu
2016-04-06  9:39 ` [PATCH v2 14/30] media: use parity8 in vivid-vbi-gen.c zengzhaoxiu
2016-04-06  9:41 ` [PATCH v2 15/30] media: use parity functions in saa7115 zengzhaoxiu
2016-04-06  9:43 ` [PATCH v2 16/30] input: use parity32 in grip_mp zengzhaoxiu
2016-04-06  9:44 ` [PATCH v2 17/30] input: use parity64 in sidewinder zengzhaoxiu
2016-04-06  9:45 ` [PATCH v2 18/30] input: use parity16 in ams_delta_serio zengzhaoxiu
2016-04-06  9:47 ` [PATCH v2 19/30] scsi: use parity32 in isci's phy zengzhaoxiu
2016-04-06  9:52 ` [PATCH v2 20/30] mtd: use parity16 in ssfdc zengzhaoxiu
2016-04-06  9:53 ` [PATCH v2 21/30] mtd: use parity functions in inftlcore zengzhaoxiu
2016-04-06  9:58 ` [PATCH v2 22/30] crypto: use parity functions in qat_hal zengzhaoxiu
2016-04-06 10:05 ` [PATCH v2 23/30] mtd: use parity16 in sm_ftl zengzhaoxiu
2016-04-06 10:11 ` [PATCH v2 24/30] ethernet: use parity8 in sun/niu.c zengzhaoxiu
2016-04-06 10:14 ` [PATCH v2 25/30] input: use parity8 in pcips2 zengzhaoxiu
2016-04-06 10:15 ` [PATCH v2 26/30] input: use parity8 in sa1111ps2 zengzhaoxiu
2016-04-06 10:16 ` [PATCH v2 27/30] iio: use parity32 in adxrs450 zengzhaoxiu
2016-04-10 14:37   ` Jonathan Cameron
2016-04-10 14:41     ` Lars-Peter Clausen
2016-04-10 15:13       ` Jonathan Cameron
2016-04-10 15:14         ` Jonathan Cameron
2016-04-06 10:18 ` [PATCH v2 28/30] serial: use parity32 in max3100 zengzhaoxiu
2016-04-06 10:25   ` Greg KH
2016-04-06 10:20 ` [PATCH v2 29/30] input: use parity8 in elantech zengzhaoxiu
2016-04-06 10:21 ` [PATCH v2 30/30] ethernet: use parity8 in broadcom/tg3.c zengzhaoxiu
