All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Dobriyan <adobriyan@gmail.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, hpa@zytor.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	adobriyan@gmail.com, linux-kbuild@vger.kernel.org,
	yamada.masahiro@socionext.com, michal.lkml@markovi.net
Subject: [PATCH 2/5] x86_64, -march=native: POPCNT support
Date: Mon, 22 Jul 2019 23:27:20 +0300	[thread overview]
Message-ID: <20190722202723.13408-2-adobriyan@gmail.com> (raw)
In-Reply-To: <20190722202723.13408-1-adobriyan@gmail.com>

Detect POPCNT instruction support on the build machine and inline the
hweight*() functions if the instruction is supported by the CPU.

Also check for POPCNT at boot time: a kernel built with
CONFIG_MARCH_NATIVE_POPCNT refuses to boot on a CPU that lacks it.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
 arch/x86/include/asm/arch_hweight.h           | 24 +++++++++++++++++++
 arch/x86/include/asm/segment.h                |  1 +
 arch/x86/kernel/verify_cpu.S                  |  8 +++++++
 arch/x86/lib/Makefile                         |  5 +++-
 .../drm/i915/display/intel_display_power.c    |  2 +-
 drivers/misc/sgi-gru/grumain.c                |  2 +-
 fs/btrfs/tree-checker.c                       |  4 ++--
 include/linux/bitops.h                        |  2 ++
 lib/Makefile                                  |  2 ++
 scripts/kconfig/cpuid.c                       |  7 ++++++
 scripts/march-native.sh                       |  2 ++
 11 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..3797aa57baa5 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -2,6 +2,28 @@
 #ifndef _ASM_X86_HWEIGHT_H
 #define _ASM_X86_HWEIGHT_H
 
+#ifdef CONFIG_MARCH_NATIVE_POPCNT
+static inline unsigned int __arch_hweight64(uint64_t x)
+{
+	return __builtin_popcountll(x);
+}
+
+static inline unsigned int __arch_hweight32(uint32_t x)
+{
+	return __builtin_popcount(x);
+}
+
+static inline unsigned int __arch_hweight16(uint16_t x)
+{
+	return __builtin_popcount(x);
+}
+
+static inline unsigned int __arch_hweight8(uint8_t x)
+{
+	return __builtin_popcount(x);
+}
+#else
+
 #include <asm/cpufeatures.h>
 
 #ifdef CONFIG_64BIT
@@ -53,3 +75,5 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
 #endif /* CONFIG_X86_32 */
 
 #endif
+
+#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index ac3892920419..d314c6b9b632 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -4,6 +4,7 @@
 
 #include <linux/const.h>
 #include <asm/alternative.h>
+#include <asm/cpufeatures.h>
 
 /*
  * Constructor for a conventional segment GDT (or LDT) entry.
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index a024c4f7ba56..a9be8904faa3 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -134,6 +134,14 @@ ENTRY(verify_cpu)
 	movl $1,%eax
 	ret
 .Lverify_cpu_sse_ok:
+
+#ifdef CONFIG_MARCH_NATIVE_POPCNT
+	mov	$1, %eax
+	cpuid
+	bt	$23, %ecx
+	jnc	.Lverify_cpu_no_longmode
+#endif
+
 	popf				# Restore caller passed flags
 	xorl %eax, %eax
 	ret
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 5246db42de45..7dc0e71b0ef3 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -40,7 +40,10 @@ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
 
-obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+obj-y += msr.o msr-reg.o msr-reg-export.o
+ifneq ($(CONFIG_MARCH_NATIVE_POPCNT),y)
+	obj-y += hweight.o
+endif
 obj-y += iomem.o
 
 ifeq ($(CONFIG_X86_32),y)
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index c93ad512014c..9066105f2fea 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -1570,7 +1570,7 @@ static void print_power_domains(struct i915_power_domains *power_domains,
 {
 	enum intel_display_power_domain domain;
 
-	DRM_DEBUG_DRIVER("%s (%lu):\n", prefix, hweight64(mask));
+	DRM_DEBUG_DRIVER("%s (%u):\n", prefix, hweight64(mask));
 	for_each_power_domain(domain, mask)
 		DRM_DEBUG_DRIVER("%s use_count %d\n",
 				 intel_display_power_domain_str(domain),
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 40ac59dd018c..30cfeeb28e74 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -833,7 +833,7 @@ void gru_steal_context(struct gru_thread_state *gts)
 	}
 	gru_dbg(grudev,
 		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
-		" avail cb %ld, ds %ld\n",
+		" avail cb %u, ds %u\n",
 		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
 		hweight64(gru->gs_dsr_map));
 }
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index ccd5706199d7..2d33c6ae0e61 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -478,7 +478,7 @@ static int check_block_group_item(struct extent_buffer *leaf,
 	flags = btrfs_block_group_flags(&bgi);
 	if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
 		block_group_err(leaf, slot,
-"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+"invalid profile flags, have 0x%llx (%u bits set) expect no more than 1 bit set",
 			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
 			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
 		return -EUCLEAN;
@@ -491,7 +491,7 @@ static int check_block_group_item(struct extent_buffer *leaf,
 	    type != (BTRFS_BLOCK_GROUP_METADATA |
 			   BTRFS_BLOCK_GROUP_DATA)) {
 		block_group_err(leaf, slot,
-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
+"invalid type, have 0x%llx (%u bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
 			type, hweight64(type),
 			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
 			BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cf074bce3eb3..655b120bba66 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -7,10 +7,12 @@
 #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 
+#ifndef CONFIG_MARCH_NATIVE_POPCNT
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
 extern unsigned int __sw_hweight32(unsigned int w);
 extern unsigned long __sw_hweight64(__u64 w);
+#endif
 
 /*
  * Include this here because some architectures need generic_ffs/fls in
diff --git a/lib/Makefile b/lib/Makefile
index 095601ce371d..32400f3a3328 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -114,7 +114,9 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
 obj-y += logic_pio.o
 
+ifneq ($(CONFIG_MARCH_NATIVE_POPCNT),y)
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+endif
 
 obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index 81b292382e26..9efc0d9464d8 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -43,6 +43,8 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
 	);
 }
 
+static bool popcnt	= false;
+
 static uint32_t eax0_max;
 
 static void intel(void)
@@ -52,6 +54,10 @@ static void intel(void)
 	if (eax0_max >= 1) {
 		cpuid(1, &eax, &ecx, &edx, &ebx);
 //		printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+
+		if (ecx & (1 << 23)) {
+			popcnt = true;
+		}
 	}
 }
 
@@ -72,6 +78,7 @@ int main(int argc, char *argv[])
 	}
 
 #define _(x)	if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+	_(popcnt);
 #undef _
 
 	return EXIT_FAILURE;
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index 29a33c80b62b..c3059f93ed2b 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -41,6 +41,8 @@ COLLECT_GCC_OPTIONS=$(
 )
 echo "-march=native: $COLLECT_GCC_OPTIONS"
 
+"$CPUID" popcnt		&& option "CONFIG_MARCH_NATIVE_POPCNT"
+
 for i in $COLLECT_GCC_OPTIONS; do
 	case $i in
 		*/cc1|-E|-quiet|-v|/dev/null|--param|-fstack-protector*)
-- 
2.21.0


  reply	other threads:[~2019-07-22 20:28 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-22 20:27 [PATCH 1/5] x86_64: -march=native support Alexey Dobriyan
2019-07-22 20:27 ` Alexey Dobriyan [this message]
2019-07-22 21:12   ` [PATCH 2/5] x86_64, -march=native: POPCNT support Peter Zijlstra
2019-07-22 21:15     ` Alexey Dobriyan
2019-07-22 21:27       ` Alexey Dobriyan
2019-07-23  7:20       ` Peter Zijlstra
2019-07-23 20:04         ` Alexey Dobriyan
2019-07-22 20:27 ` [PATCH 3/5] x86_64, -march=native: REP MOVSB support Alexey Dobriyan
2019-07-22 20:27 ` [PATCH 4/5] x86_64, -march=native: REP STOSB support Alexey Dobriyan
2019-07-22 20:27 ` [PATCH 5/5] x86_64, -march=native: MOVBE support Alexey Dobriyan
  -- strict thread matches above, loose matches on Subject: below --
2019-07-04 20:47 [PATCH 1/5] x86_64: -march=native support Alexey Dobriyan
2019-07-04 20:47 ` [PATCH 2/5] x86_64, -march=native: POPCNT support Alexey Dobriyan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190722202723.13408-2-adobriyan@gmail.com \
    --to=adobriyan@gmail.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=linux-kbuild@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michal.lkml@markovi.net \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=yamada.masahiro@socionext.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.