All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.
@ 2016-10-12 17:57 Piotr Luc
  2016-10-16 11:21 ` [tip:x86/urgent] " tip-bot for Piotr Luc
  0 siblings, 1 reply; 18+ messages in thread
From: Piotr Luc @ 2016-10-12 17:57 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, linux-kernel

AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors.

The spec can be found in Intel Software Developer Manual or in
Instruction Set Extensions Programming Reference. See
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/cpufeature.h              | 7 +++++--
 arch/x86/include/asm/cpufeatures.h             | 6 +++++-
 arch/x86/include/asm/disabled-features.h       | 3 ++-
 arch/x86/include/asm/required-features.h       | 3 ++-
 arch/x86/kernel/cpu/common.c                   | 1 +
 arch/x86/kernel/fpu/xstate.c                   | 2 ++
 tools/arch/x86/include/asm/cpufeatures.h       | 6 +++++-
 tools/arch/x86/include/asm/disabled-features.h | 3 ++-
 tools/arch/x86/include/asm/required-features.h | 3 ++-
 9 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69f..617452e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@ enum cpuid_leafs
 	CPUID_8000_000A_EDX,
 	CPUID_7_ECX,
 	CPUID_8000_0007_EBX,
+	CPUID_7_EDX,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||	\
 	   REQUIRED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
 
 #define DISABLED_MASK_BIT_SET(feature_bit)				\
 	 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||	\
 	   DISABLED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
 
 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..309fd2d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
+#define NCAPINTS	19	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -286,6 +286,10 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bcc9ccc..6e6189f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,6 +675,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
 		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
 		c->x86_capability[CPUID_7_ECX] = ecx;
+		c->x86_capability[CPUID_7_EDX] = edx;
 	}
 
 	/* Extended state features: level 0x0000000d */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa..7dbd480 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,6 +73,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..309fd2d 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
+#define NCAPINTS	19	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -286,6 +286,10 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
 /*
  * BUG word(s)
  */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
-- 
2.10.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-12 17:57 [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features Piotr Luc
@ 2016-10-16 11:21 ` tip-bot for Piotr Luc
  2016-10-16 14:22   ` Borislav Petkov
  0 siblings, 1 reply; 18+ messages in thread
From: tip-bot for Piotr Luc @ 2016-10-16 11:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dave.hansen, bp, tglx, peterz, linux-kernel, dvlasenk, jpoimboe,
	brgerst, hpa, mingo, luto, torvalds, piotr.luc

Commit-ID:  a518dcc82b6162009c8ca3d169fe61c81536ff17
Gitweb:     http://git.kernel.org/tip/a518dcc82b6162009c8ca3d169fe61c81536ff17
Author:     Piotr Luc <piotr.luc@intel.com>
AuthorDate: Wed, 12 Oct 2016 19:57:31 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sun, 16 Oct 2016 11:32:11 +0200

x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors.

The spec can be found in Intel Software Developer Manual or in
Instruction Set Extensions Programming Reference. See
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161012175731.29619-1-piotr.luc@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/cpufeature.h              | 7 +++++--
 arch/x86/include/asm/cpufeatures.h             | 6 +++++-
 arch/x86/include/asm/disabled-features.h       | 3 ++-
 arch/x86/include/asm/required-features.h       | 3 ++-
 arch/x86/kernel/cpu/common.c                   | 1 +
 arch/x86/kernel/fpu/xstate.c                   | 2 ++
 tools/arch/x86/include/asm/cpufeatures.h       | 6 +++++-
 tools/arch/x86/include/asm/disabled-features.h | 3 ++-
 tools/arch/x86/include/asm/required-features.h | 3 ++-
 9 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69f..617452e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@ enum cpuid_leafs
 	CPUID_8000_000A_EDX,
 	CPUID_7_ECX,
 	CPUID_8000_0007_EBX,
+	CPUID_7_EDX,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||	\
 	   REQUIRED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
 
 #define DISABLED_MASK_BIT_SET(feature_bit)				\
 	 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||	\
 	   DISABLED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
 
 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc8..6697b75 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
+#define NCAPINTS	19	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -285,6 +285,10 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bd910a..424a620 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,6 +675,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
 		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
 		c->x86_capability[CPUID_7_ECX] = ecx;
+		c->x86_capability[CPUID_7_EDX] = edx;
 	}
 
 	/* Extended state features: level 0x0000000d */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c..095ef7d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1188bc8..6697b75 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
+#define NCAPINTS	19	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -285,6 +285,10 @@
 #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
+
 /*
  * BUG word(s)
  */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad..8b45e08 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -57,6 +57,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 11:21 ` [tip:x86/urgent] " tip-bot for Piotr Luc
@ 2016-10-16 14:22   ` Borislav Petkov
  2016-10-16 16:02     ` hpa
  2016-10-17  7:55     ` Thomas Gleixner
  0 siblings, 2 replies; 18+ messages in thread
From: Borislav Petkov @ 2016-10-16 14:22 UTC (permalink / raw)
  To: Ingo Molnar, peterz, tglx, dave.hansen, torvalds, piotr.luc,
	luto, brgerst, hpa, linux-kernel, dvlasenk, jpoimboe
  Cc: linux-tip-commits

On Sun, Oct 16, 2016 at 04:21:49AM -0700, tip-bot for Piotr Luc wrote:
> Commit-ID:  a518dcc82b6162009c8ca3d169fe61c81536ff17
> Gitweb:     http://git.kernel.org/tip/a518dcc82b6162009c8ca3d169fe61c81536ff17
> Author:     Piotr Luc <piotr.luc@intel.com>
> AuthorDate: Wed, 12 Oct 2016 19:57:31 +0200
> Committer:  Ingo Molnar <mingo@kernel.org>
> CommitDate: Sun, 16 Oct 2016 11:32:11 +0200
> 
> x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
> 
> AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
> variable precision.
> AVX512_4FMAPS - Vector instructions for deep learning floating-point
> single precision.
> 
> The new instructions are to be used in future Intel Xeon & Xeon Phi
> processors.
> 
> The spec can be found in Intel Software Developer Manual or in
> Instruction Set Extensions Programming Reference. See
> https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.
> 
> Signed-off-by: Piotr Luc <piotr.luc@intel.com>
> Reviewed-by: Dave Hansen <dave.hansen@intel.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Brian Gerst <brgerst@gmail.com>
> Cc: Denys Vlasenko <dvlasenk@redhat.com>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Josh Poimboeuf <jpoimboe@redhat.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Link: http://lkml.kernel.org/r/20161012175731.29619-1-piotr.luc@intel.com
> Signed-off-by: Ingo Molnar <mingo@kernel.org>

...

> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 1188bc8..6697b75 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -12,7 +12,7 @@
>  /*
>   * Defines x86 CPU feature bits
>   */
> -#define NCAPINTS	18	/* N 32-bit words worth of info */
> +#define NCAPINTS	19	/* N 32-bit words worth of info */
>  #define NBUGINTS	1	/* N 32-bit bug flags */
>  
>  /*
> @@ -285,6 +285,10 @@
>  #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
>  #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
>  
> +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
> +#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural Network Instructions */
> +#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply Accumulation Single precision */

This is getting ridiculous: we keep adding new leafs to
->x86_capability, thus bloating cpuinfo_x86 but then it is not even
worth it - this patch defines only two bits.

I know, I know, it is a CPUID leaf of features, we will need them, yadda
yadda but until we do, I'd suggest all these new feature bits go to
init_scattered_cpuid_features() and be carved out to a leaf of their
own *only* when we really, actually add them and fill up that leaf.
Otherwise, we have one fat and sparse x86_capability array.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 14:22   ` Borislav Petkov
@ 2016-10-16 16:02     ` hpa
  2016-10-16 16:35       ` Borislav Petkov
  2016-10-17  7:55     ` Thomas Gleixner
  1 sibling, 1 reply; 18+ messages in thread
From: hpa @ 2016-10-16 16:02 UTC (permalink / raw)
  To: Borislav Petkov, Ingo Molnar, peterz, tglx, dave.hansen,
	torvalds, piotr.luc, luto, brgerst, linux-kernel, dvlasenk,
	jpoimboe
  Cc: linux-tip-commits

On October 16, 2016 7:22:33 AM PDT, Borislav Petkov <bp@alien8.de> wrote:
>On Sun, Oct 16, 2016 at 04:21:49AM -0700, tip-bot for Piotr Luc wrote:
>> Commit-ID:  a518dcc82b6162009c8ca3d169fe61c81536ff17
>> Gitweb:    
>http://git.kernel.org/tip/a518dcc82b6162009c8ca3d169fe61c81536ff17
>> Author:     Piotr Luc <piotr.luc@intel.com>
>> AuthorDate: Wed, 12 Oct 2016 19:57:31 +0200
>> Committer:  Ingo Molnar <mingo@kernel.org>
>> CommitDate: Sun, 16 Oct 2016 11:32:11 +0200
>> 
>> x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
>> 
>> AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
>> variable precision.
>> AVX512_4FMAPS - Vector instructions for deep learning floating-point
>> single precision.
>> 
>> The new instructions are to be used in future Intel Xeon & Xeon Phi
>> processors.
>> 
>> The spec can be found in Intel Software Developer Manual or in
>> Instruction Set Extensions Programming Reference. See
>>
>https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.
>> 
>> Signed-off-by: Piotr Luc <piotr.luc@intel.com>
>> Reviewed-by: Dave Hansen <dave.hansen@intel.com>
>> Cc: Andy Lutomirski <luto@kernel.org>
>> Cc: Borislav Petkov <bp@alien8.de>
>> Cc: Brian Gerst <brgerst@gmail.com>
>> Cc: Denys Vlasenko <dvlasenk@redhat.com>
>> Cc: H. Peter Anvin <hpa@zytor.com>
>> Cc: Josh Poimboeuf <jpoimboe@redhat.com>
>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Link:
>http://lkml.kernel.org/r/20161012175731.29619-1-piotr.luc@intel.com
>> Signed-off-by: Ingo Molnar <mingo@kernel.org>
>
>...
>
>> diff --git a/arch/x86/include/asm/cpufeatures.h
>b/arch/x86/include/asm/cpufeatures.h
>> index 1188bc8..6697b75 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -12,7 +12,7 @@
>>  /*
>>   * Defines x86 CPU feature bits
>>   */
>> -#define NCAPINTS	18	/* N 32-bit words worth of info */
>> +#define NCAPINTS	19	/* N 32-bit words worth of info */
>>  #define NBUGINTS	1	/* N 32-bit bug flags */
>>  
>>  /*
>> @@ -285,6 +285,10 @@
>>  #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error
>containment and recovery */
>>  #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
>>  
>> +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word
>18 */
>> +#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural
>Network Instructions */
>> +#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply
>Accumulation Single precision */
>
>This is getting ridiculous: we keep adding new leafs to
>->x86_capability, thus bloating cpuinfo_x86 but then it is not even
>worth it - this patch defines only two bits.
>
>I know, I know, it is a CPUID leaf of features, we will need them,
>yadda
>yadda but until we do, I'd suggest these all new feature bits to to
>init_scattered_cpuid_features() and be carved out to a leaf of their
>own *only* when we really, actually add them and fill up that leaf.
>Otherwise, we have one fat and sparse x86_capability array.

No, please.  That would be worse than the disease.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 16:02     ` hpa
@ 2016-10-16 16:35       ` Borislav Petkov
  2016-10-16 18:42         ` hpa
  0 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2016-10-16 16:35 UTC (permalink / raw)
  To: hpa
  Cc: Ingo Molnar, peterz, tglx, dave.hansen, torvalds, piotr.luc,
	luto, brgerst, linux-kernel, dvlasenk, jpoimboe,
	linux-tip-commits

On Sun, Oct 16, 2016 at 09:02:51AM -0700, hpa@zytor.com wrote:
> No, please.  That would be worse than the disease.

Why not?

I did that recently with a bunch of leaves and there were no issues:

2ccd71f1b278 ("x86/cpufeature: Move some of the scattered feature bits to x86_capability")

There it obviously made sense for 0x00000006 and 0x8000000a to have a
separate ->x86_capability leaf.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 16:35       ` Borislav Petkov
@ 2016-10-16 18:42         ` hpa
  2016-10-16 22:42           ` Borislav Petkov
  0 siblings, 1 reply; 18+ messages in thread
From: hpa @ 2016-10-16 18:42 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ingo Molnar, peterz, tglx, dave.hansen, torvalds, piotr.luc,
	luto, brgerst, linux-kernel, dvlasenk, jpoimboe,
	linux-tip-commits

On October 16, 2016 9:35:57 AM PDT, Borislav Petkov <bp@alien8.de> wrote:
>On Sun, Oct 16, 2016 at 09:02:51AM -0700, hpa@zytor.com wrote:
>> No, please.  That would be worse than the disease.
>
>Why not?
>
>I did that recently with a bunch of leaves and there were no issues:
>
>2ccd71f1b278 ("x86/cpufeature: Move some of the scattered feature bits
>to x86_capability")
>
>There it obviously made sense for 0x00000006 and 0x8000000a to have a
>separate ->x86_capability leaf.

It's needlessly adding complexity for no reason, at least for the leaves that are going to add bits over time.  The x86_capability array is not an expensive resource.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 18:42         ` hpa
@ 2016-10-16 22:42           ` Borislav Petkov
  2016-10-17  9:55             ` Luc, Piotr
  0 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2016-10-16 22:42 UTC (permalink / raw)
  To: hpa
  Cc: Ingo Molnar, peterz, tglx, dave.hansen, torvalds, piotr.luc,
	luto, brgerst, linux-kernel, dvlasenk, jpoimboe,
	linux-tip-commits

On Sun, Oct 16, 2016 at 11:42:26AM -0700, hpa@zytor.com wrote:
> It's needlessly adding complexity for no reason, at least for the

What complexity? The init_scattered_cpuid_features() version is a
trivial patch in comparison to the current version.

> leaves that are going to add bits over time.

Sure, except they don't get added or we don't need them or whatever, and
we end up with only a small number of bits actually being used.

I don't mind moving them to x86_capability later, when a high percentage
of the respective leaf is actually being used but not for a couple of
bits. That's just waste.

> The x86_capability array is not an expensive resource.

0.1% here, 0.1% there, the creeping bloat thing.

And again, the init_scattered_cpuid_features() hunk is much smaller.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 14:22   ` Borislav Petkov
  2016-10-16 16:02     ` hpa
@ 2016-10-17  7:55     ` Thomas Gleixner
  2016-10-17  8:20       ` Luc, Piotr
  1 sibling, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2016-10-17  7:55 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ingo Molnar, peterz, dave.hansen, torvalds, piotr.luc, luto,
	brgerst, hpa, linux-kernel, dvlasenk, jpoimboe,
	linux-tip-commits

On Sun, 16 Oct 2016, Borislav Petkov wrote:
> > The spec can be found in Intel Software Developer Manual or in
> > Instruction Set Extensions Programming Reference. See
> > https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.
> 
> > +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx), word 18 */
> > +#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural Network Instructions */
> > +#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply Accumulation Single precision */
> 
> This is getting ridiculous: we keep adding new leafs to
> ->x86_capability, thus bloating cpuinfo_x86 but then it is not even
> worth it - this patch defines only two bits.

What's worse is that the Instruction Set Extensions Programming Reference
manual says:

CPUID.(EAX=07H, ECX=0):EDX[bit 02] AVX512_4FMAPS
CPUID.(EAX=07H, ECX=0):EBX[bit 03] AVX512_4VNNIW

So AVX512_4VNNIW is in EBX not EDX. What's correct here? The manual or the patch?

I'm going to zap it.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-17  7:55     ` Thomas Gleixner
@ 2016-10-17  8:20       ` Luc, Piotr
  2016-10-17  8:47         ` Ingo Molnar
  0 siblings, 1 reply; 18+ messages in thread
From: Luc, Piotr @ 2016-10-17  8:20 UTC (permalink / raw)
  To: tglx, bp
  Cc: mingo, peterz, linux-kernel, torvalds, linux-tip-commits,
	jpoimboe, hpa, brgerst, luto, Hansen, Dave, dvlasenk

On Mon, 2016-10-17 at 09:55 +0200, Thomas Gleixner wrote:
> On Sun, 16 Oct 2016, Borislav Petkov wrote:
> > 
> > > 
> > > The spec can be found in Intel Software Developer Manual or in
> > > Instruction Set Extensions Programming Reference. See
> > > https://software.intel.com/sites/default/files/managed/69/78/3194
> > > 33-025.pdf.
> > 
> > > 
> > > +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx),
> > > word 18 */
> > > +#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural
> > > Network Instructions */
> > > +#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply
> > > Accumulation Single precision */
> > 
> > This is getting ridiculous: we keep adding new leafs to
> > ->x86_capability, thus bloating cpuinfo_x86 but then it is not even
> > worth it - this patch defines only two bits.
> 
> What's worse is that the Instruction Set Extensions Programming
> Reference
> manual says:
> 
> CPUID.(EAX=07H, ECX=0):EDX[bit 02] AVX512_4FMAPS
> CPUID.(EAX=07H, ECX=0):EBX[bit 03] AVX512_4VNNIW
> 
> So AVX512_4VNNIW is in EBX not EDX. What's correct here? The manual
> or the patch?
> 
> I'm going to zap it.
> 
The manual contains a typo in table 2.1 on page 2.2.
Please compare it to the detailed description of CPUID in table 4.8 on
page 2-16.
There the manual groups both new bits under EDX:

EDX    Bits 01 - 00: Reserved
       Bit 02: AVX512_4VNNIW (Vector instructions for deep learning
enhanced word variable precision.)
       Bit 03: AVX512_4FMAPS (Vector instructions for deep learning
floating-point single precision.)
       Bits 31-04: Reserved

The typo was acknowledged and is going to be fixed in the next version of
the document.

Regards,
Piotr

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-17  8:20       ` Luc, Piotr
@ 2016-10-17  8:47         ` Ingo Molnar
  2016-10-17  9:53           ` Luc, Piotr
  0 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2016-10-17  8:47 UTC (permalink / raw)
  To: Luc, Piotr
  Cc: tglx, bp, peterz, linux-kernel, torvalds, linux-tip-commits,
	jpoimboe, hpa, brgerst, luto, Hansen, Dave, dvlasenk


* Luc, Piotr <Piotr.Luc@intel.com> wrote:

> On Mon, 2016-10-17 at 09:55 +0200, Thomas Gleixner wrote:
> > On Sun, 16 Oct 2016, Borislav Petkov wrote:
> > > 
> > > > 
> > > > The spec can be found in Intel Software Developer Manual or in
> > > > Instruction Set Extensions Programming Reference. See
> > > > https://software.intel.com/sites/default/files/managed/69/78/3194
> > > > 33-025.pdf.
> > > 
> > > > 
> > > > +/* Intel-defined CPU features, CPUID level 0x00000007:0 (edx),
> > > > word 18 */
> > > > +#define X86_FEATURE_AVX512_4VNNIW  (18*32+2) /* AVX-512 Neural
> > > > Network Instructions */
> > > > +#define X86_FEATURE_AVX512_4FMAPS  (18*32+3) /* AVX-512 Multiply
> > > > Accumulation Single precision */
> > > 
> > > This is getting ridiculous: we keep adding new leafs to
> > > ->x86_capability, thus bloating cpuinfo_x86 but then it is not even
> > > worth it - this patch defines only two bits.
> > 
> > What's worse is that the Instruction Set Extensions Programming
> > Reference
> > manual says:
> > 
> > CPUID.(EAX=07H, ECX=0):EDX[bit 02] AVX512_4FMAPS
> > CPUID.(EAX=07H, ECX=0):EBX[bit 03] AVX512_4VNNIW
> > 
> > So AVX512_4VNNIW is in EBX not EDX. What's correct here? The manual
> > or the patch?
> > 
> > I'm going to zap it.
> > 
> The manual contains the typo in  table 2.1 on page 2.2.
> Please compare it to the detailed description of CPUID in table 4.8 on
> page 2-16.
> There manual groups both new bits under EDX:
> 
> EDX    Bits 01 - 00: Reserved
>        Bit 02: AVX512_4VNNIW (Vector instructions for deep learning
> enhanced word variable precision.)
>        Bit 03: AVX512_4FMAPS (Vector instructions for deep learning
> floating-point single precision.)
>        Bits 31-04: Reserved
> 
> The typo was acknowledged and is going to be fixed in next version of
> the document. 

All of this should be pointed out in the changelog.

I've zapped the commit for the time being - let's iterate this once more, ok?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-17  8:47         ` Ingo Molnar
@ 2016-10-17  9:53           ` Luc, Piotr
  2016-10-17  9:54             ` Thomas Gleixner
  0 siblings, 1 reply; 18+ messages in thread
From: Luc, Piotr @ 2016-10-17  9:53 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, peterz, torvalds, tglx, linux-tip-commits,
	jpoimboe, hpa, brgerst, luto, bp, Hansen, Dave, dvlasenk

On Mon, 2016-10-17 at 10:47 +0200, Ingo Molnar wrote:
> * Luc, Piotr <Piotr.Luc@intel.com> wrote:
> 
> > 
> > On Mon, 2016-10-17 at 09:55 +0200, Thomas Gleixner wrote:
> > > 

> > The typo was acknowledged and is going to be fixed in next version
> > of
> > the document. 
> 
> All of this should be pointed out in the changelog.
> 
> I've zapped the commit for the time being - let's iterate this once
> more, ok?
> 

OK, I will add appropriate info.

What about moving initialization to init_scattered_cpuid_features()?

Regards,
Piotr

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-17  9:53           ` Luc, Piotr
@ 2016-10-17  9:54             ` Thomas Gleixner
  2016-10-17 15:03               ` [PATCH v2] " Piotr Luc
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2016-10-17  9:54 UTC (permalink / raw)
  To: Luc, Piotr
  Cc: mingo, linux-kernel, peterz, torvalds, linux-tip-commits,
	jpoimboe, hpa, brgerst, luto, bp, Hansen, Dave, dvlasenk

On Mon, 17 Oct 2016, Luc, Piotr wrote:
> On Mon, 2016-10-17 at 10:47 +0200, Ingo Molnar wrote:
> > * Luc, Piotr <Piotr.Luc@intel.com> wrote:
> > >
> > > The typo was acknowledged and is going to be fixed in next version
> > > of the document.
> > 
> > All of this should be pointed out in the changelog.
> > 
> > I've zapped the commit for the time being - let's iterate this once
> > more, ok?
> > 
> 
> OK, I will add appropriate info.
> 
> What about moving initialization to init_scattered_cpuid_features()?

Yes, please. We can move it to a separate leaf when a substantial amount of
bits is used.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-16 22:42           ` Borislav Petkov
@ 2016-10-17  9:55             ` Luc, Piotr
  0 siblings, 0 replies; 18+ messages in thread
From: Luc, Piotr @ 2016-10-17  9:55 UTC (permalink / raw)
  To: hpa, bp
  Cc: mingo, peterz, linux-kernel, torvalds, tglx, linux-tip-commits,
	jpoimboe, brgerst, luto, Hansen, Dave, dvlasenk

On Mon, 2016-10-17 at 00:42 +0200, Borislav Petkov wrote:
> On Sun, Oct 16, 2016 at 11:42:26AM -0700, hpa@zytor.com wrote:
> > 
> > It's needlessly adding complexity for no reason, at least for the
> 
> What complexity? The init_scattered_cpuid_features() version is a
> trivial patch in comparison to the current version.
> 
> > 
> > leaves that are going to add bits over time.
> 
> Sure, except they don't get added or we don't need them or whatever,
> and
> we end up with only a small number of bits actually being used.
> 
> I don't mind moving them to x86_capability later, when a high
> percentage
> of the respective leaf is actually being used but not for a couple of
> bits. That's just waste.
> 
> > 
> > The x86_capability array is not an expensive resource.
> 
> 0.1% here, 0.1% there, the creeping bloat thing.
> 
> And again, the init_scattered_cpuid_features() hunk is much smaller.
> 
I agree, the scattered solution reduces the data segment footprint in the
case of many cores.

Regards,
Piotr

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.
  2016-10-17  9:54             ` Thomas Gleixner
@ 2016-10-17 15:03               ` Piotr Luc
  2016-10-18 13:00                 ` [v2] " Luc, Piotr
  0 siblings, 1 reply; 18+ messages in thread
From: Piotr Luc @ 2016-10-17 15:03 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Dave Hansen, Andy Lutomirski, Borislav Petkov,
	Brian Gerst, Denys Vlasenko, H . Peter Anvin, Josh Poimboeuf,
	Linus Torvalds, Peter Zijlstra, Thomas Gleixner, Ingo Molnar

AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors.

The spec can be found in Intel Software Developer Manual (SDM) or in
Instruction Set Extensions Programming Reference (ISE).
The implementation is based on Table 2.8 "Information Returned by CPUID
Instruction" in ISE,
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

v2: Initialize new bits in the scattered group. Add 

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
---
 arch/x86/include/asm/cpufeatures.h       | 2 ++
 arch/x86/kernel/cpu/scattered.c          | 2 ++
 arch/x86/kernel/fpu/xstate.c             | 2 ++
 tools/arch/x86/include/asm/cpufeatures.h | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
 	static const struct cpuid_bit cpuid_bits[] = {
 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa..7dbd480 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,6 +73,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-- 
2.10.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [v2] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.
  2016-10-17 15:03               ` [PATCH v2] " Piotr Luc
@ 2016-10-18 13:00                 ` Luc, Piotr
  2016-10-18 13:08                   ` Ingo Molnar
  0 siblings, 1 reply; 18+ messages in thread
From: Luc, Piotr @ 2016-10-18 13:00 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, peterz, torvalds, tglx, jpoimboe, hpa, brgerst,
	mingo, luto, bp, Hansen, Dave, dvlasenk

On Mon, 2016-10-17 at 17:03 +0200, Piotr Luc wrote:
> v2: Initialize new bits in the scattered group. Add

The commit message is obviously broken. Sorry for that.
I will resend with fixed message.

Regards,
Piotr

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [v2] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.
  2016-10-18 13:00                 ` [v2] " Luc, Piotr
@ 2016-10-18 13:08                   ` Ingo Molnar
  2016-10-18 15:01                     ` [PATCH v3] " Piotr Luc
  0 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2016-10-18 13:08 UTC (permalink / raw)
  To: Luc, Piotr
  Cc: x86, linux-kernel, peterz, torvalds, tglx, jpoimboe, hpa,
	brgerst, mingo, luto, bp, Hansen, Dave, dvlasenk


* Luc, Piotr <Piotr.Luc@intel.com> wrote:

> On Mon, 2016-10-17 at 17:03 +0200, Piotr Luc wrote:
> > v2: Initialize new bits in the scattered group. Add
> 
> The commit message is obviously broken. Sorry for that.
> I will resend with fixed message.

In the v3 patchlog please also describe to what extent new instructions are 
enabled by the patch when run on real hardware (or on a simulator).

I.e. can user-space run those new instructions, while it couldn't before - or is 
the patch purely for /proc/cpuinfo enumeration?

I.e. a comprehensive before/after comparison.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features.
  2016-10-18 13:08                   ` Ingo Molnar
@ 2016-10-18 15:01                     ` Piotr Luc
  2016-10-19 15:43                       ` [tip:x86/urgent] " tip-bot for Piotr Luc
  0 siblings, 1 reply; 18+ messages in thread
From: Piotr Luc @ 2016-10-18 15:01 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Dave Hansen, Andy Lutomirski, Borislav Petkov,
	Brian Gerst, Denys Vlasenko, H . Peter Anvin, Josh Poimboeuf,
	Linus Torvalds, Peter Zijlstra, Thomas Gleixner, Ingo Molnar

AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

The new instructions are to be used in future Intel Xeon & Xeon Phi
processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new
instructions are supported by a processor and can be used by programs.

The patch defines new feature flags to enumerate new instruction groups
in /proc/cpuinfo according to the CPUID bits. Because correct xsave setup
is required to use AVX512 instructions, the patch clears the new feature
flags in CPU caps to inform programs not to use the instructions if the
setup fails. 

The spec can be found in Intel Software Developer Manual (SDM) or in
Instruction Set Extensions Programming Reference (ISE).
The implementation is based on Table 2.8 "Information Returned by CPUID
Instruction" in ISE,
https://software.intel.com/sites/default/files/managed/69/78/319433-025.pdf.

v2: Initialize new bits in the scattered group. Add reference to correct
    description of new feature bits.
v3: Fix v2 info. Add short info what the patch does.

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
---
 arch/x86/include/asm/cpufeatures.h       | 2 ++
 arch/x86/kernel/cpu/scattered.c          | 2 ++
 arch/x86/kernel/fpu/xstate.c             | 2 ++
 tools/arch/x86/include/asm/cpufeatures.h | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
 	static const struct cpuid_bit cpuid_bits[] = {
 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa..7dbd480 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -73,6 +73,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..4ecbce9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -195,6 +195,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-- 
2.10.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [tip:x86/urgent] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
  2016-10-18 15:01                     ` [PATCH v3] " Piotr Luc
@ 2016-10-19 15:43                       ` tip-bot for Piotr Luc
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Piotr Luc @ 2016-10-19 15:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: luto, brgerst, bp, dave.hansen, hpa, linux-kernel, mingo,
	torvalds, jpoimboe, peterz, piotr.luc, tglx, dvlasenk

Commit-ID:  8214899342981dbd49ae24aadbbd19e9e7830684
Gitweb:     http://git.kernel.org/tip/8214899342981dbd49ae24aadbbd19e9e7830684
Author:     Piotr Luc <piotr.luc@intel.com>
AuthorDate: Tue, 18 Oct 2016 17:01:11 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 19 Oct 2016 17:37:13 +0200

x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
variable precision.
AVX512_4FMAPS - Vector instructions for deep learning floating-point
single precision.

These new instructions are to be used in future Intel Xeon & Xeon Phi
processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new
instructions are supported by a processor.

The spec can be found in the Intel Software Developer Manual (SDM) or in
the Instruction Set Extensions Programming Reference (ISE).

Define new feature flags to enumerate the new instructions in /proc/cpuinfo
according to the CPUID bits and add the xsave extensions which are
required for proper operation.

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20161018150111.29926-1-piotr.luc@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/cpufeatures.h       | 2 ++
 arch/x86/kernel/cpu/scattered.c          | 2 ++
 arch/x86/kernel/fpu/xstate.c             | 2 ++
 tools/arch/x86/include/asm/cpufeatures.h | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc8..a396292 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
 	static const struct cpuid_bit cpuid_bits[] = {
 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c..095ef7d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1188bc8..a396292 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */

^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2016-10-19 15:45 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-12 17:57 [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features Piotr Luc
2016-10-16 11:21 ` [tip:x86/urgent] " tip-bot for Piotr Luc
2016-10-16 14:22   ` Borislav Petkov
2016-10-16 16:02     ` hpa
2016-10-16 16:35       ` Borislav Petkov
2016-10-16 18:42         ` hpa
2016-10-16 22:42           ` Borislav Petkov
2016-10-17  9:55             ` Luc, Piotr
2016-10-17  7:55     ` Thomas Gleixner
2016-10-17  8:20       ` Luc, Piotr
2016-10-17  8:47         ` Ingo Molnar
2016-10-17  9:53           ` Luc, Piotr
2016-10-17  9:54             ` Thomas Gleixner
2016-10-17 15:03               ` [PATCH v2] " Piotr Luc
2016-10-18 13:00                 ` [v2] " Luc, Piotr
2016-10-18 13:08                   ` Ingo Molnar
2016-10-18 15:01                     ` [PATCH v3] " Piotr Luc
2016-10-19 15:43                       ` [tip:x86/urgent] " tip-bot for Piotr Luc

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.