linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
@ 2019-06-10 17:02 Fenghua Yu
  2019-06-10 19:20 ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: Fenghua Yu @ 2019-06-10 17:02 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H Peter Anvin,
	Ravi V Shankar
  Cc: linux-kernel, x86, Fenghua Yu

AVX512 Vector Neural Network Instructions (VNNI) in Intel Deep Learning
Boost support bfloat16 format (BF16). BF16 is a short version of FP32 and
has several advantages over FP16. BF16 offers more than enough range for
deep learning training tasks and doesn't need to handle hardware exception
as this is a performance optimization. FP32 accumulation after the
multiply is essential to achieve sufficient numerical behavior on an
application level. 

AVX512 bfloat16 instructions can be enumerated by:
	CPUID.(EAX=7,ECX=1):EAX[bit 5] AVX512_BF16
    
Detailed information of the CPUID bit and AVX512 bfloat16 instructions
can be found in the latest Intel Architecture Instruction Set Extensions
and Future Features Programming Reference.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
---

Since split lock feature (to-be-upstreamed) occupies the last bit 
of word 7, need to create a new word 19 to host AVX512_BF16 and other
future features.

 arch/x86/include/asm/cpufeature.h        | 7 +++++--
 arch/x86/include/asm/cpufeatures.h       | 8 +++++++-
 arch/x86/include/asm/disabled-features.h | 3 ++-
 arch/x86/include/asm/required-features.h | 3 ++-
 arch/x86/kernel/cpu/cpuid-deps.c         | 1 +
 arch/x86/kernel/cpu/scattered.c          | 1 +
 6 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0e56ff7e4848..cfb7d765ed86 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -30,6 +30,7 @@ enum cpuid_leafs
 	CPUID_7_ECX,
 	CPUID_8000_0007_EBX,
 	CPUID_7_EDX,
+	CPUID_LNX_4,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -81,8 +82,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) ||	\
 	   REQUIRED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 20))
 
 #define DISABLED_MASK_BIT_SET(feature_bit)				\
 	 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
@@ -104,8 +106,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||	\
 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||	\
+	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) ||	\
 	   DISABLED_MASK_CHECK					  ||	\
-	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+	   BUILD_BUG_ON_ZERO(NCAPINTS != 20))
 
 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 981ff9479648..7d76393ce916 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS			19	   /* N 32-bit words worth of info */
+#define NCAPINTS			20	   /* N 32-bit words worth of info */
 #define NBUGINTS			1	   /* N 32-bit bug flags */
 
 /*
@@ -352,6 +352,12 @@
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
 #define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
+/*
+ * Extended auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels and sub-leaves like CPUID level 7 and sub-leaf 1, etc, word 19.
+ */
+#define X86_FEATURE_AVX512_BF16		(19*32+ 0) /* BFLOAT16 instructions */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index a5ea841cc6d2..f0f935f8d917 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -84,6 +84,7 @@
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK18	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+#define DISABLED_MASK19	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6847d85400a8..fa5700097f64 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -101,6 +101,7 @@
 #define REQUIRED_MASK16	0
 #define REQUIRED_MASK17	0
 #define REQUIRED_MASK18	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+#define REQUIRED_MASK19	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 2c0bd38a44ab..65d3e0c47a25 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -59,6 +59,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_AVX512_4VNNIW,	X86_FEATURE_AVX512F   },
 	{ X86_FEATURE_AVX512_4FMAPS,	X86_FEATURE_AVX512F   },
 	{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_BF16,	X86_FEATURE_AVX512VL  },
 	{}
 };
 
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 94aa1c72ca98..59d7a85db621 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,7 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
+	{ X86_FEATURE_AVX512_BF16,	CPUID_EAX,  5, 0x00000007, 1 },
 	{ X86_FEATURE_CAT_L3,		CPUID_EBX,  1, 0x00000010, 0 },
 	{ X86_FEATURE_CAT_L2,		CPUID_EBX,  2, 0x00000010, 0 },
 	{ X86_FEATURE_CDP_L3,		CPUID_ECX,  2, 0x00000010, 1 },
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-10 17:02 [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions Fenghua Yu
@ 2019-06-10 19:20 ` Borislav Petkov
  2019-06-11 18:19   ` Fenghua Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2019-06-10 19:20 UTC (permalink / raw)
  To: Fenghua Yu
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Ravi V Shankar,
	linux-kernel, x86

On Mon, Jun 10, 2019 at 10:02:38AM -0700, Fenghua Yu wrote:
> AVX512 Vector Neural Network Instructions (VNNI) in Intel Deep Learning
> Boost support bfloat16 format (BF16). BF16 is a short version of FP32 and
> has several advantages over FP16. BF16 offers more than enough range for
> deep learning training tasks and doesn't need to handle hardware exception
> as this is a performance optimization. FP32 accumulation after the
> multiply is essential to achieve sufficient numerical behavior on an
> application level. 
> 
> AVX512 bfloat16 instructions can be enumerated by:
> 	CPUID.(EAX=7,ECX=1):EAX[bit 5] AVX512_BF16
>     
> Detailed information of the CPUID bit and AVX512 bfloat16 instructions
> can be found in the latest Intel Architecture Instruction Set Extensions
> and Future Features Programming Reference.
> 
> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> ---
> 
> Since split lock feature (to-be-upstreamed) occupies the last bit 
> of word 7, need to create a new word 19 to host AVX512_BF16 and other
> future features.

Is CPUID.(EAX=7,ECX=1):EAX going to contain only feature bits? If so,
just make it a proper feature word instead of a linux-specific one.

Also, while on the subject, you can recycle word 11

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
#define X86_FEATURE_CQM_LLC             (11*32+ 1) /* LLC QoS if 1 */

and move it to scattered as it is a complete waste. Word 12 too, for
that matter. But do that in separate patches.

Thx.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-10 19:20 ` Borislav Petkov
@ 2019-06-11 18:19   ` Fenghua Yu
  2019-06-11 19:47     ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: Fenghua Yu @ 2019-06-11 18:19 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Ravi V Shankar,
	linux-kernel, x86

On Mon, Jun 10, 2019 at 09:20:26PM +0200, Borislav Petkov wrote:
> On Mon, Jun 10, 2019 at 10:02:38AM -0700, Fenghua Yu wrote:
> > AVX512 Vector Neural Network Instructions (VNNI) in Intel Deep Learning
> > Boost support bfloat16 format (BF16). BF16 is a short version of FP32 and
> > has several advantages over FP16. BF16 offers more than enough range for
> > deep learning training tasks and doesn't need to handle hardware exception
> > as this is a performance optimization. FP32 accumulation after the
> > multiply is essential to achieve sufficient numerical behavior on an
> > application level. 
> > 
> > AVX512 bfloat16 instructions can be enumerated by:
> > 	CPUID.(EAX=7,ECX=1):EAX[bit 5] AVX512_BF16
> >     
> > Detailed information of the CPUID bit and AVX512 bfloat16 instructions
> > can be found in the latest Intel Architecture Instruction Set Extensions
> > and Future Features Programming Reference.
> > 
> > Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> > ---
> > 
> > Since split lock feature (to-be-upstreamed) occupies the last bit 
> > of word 7, need to create a new word 19 to host AVX512_BF16 and other
> > future features.
> 
> Is CPUID.(EAX=7,ECX=1):EAX going to contain only feature bits? If so,
> just make it a proper feature word instead of a linux-specific one.

AFAICT, there will be more features in the EAX register in the future.
Ok. I will use a feature word for CPUID.(EAX=7,ECX=1):EAX.

> 
> Also, while on the subject, you can recycle word 11
> 
> /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
> #define X86_FEATURE_CQM_LLC             (11*32+ 1) /* LLC QoS if 1 */
> 
> and move it to scattered as it is a complete waste. Word 12 too, for
> that matter. But do that in separate patches.

So can I re-organize word 11 and 12 as follows?

1. Change word 11 to host scattered features.
2. Move the previos features in word 11 and word 12 to word 11:
/*
 * Extended auxiliary flags: Linux defined - For features scattered in various
 * CPUID levels and sub-leaves like CPUID level 7 and sub-leaf 1, etc, word 19.
 */
#define X86_FEATURE_CQM_LLC             (11*32+ 0) /* LLC QoS if 1 */
#define X86_FEATURE_CQM_OCCUP_LLC       (11*32+ 1) /* LLC occupancy monitoring */
#define X86_FEATURE_CQM_MBM_TOTAL       (11*32+ 2) /* LLC Total MBM monitoring */
#define X86_FEATURE_CQM_MBM_LOCAL       (11*32+ 3) /* LLC Local MBM monitoring */

3. Change word 12 to host CPUID.(EAX=7,ECX=1):EAX:
/* Intel-defined CPU features, CPUID level 0x7:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16         (12*32+ 0) /* BFLOAT16 instructions */

4. Do other necessary changes to match the new word 11 and word 12.

Thanks.

-Fenghua

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-11 18:19   ` Fenghua Yu
@ 2019-06-11 19:47     ` Borislav Petkov
  2019-06-11 22:28       ` Fenghua Yu
  2019-06-12  3:32       ` Fenghua Yu
  0 siblings, 2 replies; 12+ messages in thread
From: Borislav Petkov @ 2019-06-11 19:47 UTC (permalink / raw)
  To: Fenghua Yu
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Ravi V Shankar,
	linux-kernel, x86

On Tue, Jun 11, 2019 at 11:19:20AM -0700, Fenghua Yu wrote:
> So can I re-organize word 11 and 12 as follows?
> 
> 1. Change word 11 to host scattered features.
> 2. Move the previos features in word 11 and word 12 to word 11:
> /*
>  * Extended auxiliary flags: Linux defined - For features scattered in various
>  * CPUID levels and sub-leaves like CPUID level 7 and sub-leaf 1, etc, word 19.
>  */
> #define X86_FEATURE_CQM_LLC             (11*32+ 0) /* LLC QoS if 1 */
> #define X86_FEATURE_CQM_OCCUP_LLC       (11*32+ 1) /* LLC occupancy monitoring */
> #define X86_FEATURE_CQM_MBM_TOTAL       (11*32+ 2) /* LLC Total MBM monitoring */
> #define X86_FEATURE_CQM_MBM_LOCAL       (11*32+ 3) /* LLC Local MBM monitoring */

Yap.

> 3. Change word 12 to host CPUID.(EAX=7,ECX=1):EAX:
> /* Intel-defined CPU features, CPUID level 0x7:1 (EAX), word 12 */
> #define X86_FEATURE_AVX512_BF16         (12*32+ 0) /* BFLOAT16 instructions */

This needs to be (12*32+ 5) if word 12 is going to map leaf
CPUID.(EAX=7,ECX=1):EAX.

At least judging from the arch extensions doc which lists EAX as:

Bits 04-00: Reserved.
Bit 05: AVX512_BF16. Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
Bits 31-06: Reserved.

> 4. Do other necessary changes to match the new word 11 and word 12.

But split in two patches: first does steps 1+2, second patch adds the
new leaf to word 12.

Thx.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-11 19:47     ` Borislav Petkov
@ 2019-06-11 22:28       ` Fenghua Yu
  2019-06-12  3:29         ` Yu, Fenghua
  2019-06-12  3:32       ` Fenghua Yu
  1 sibling, 1 reply; 12+ messages in thread
From: Fenghua Yu @ 2019-06-11 22:28 UTC (permalink / raw)
  To: Borislav Petkov, g
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Ravi V Shankar,
	linux-kernel, x86

On Tue, Jun 11, 2019 at 09:47:02PM +0200, Borislav Petkov wrote:
> On Tue, Jun 11, 2019 at 11:19:20AM -0700, Fenghua Yu wrote:
> > So can I re-organize word 11 and 12 as follows?
> > 
> > 1. Change word 11 to host scattered features.
> > 2. Move the previos features in word 11 and word 12 to word 11:
> > /*
> >  * Extended auxiliary flags: Linux defined - For features scattered in various
> >  * CPUID levels and sub-leaves like CPUID level 7 and sub-leaf 1, etc, word 19.
> >  */
> > #define X86_FEATURE_CQM_LLC             (11*32+ 0) /* LLC QoS if 1 */
> > #define X86_FEATURE_CQM_OCCUP_LLC       (11*32+ 1) /* LLC occupancy monitoring */
> > #define X86_FEATURE_CQM_MBM_TOTAL       (11*32+ 2) /* LLC Total MBM monitoring */
> > #define X86_FEATURE_CQM_MBM_LOCAL       (11*32+ 3) /* LLC Local MBM monitoring */
> 
> Yap.
> 
> > 3. Change word 12 to host CPUID.(EAX=7,ECX=1):EAX:
> > /* Intel-defined CPU features, CPUID level 0x7:1 (EAX), word 12 */
> > #define X86_FEATURE_AVX512_BF16         (12*32+ 0) /* BFLOAT16 instructions */
> 
> This needs to be (12*32+ 5) if word 12 is going to map leaf
> CPUID.(EAX=7,ECX=1):EAX.
> 
> At least judging from the arch extensions doc which lists EAX as:
> 
> Bits 04-00: Reserved.
> Bit 05: AVX512_BF16. Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
> Bits 31-06: Reserved.

Yes, you are absolutely right. I'll defint it as (12*32+ 5).

> 
> > 4. Do other necessary changes to match the new word 11 and word 12.
> 
> But split in two patches: first does steps 1+2, second patch adds the
> new leaf to word 12.

There are two varialbes defined in cpuinfo_x86: x86_cache_max_rmid and
x86_cache_occ_scale. c->x86_cache_max_rmid is read from CPUID.0xf.1:ECX
and c->x86_cache_occ_scale is read from CPUID.0xf.1:EBX.

After getting X86_FEATURE_CQM_* from scattered, the two variables need
to be read from CPUID again. So the code of reading the two variables
need to be moved from before init_scattered_cpuid_features(c) to after
the function. This make the get_cpu_cap() code awkward.

And the two variables are ONLY used in resctrl monitoring configuration.
There is no need to store them in cpuinfo_x86 on each CPU.

I'm thinking to simplify and clean this part of code:

1. In patch #1:
- remove the definitions of x86_cache_max_rmid and x86_cache_occ_scale
from cpuinfo_x86
- remove assignment of c->x86_cache_max_rmid and c->x86_cache_occ_scale
from get_cpu_cap(c)
- get r->mon_scale and r->num_rmid in rdt_get_mon_l3_config(r) directly
from CPUID.0xf.1:EBX and CPUID.0xf.1:ECX.
2. In patch #2: do steps 1+2 to recycle word 11. After patch #1, I can
totally remove the code to get c->x86_cache_max_rmd and
c->x86_cache_occ_scale in get_cpu_cap(c). And patch #2 is cleaner.
3. In patch #3: add new word 12 to host CPUID.7.1:EAX

Do you think the patch #1 is necessary and this is a right patch set?

Thanks.

-Fenghua

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-11 22:28       ` Fenghua Yu
@ 2019-06-12  3:29         ` Yu, Fenghua
  2019-06-12  3:59           ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: Yu, Fenghua @ 2019-06-12  3:29 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Shankar, Ravi V,
	linux-kernel, x86

> On Tuesday, June 11, 2019 3:28 PM, Fenghua Yu wrote:
> On Tue, Jun 11, 2019 at 09:47:02PM +0200, Borislav Petkov wrote:
> > On Tue, Jun 11, 2019 at 11:19:20AM -0700, Fenghua Yu wrote:
> > > So can I re-organize word 11 and 12 as follows?
> > >
> > > 1. Change word 11 to host scattered features.
> > > 2. Move the previos features in word 11 and word 12 to word 11:
> > > /*
> > >  * Extended auxiliary flags: Linux defined - For features scattered
> > > in various
> > >  * CPUID levels and sub-leaves like CPUID level 7 and sub-leaf 1, etc,
> word 19.
> > >  */
> > > #define X86_FEATURE_CQM_LLC             (11*32+ 0) /* LLC QoS if 1 */
> > > #define X86_FEATURE_CQM_OCCUP_LLC       (11*32+ 1) /* LLC
> occupancy monitoring */
> > > #define X86_FEATURE_CQM_MBM_TOTAL       (11*32+ 2) /* LLC Total
> MBM monitoring */
> > > #define X86_FEATURE_CQM_MBM_LOCAL       (11*32+ 3) /* LLC Local
> MBM monitoring */
> >
> > Yap.
> >
> > > 3. Change word 12 to host CPUID.(EAX=7,ECX=1):EAX:
> > > /* Intel-defined CPU features, CPUID level 0x7:1 (EAX), word 12 */
> > > #define X86_FEATURE_AVX512_BF16         (12*32+ 0) /* BFLOAT16
> instructions */
> >
> > This needs to be (12*32+ 5) if word 12 is going to map leaf
> > CPUID.(EAX=7,ECX=1):EAX.
> >
> > At least judging from the arch extensions doc which lists EAX as:
> >
> > Bits 04-00: Reserved.
> > Bit 05: AVX512_BF16. Vector Neural Network Instructions supporting
> BFLOAT16 inputs and conversion instructions from IEEE single precision.
> > Bits 31-06: Reserved.
> 
> Yes, you are absolutely right. I'll defint it as (12*32+ 5).
> 
> >
> > > 4. Do other necessary changes to match the new word 11 and word 12.
> >
> > But split in two patches: first does steps 1+2, second patch adds the
> > new leaf to word 12.
> 
> There are two varialbes defined in cpuinfo_x86: x86_cache_max_rmid and
> x86_cache_occ_scale. c->x86_cache_max_rmid is read from CPUID.0xf.1:ECX
> and c->x86_cache_occ_scale is read from CPUID.0xf.1:EBX.
> 
> After getting X86_FEATURE_CQM_* from scattered, the two variables need
> to be read from CPUID again. So the code of reading the two variables need
> to be moved from before init_scattered_cpuid_features(c) to after the
> function. This make the get_cpu_cap() code awkward.
> 
> And the two variables are ONLY used in resctrl monitoring configuration.
> There is no need to store them in cpuinfo_x86 on each CPU.
> 
> I'm thinking to simplify and clean this part of code:
> 
> 1. In patch #1:
> - remove the definitions of x86_cache_max_rmid and x86_cache_occ_scale
> from cpuinfo_x86
> - remove assignment of c->x86_cache_max_rmid and c-
> >x86_cache_occ_scale from get_cpu_cap(c)
> - get r->mon_scale and r->num_rmid in rdt_get_mon_l3_config(r) directly
> from CPUID.0xf.1:EBX and CPUID.0xf.1:ECX.
> 2. In patch #2: do steps 1+2 to recycle word 11. After patch #1, I can totally
> remove the code to get c->x86_cache_max_rmd and
> c->x86_cache_occ_scale in get_cpu_cap(c). And patch #2 is cleaner.
> 3. In patch #3: add new word 12 to host CPUID.7.1:EAX
> 
> Do you think the patch #1 is necessary and this is a right patch set?

My bad. I studied a bit more and found the patch #1 is not needed. Please ignore my last email.
 
Thanks.
 
-Fenghua

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-11 19:47     ` Borislav Petkov
  2019-06-11 22:28       ` Fenghua Yu
@ 2019-06-12  3:32       ` Fenghua Yu
  2019-06-12  4:02         ` Borislav Petkov
  1 sibling, 1 reply; 12+ messages in thread
From: Fenghua Yu @ 2019-06-12  3:32 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Ravi V Shankar,
	linux-kernel, x86

On Tue, Jun 11, 2019 at 09:47:02PM +0200, Borislav Petkov wrote:
> On Tue, Jun 11, 2019 at 11:19:20AM -0700, Fenghua Yu wrote:
> > So can I re-organize word 11 and 12 as follows?
> > 
> > 1. Change word 11 to host scattered features.
> > 2. Move the previos features in word 11 and word 12 to word 11:
> > /*
> >  * Extended auxiliary flags: Linux defined - For features scattered in various
> >  * CPUID levels and sub-leaves like CPUID level 7 and sub-leaf 1, etc, word 19.
> >  */
> > #define X86_FEATURE_CQM_LLC             (11*32+ 0) /* LLC QoS if 1 */
> > #define X86_FEATURE_CQM_OCCUP_LLC       (11*32+ 1) /* LLC occupancy monitoring */
> > #define X86_FEATURE_CQM_MBM_TOTAL       (11*32+ 2) /* LLC Total MBM monitoring */
> > #define X86_FEATURE_CQM_MBM_LOCAL       (11*32+ 3) /* LLC Local MBM monitoring */
> 
> Yap.
> 
> > 3. Change word 12 to host CPUID.(EAX=7,ECX=1):EAX:
> > /* Intel-defined CPU features, CPUID level 0x7:1 (EAX), word 12 */
> > #define X86_FEATURE_AVX512_BF16         (12*32+ 0) /* BFLOAT16 instructions */
> 
> This needs to be (12*32+ 5) if word 12 is going to map leaf
> CPUID.(EAX=7,ECX=1):EAX.
> 
> At least judging from the arch extensions doc which lists EAX as:
> 
> Bits 04-00: Reserved.
> Bit 05: AVX512_BF16. Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
> Bits 31-06: Reserved.
> 
> > 4. Do other necessary changes to match the new word 11 and word 12.
> 
> But split in two patches: first does steps 1+2, second patch adds the
> new leaf to word 12.

Currently KVM doesn't simulate scattered features (the ones in CPUID_LNX_*
in cpuid_leafs) as reverse_cpuid[] doesn't contain CPUID_LNX_*.

After the X86_FEATURES_CQM_* features are changed to scattered features,
they will not be simulated by KVM any more as CPUID_F_0_EDX and CPUID_F_1_EDX
are removed.

Should patch #1 simulate X86_FEATURE_CQM_* in KVM? Or let KVM guys handle
the scattered features?

Thanks.

-Fenghua

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-12  3:29         ` Yu, Fenghua
@ 2019-06-12  3:59           ` Borislav Petkov
  2019-06-12 17:41             ` Fenghua Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2019-06-12  3:59 UTC (permalink / raw)
  To: Yu, Fenghua
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Shankar, Ravi V,
	linux-kernel, x86

On Wed, Jun 12, 2019 at 03:29:57AM +0000, Yu, Fenghua wrote:
> My bad. I studied a bit more and found the patch #1 is not needed.

Why, I think you were spot-on:

"And the two variables are ONLY used in resctrl monitoring
configuration. There is no need to store them in cpuinfo_x86 on each
CPU."

That was a real overkill to put them in cpuinfo_x86. The information
needed should simply be read out in rdt_get_mon_l3_config() and that's
it - no need to global values to store them.

Now removing them should be in a separate patch so that review is easy.

Or am I missing an aspect?

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-12  3:32       ` Fenghua Yu
@ 2019-06-12  4:02         ` Borislav Petkov
  2019-06-12 14:10           ` Sean Christopherson
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2019-06-12  4:02 UTC (permalink / raw)
  To: Fenghua Yu
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Ravi V Shankar,
	linux-kernel, x86

On Tue, Jun 11, 2019 at 08:32:59PM -0700, Fenghua Yu wrote:
> Currently KVM doesn't simulate scattered features (the ones in CPUID_LNX_*
> in cpuid_leafs) as reverse_cpuid[] doesn't contain CPUID_LNX_*.

43500e6f294d ("x86/cpufeatures: Remove get_scattered_cpuid_leaf()")

> After the X86_FEATURES_CQM_* features are changed to scattered features,
> they will not be simulated by KVM any more as CPUID_F_0_EDX and CPUID_F_1_EDX
> are removed.

Does KVM even support resctrl? I doubt only exporting a couple of CPUID
bits into the guest is enough...

> Should patch #1 simulate X86_FEATURE_CQM_* in KVM? Or let KVM guys handle
> the scattered features?

Right, the scattered thing was removed as KVM didn't need it,
apparently, see above.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-12  4:02         ` Borislav Petkov
@ 2019-06-12 14:10           ` Sean Christopherson
  2019-06-12 17:04             ` Fenghua Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Sean Christopherson @ 2019-06-12 14:10 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Fenghua Yu, Thomas Gleixner, Ingo Molnar, H Peter Anvin,
	Ravi V Shankar, linux-kernel, x86

On Wed, Jun 12, 2019 at 06:02:59AM +0200, Borislav Petkov wrote:
> On Tue, Jun 11, 2019 at 08:32:59PM -0700, Fenghua Yu wrote:
> > Currently KVM doesn't simulate scattered features (the ones in CPUID_LNX_*
> > in cpuid_leafs) as reverse_cpuid[] doesn't contain CPUID_LNX_*.
> 
> 43500e6f294d ("x86/cpufeatures: Remove get_scattered_cpuid_leaf()")
> 
> > After the X86_FEATURES_CQM_* features are changed to scattered features,
> > they will not be simulated by KVM any more as CPUID_F_0_EDX and CPUID_F_1_EDX
> > are removed.
> 
> Does KVM even support resctrl? I doubt only exporting a couple of CPUID
> bits into the guest is enough...

KVM doesn't support resctrl.  Moving X86_FEATURES_CQM_* to a scattered
leaf won't affect KVM.

> > Should patch #1 simulate X86_FEATURE_CQM_* in KVM? Or let KVM guys handle
> > the scattered features?
> 
> Right, the scattered thing was removed as KVM didn't need it,
> apparently, see above.

Let KVM people deal with it, in the unlikely event someone adds resctrl
support to KVM.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-12 14:10           ` Sean Christopherson
@ 2019-06-12 17:04             ` Fenghua Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Fenghua Yu @ 2019-06-12 17:04 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Borislav Petkov, Thomas Gleixner, Ingo Molnar, H Peter Anvin,
	Ravi V Shankar, linux-kernel, x86

On Wed, Jun 12, 2019 at 07:10:49AM -0700, Sean Christopherson wrote:
> On Wed, Jun 12, 2019 at 06:02:59AM +0200, Borislav Petkov wrote:
> > On Tue, Jun 11, 2019 at 08:32:59PM -0700, Fenghua Yu wrote:
> > > Currently KVM doesn't simulate scattered features (the ones in CPUID_LNX_*
> > > in cpuid_leafs) as reverse_cpuid[] doesn't contain CPUID_LNX_*.
> > 
> > 43500e6f294d ("x86/cpufeatures: Remove get_scattered_cpuid_leaf()")
> > 
> > > After the X86_FEATURES_CQM_* features are changed to scattered features,
> > > they will not be simulated by KVM any more as CPUID_F_0_EDX and CPUID_F_1_EDX
> > > are removed.
> > 
> > Does KVM even support resctrl? I doubt only exporting a couple of CPUID
> > bits into the guest is enough...
> 
> KVM doesn't support resctrl.  Moving X86_FEATURES_CQM_* to a scattered
> leaf won't affect KVM.
> 
> > > Should patch #1 simulate X86_FEATURE_CQM_* in KVM? Or let KVM guys handle
> > > the scattered features?
> > 
> > Right, the scattered thing was removed as KVM didn't need it,
> > apparently, see above.
> 
> Let KVM people deal with it, in the unlikely event someone adds resctrl
> support to KVM.

Thank you for your insight, Boris and Sean!

-Fenghua

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions
  2019-06-12  3:59           ` Borislav Petkov
@ 2019-06-12 17:41             ` Fenghua Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Fenghua Yu @ 2019-06-12 17:41 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, H Peter Anvin, Shankar, Ravi V,
	linux-kernel, x86

On Wed, Jun 12, 2019 at 05:59:08AM +0200, Borislav Petkov wrote:
> On Wed, Jun 12, 2019 at 03:29:57AM +0000, Yu, Fenghua wrote:
> > My bad. I studied a bit more and found the patch #1 is not needed.
> 
> Why, I think you were spot-on:
> 
> "And the two variables are ONLY used in resctrl monitoring
> configuration. There is no need to store them in cpuinfo_x86 on each
> CPU."
> 
> That was a real overkill to put them in cpuinfo_x86. The information
> needed should simply be read out in rdt_get_mon_l3_config() and that's
> it - no need to global values to store them.
> 
> Now removing them should be in a separate patch so that review is easy.
> 
> Or am I missing an aspect?

x86_init_cache_qos() fnds the minimum number of rmid on all CPUs and
store it in boot_cpu_data.

If removing the two variables from cpuinfo_x86 and getting number of
rmid and occupancy scale in rdt_get_mon_l3_config() directly from
CPUID on the current CPU, we need to assume all CPUs have the same
number of rmid.

Is this a right assumption?

After think again, it might be a right assumption after all. AFAICT, each
Intel platform that supports resctrl has the same number of rmid on all
CPUs and there is no plan to have a resctrl platform that has different
numbers of rmid on CPUs.

So Ok, I will write the patch #1 that removes the two variables from
cpuinfo_x86 and gets the info directly from CPUID in rdt_get_mon_l3_config().

Thanks.

-Fenghua

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-06-12 17:50 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-10 17:02 [RFC PATCH] x86/cpufeatures: Enumerate new AVX512 bfloat16 instructions Fenghua Yu
2019-06-10 19:20 ` Borislav Petkov
2019-06-11 18:19   ` Fenghua Yu
2019-06-11 19:47     ` Borislav Petkov
2019-06-11 22:28       ` Fenghua Yu
2019-06-12  3:29         ` Yu, Fenghua
2019-06-12  3:59           ` Borislav Petkov
2019-06-12 17:41             ` Fenghua Yu
2019-06-12  3:32       ` Fenghua Yu
2019-06-12  4:02         ` Borislav Petkov
2019-06-12 14:10           ` Sean Christopherson
2019-06-12 17:04             ` Fenghua Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).