xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86/cpuid: AVX-512 Feature Detection
@ 2016-06-29  2:20 Luwei Kang
  2016-06-29  9:50 ` Andrew Cooper
  0 siblings, 1 reply; 13+ messages in thread
From: Luwei Kang @ 2016-06-29  2:20 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, chao.p.peng, Luwei Kang, yong.y.wang, jbeulich

AVX-512 is an extension of AVX2. Its spec can be found at:
https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf
This patch detects AVX-512 features by CPUID.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
---
 xen/arch/x86/hvm/hvm.c                      | 14 ++++++++++++++
 xen/arch/x86/traps.c                        | 22 +++++++++++++++++++++-
 xen/include/public/arch-x86/cpufeatureset.h |  9 +++++++++
 xen/tools/gen-cpuid.py                      |  4 ++++
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c89ab6e..7696b1e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3474,6 +3474,20 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                   xstate_sizes[_XSTATE_BNDCSR]);
             }
 
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_OPMASK] +
+                                  xstate_sizes[_XSTATE_OPMASK]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_ZMM] +
+                                  xstate_sizes[_XSTATE_ZMM]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             if ( _ecx & cpufeat_mask(X86_FEATURE_PKU) )
             {
                 xfeature_mask |= XSTATE_PKRU;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 767d0b0..8fb697b 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -975,7 +975,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
 
     switch ( leaf )
     {
-        uint32_t tmp, _ecx;
+        uint32_t tmp, _ecx, _ebx;
 
     case 0x00000001:
         c &= pv_featureset[FEATURESET_1c];
@@ -1157,6 +1157,26 @@ void pv_cpuid(struct cpu_user_regs *regs)
                                xstate_sizes[_XSTATE_YMM]);
             }
 
+            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+                domain_cpuid(currd, 7, 0, &tmp, &_ebx, &tmp, &tmp);
+            else
+                cpuid_count(7, 0, &tmp, &_ebx, &tmp, &tmp);
+            _ebx &= pv_featureset[FEATURESET_7b0];
+
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_OPMASK] +
+                                  xstate_sizes[_XSTATE_OPMASK]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_ZMM] +
+                                  xstate_sizes[_XSTATE_ZMM]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             a = (uint32_t)xfeature_mask;
             d = (uint32_t)(xfeature_mask >> 32);
             c = xstate_size;
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 39acf8c..9320c9e 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
 XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
 XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
 XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
+XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
+XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
 XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
 XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
 XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
+XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
 XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
 XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
+XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
+XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
+XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
 XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
+XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
+XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
 XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
+XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
 XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
 XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
 
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 7c45eca..897e660 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -235,6 +235,10 @@ def crunch_numbers(state):
         # subsequent instruction groups may only be VEX encoded.
         AVX: [FMA, FMA4, F16C, AVX2, XOP],
 
+        # AVX-512 is an extension of AVX2 and depends on AVX2 being available.
+        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
+                AVX512BW, AVX512VL, AVX512VBMI],
+
         # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the
         # SAHF/LAHF instructions are reintroduced in Long Mode.  1GB
         # superpages, PCID and PKU are only available in 4 level paging.
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-29  2:20 [PATCH] x86/cpuid: AVX-512 Feature Detection Luwei Kang
@ 2016-06-29  9:50 ` Andrew Cooper
  2016-06-29  9:53   ` Andrew Cooper
  2016-06-29 10:03   ` Jan Beulich
  0 siblings, 2 replies; 13+ messages in thread
From: Andrew Cooper @ 2016-06-29  9:50 UTC (permalink / raw)
  To: Luwei Kang, xen-devel; +Cc: chao.p.peng, yong.y.wang, jbeulich

On 29/06/16 03:20, Luwei Kang wrote:
> AVX-512 is an extention of AVX2. Its spec can be found at:
> https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf
> This patch detects AVX-512 features by CPUID.
>
> Signed-off-by: Luwei Kang <luwei.kang@intel.com>
> ---
>  xen/arch/x86/hvm/hvm.c                      | 14 ++++++++++++++
>  xen/arch/x86/traps.c                        | 22 +++++++++++++++++++++-
>  xen/include/public/arch-x86/cpufeatureset.h |  9 +++++++++
>  xen/tools/gen-cpuid.py                      |  4 ++++
>  4 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index c89ab6e..7696b1e 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3474,6 +3474,20 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                    xstate_sizes[_XSTATE_BNDCSR]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_OPMASK] +
> +                                  xstate_sizes[_XSTATE_OPMASK]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_ZMM] +
> +                                  xstate_sizes[_XSTATE_ZMM]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              if ( _ecx & cpufeat_mask(X86_FEATURE_PKU) )
>              {
>                  xfeature_mask |= XSTATE_PKRU;
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 767d0b0..8fb697b 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -975,7 +975,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
>  
>      switch ( leaf )
>      {
> -        uint32_t tmp, _ecx;
> +        uint32_t tmp, _ecx, _ebx;
>  
>      case 0x00000001:
>          c &= pv_featureset[FEATURESET_1c];
> @@ -1157,6 +1157,26 @@ void pv_cpuid(struct cpu_user_regs *regs)
>                                 xstate_sizes[_XSTATE_YMM]);
>              }
>  
> +            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +                domain_cpuid(currd, 7, 0, &tmp, &_ebx, &tmp, &tmp);
> +            else
> +                cpuid_count(7, 0, &tmp, &_ebx, &tmp, &tmp);
> +            _ebx &= pv_featureset[FEATURESET_7b0];
> +
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_OPMASK] +
> +                                  xstate_sizes[_XSTATE_OPMASK]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_ZMM] +
> +                                  xstate_sizes[_XSTATE_ZMM]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              a = (uint32_t)xfeature_mask;
>              d = (uint32_t)(xfeature_mask >> 32);
>              c = xstate_size;
> diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
> index 39acf8c..9320c9e 100644
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
>  XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
>  XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
>  XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
>  XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
>  XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
>  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
>  XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
>  XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
>  XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
>  XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
>  
> diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
> index 7c45eca..897e660 100755
> --- a/xen/tools/gen-cpuid.py
> +++ b/xen/tools/gen-cpuid.py
> @@ -235,6 +235,10 @@ def crunch_numbers(state):
>          # subsequent instruction groups may only be VEX encoded.
>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>  
> +        # AVX-512 is an extention of AVX2 and it depends on AVX2 available.
> +        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
> +                AVX512BW, AVX512VL, AVX512VBMI],

I think this needs adjusting.  AVX512F is the base feature and
indication of extra xstate, while all other AVX512 features (e.g.
AVX512DQ) are explicitly documented as not needing a check for AVX512F
if the AVX512DQ bit is present.

I think it wants to look something like:

# AVX2 is an extension to AVX, providing mainly new integer instructions.
# In principle, AVX512 only depends on YMM register state, but many AVX2
# instructions are extended by AVX512F to 512-bit forms.
AVX2: [AVX512F],

# AVX512F is taken to mean hardware support for EVEX encoded instructions,
# 512bit registers, and the instructions themselves.  All further AVX512
# features are built on top of AVX512F.
AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
                 AVX512BW, AVX512VL, AVX512VBMI],

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-29  9:50 ` Andrew Cooper
@ 2016-06-29  9:53   ` Andrew Cooper
  2016-06-29 10:03   ` Jan Beulich
  1 sibling, 0 replies; 13+ messages in thread
From: Andrew Cooper @ 2016-06-29  9:53 UTC (permalink / raw)
  To: Luwei Kang, xen-devel; +Cc: chao.p.peng, yong.y.wang, jbeulich

On 29/06/16 10:50, Andrew Cooper wrote:
> On 29/06/16 03:20, Luwei Kang wrote:
>> diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
>> index 7c45eca..897e660 100755
>> --- a/xen/tools/gen-cpuid.py
>> +++ b/xen/tools/gen-cpuid.py
>> @@ -235,6 +235,10 @@ def crunch_numbers(state):
>>          # subsequent instruction groups may only be VEX encoded.
>>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>>  
>> +        # AVX-512 is an extention of AVX2 and it depends on AVX2 available.
>> +        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
>> +                AVX512BW, AVX512VL, AVX512VBMI],
> I think this needs adjusting.  AVX512F is the base feature and
> indication of extra xstate, while all other AVX512 features (e.g.
> AVX512DQ) are explicitly documented not needing to check for AVX512F if
> the AVX512DQ bit is present.
>
> I think it wants to look something like:
>
> # AVX2 is an extension to AVX, providing mainly new integer instructions.
> # In principle, AVX512 only depends on YMM register state, but many AVX2
> # instructions are extended by AVX512F to 512-bit forms.
> AVX2: [AVX512F],
>
> # AVX512F is taken to mean hardware support for EVEX encoded instructions,
> # 512bit registers, and the instructions themselves.  All further AVX512
> features
> # are built on top of AVX512F.
> AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
>                  AVX512BW, AVX512VL, AVX512VBMI],

P.S. Please sort that dictionary by the integer value of the key, so
AVX2 and AVX512F should be after _3DNOW.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-29  9:50 ` Andrew Cooper
  2016-06-29  9:53   ` Andrew Cooper
@ 2016-06-29 10:03   ` Jan Beulich
  2016-06-29 11:37     ` Andrew Cooper
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2016-06-29 10:03 UTC (permalink / raw)
  To: Andrew Cooper, Luwei Kang, xen-devel; +Cc: chao.p.peng, yong.y.wang

>>> On 29.06.16 at 11:50, <andrew.cooper3@citrix.com> wrote:
> On 29/06/16 03:20, Luwei Kang wrote:
>> --- a/xen/tools/gen-cpuid.py
>> +++ b/xen/tools/gen-cpuid.py
>> @@ -235,6 +235,10 @@ def crunch_numbers(state):
>>          # subsequent instruction groups may only be VEX encoded.
>>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>>  
>> +        # AVX-512 is an extention of AVX2 and it depends on AVX2 available.
>> +        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
>> +                AVX512BW, AVX512VL, AVX512VBMI],
> 
> I think this needs adjusting.  AVX512F is the base feature and
> indication of extra xstate, while all other AVX512 features (e.g.
> AVX512DQ) are explicitly documented not needing to check for AVX512F if
> the AVX512DQ bit is present.

I think the "not" here is wrong? At least my copy (rev 024) requires
all involved feature bits to be checked (see e.g. table 2-2 or the
individual instruction pages).

> I think it wants to look something like:
> 
> # AVX2 is an extension to AVX, providing mainly new integer instructions.
> # In principle, AVX512 only depends on YMM register state, but many AVX2

DYM ZMM register state here?

Jan

> # instructions are extended by AVX512F to 512-bit forms.
> AVX2: [AVX512F],
> 
> # AVX512F is taken to mean hardware support for EVEX encoded instructions,
> # 512bit registers, and the instructions themselves.  All further AVX512
> features
> # are built on top of AVX512F.
> AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
>                  AVX512BW, AVX512VL, AVX512VBMI],
> 
> ~Andrew




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-29 10:03   ` Jan Beulich
@ 2016-06-29 11:37     ` Andrew Cooper
  2016-06-29 14:13       ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Cooper @ 2016-06-29 11:37 UTC (permalink / raw)
  To: Jan Beulich, Luwei Kang, xen-devel; +Cc: chao.p.peng, yong.y.wang

On 29/06/16 11:03, Jan Beulich wrote:
>>>> On 29.06.16 at 11:50, <andrew.cooper3@citrix.com> wrote:
>> On 29/06/16 03:20, Luwei Kang wrote:
>>> --- a/xen/tools/gen-cpuid.py
>>> +++ b/xen/tools/gen-cpuid.py
>>> @@ -235,6 +235,10 @@ def crunch_numbers(state):
>>>          # subsequent instruction groups may only be VEX encoded.
>>>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>>>  
>>> +        # AVX-512 is an extention of AVX2 and it depends on AVX2 available.
>>> +        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
>>> +                AVX512BW, AVX512VL, AVX512VBMI],
>> I think this needs adjusting.  AVX512F is the base feature and
>> indication of extra xstate, while all other AVX512 features (e.g.
>> AVX512DQ) are explicitly documented not needing to check for AVX512F if
>> the AVX512DQ bit is present.
> I think the "not" here is wrong? At least my copy (rev 024) requires
> all involved feature bits to be checked (see e.g. table 2-2 or the
> individual instruction pages).

Hmm - yet another inconsistency.  Some instructions specify a CPUID
dependency for just AVX512F (EVEX.NDS.512.66.0F.W1 C2 /r ib VCMPPD k1
{k2}, zmm2, zmm3/m512/m64bcst{sae}, imm8), some for AVX512F and a second
feature (EVEX.256.66.0F38.W1 19 /r VBROADCASTSD ymm1 {k1}{z}, xmm2/m64),
and some only for the second feature (EVEX.512.66.0F.W0 79 /r
VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}).

FWIW, I still think the dependency expression is ok in its current form.

>
>> I think it wants to look something like:
>>
>> # AVX2 is an extension to AVX, providing mainly new integer instructions.
>> # In principle, AVX512 only depends on YMM register state, but many AVX2
> DYM ZMM register state here?

No.  AVX512 introduces ZMM registers.

To enable ZMM registers (and opmask), YMM must be enabled in %xcr0, and
this is the dependency I am talking about.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-29 11:37     ` Andrew Cooper
@ 2016-06-29 14:13       ` Jan Beulich
  0 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2016-06-29 14:13 UTC (permalink / raw)
  To: Andrew Cooper, Luwei Kang; +Cc: chao.p.peng, yong.y.wang, xen-devel

>>> On 29.06.16 at 13:37, <andrew.cooper3@citrix.com> wrote:
> On 29/06/16 11:03, Jan Beulich wrote:
>>>>> On 29.06.16 at 11:50, <andrew.cooper3@citrix.com> wrote:
>>> On 29/06/16 03:20, Luwei Kang wrote:
>>>> --- a/xen/tools/gen-cpuid.py
>>>> +++ b/xen/tools/gen-cpuid.py
>>>> @@ -235,6 +235,10 @@ def crunch_numbers(state):
>>>>          # subsequent instruction groups may only be VEX encoded.
>>>>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>>>>  
>>>> +        # AVX-512 is an extention of AVX2 and it depends on AVX2 available.
>>>> +        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
>>>> +                AVX512BW, AVX512VL, AVX512VBMI],
>>> I think this needs adjusting.  AVX512F is the base feature and
>>> indication of extra xstate, while all other AVX512 features (e.g.
>>> AVX512DQ) are explicitly documented not needing to check for AVX512F if
>>> the AVX512DQ bit is present.
>> I think the "not" here is wrong? At least my copy (rev 024) requires
>> all involved feature bits to be checked (see e.g. table 2-2 or the
>> individual instruction pages).
> 
> Hmm - yet another inconsistency.  Some instructions specify a CPUID
> dependency for just AVX512F (EVEX.NDS.512.66.0F.W1 C2 /r ib VCMPPD k1
> {k2}, zmm2, zmm3/m512/m64bcst{sae}, imm8), some for AVX512F and a second
> feature (EVEX.256.66.0F38.W1 19 /r VBROADCASTSD ymm1 {k1}{z}, xmm2/m64)
> , and some only for the second feature (EVEX.512.66.0F.W0 79 /r
> VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er})
> 
> FWIW, I still think the dependency expression is ok in its current form.

Oh, of course - I didn't mean to call that into question.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-29  1:57 Luwei Kang
@ 2016-06-29  9:21 ` Jan Beulich
  0 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2016-06-29  9:21 UTC (permalink / raw)
  To: Luwei Kang; +Cc: andrew.cooper3, chao.p.peng, yong.y.wang, xen-devel

>>> On 29.06.16 at 03:57, <luwei.kang@intel.com> wrote:
> AVX-512 is an extention of AVX2. Its spec can be found at:
> https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf 
> This patch detects AVX-512 features by CPUID.
> 
> Signed-off-by: Luwei Kang <luwei.kang@intel.com>
> ---

Please version your patches (I see there's now even a 3rd one already
on the list) and add brief information on what changed between versions
here.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] x86/cpuid: AVX-512 Feature Detection
@ 2016-06-29  1:57 Luwei Kang
  2016-06-29  9:21 ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Luwei Kang @ 2016-06-29  1:57 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, chao.p.peng, Luwei Kang, yong.y.wang, jbeulich

AVX-512 is an extension of AVX2. Its spec can be found at:
https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf
This patch detects AVX-512 features by CPUID.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
---
 xen/arch/x86/hvm/hvm.c                      | 14 ++++++++++++++
 xen/arch/x86/traps.c                        | 24 +++++++++++++++++++++++-
 xen/include/public/arch-x86/cpufeatureset.h |  9 +++++++++
 xen/tools/gen-cpuid.py                      |  4 ++++
 4 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c89ab6e..7696b1e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3474,6 +3474,20 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                   xstate_sizes[_XSTATE_BNDCSR]);
             }
 
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_OPMASK] +
+                                  xstate_sizes[_XSTATE_OPMASK]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_ZMM] +
+                                  xstate_sizes[_XSTATE_ZMM]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             if ( _ecx & cpufeat_mask(X86_FEATURE_PKU) )
             {
                 xfeature_mask |= XSTATE_PKRU;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 767d0b0..a190103 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -975,7 +975,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
 
     switch ( leaf )
     {
-        uint32_t tmp, _ecx;
+        uint32_t tmp, _ecx, _ebx;
 
     case 0x00000001:
         c &= pv_featureset[FEATURESET_1c];
@@ -1139,6 +1139,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
             domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
         else
             _ecx = cpuid_ecx(1);
+
         _ecx &= pv_featureset[FEATURESET_1c];
 
         if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
@@ -1157,6 +1158,27 @@ void pv_cpuid(struct cpu_user_regs *regs)
                                xstate_sizes[_XSTATE_YMM]);
             }
 
+            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+                domain_cpuid(currd, 7, 0, &tmp, &_ebx, &tmp, &tmp);
+            else
+                cpuid_count(7, 0, &tmp, &_ebx, &tmp, &tmp);
+
+            _ebx &= pv_featureset[FEATURESET_7b0];
+
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_OPMASK] +
+                                  xstate_sizes[_XSTATE_OPMASK]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_ZMM] +
+                                  xstate_sizes[_XSTATE_ZMM]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             a = (uint32_t)xfeature_mask;
             d = (uint32_t)(xfeature_mask >> 32);
             c = xstate_size;
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 39acf8c..9320c9e 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
 XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
 XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
 XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
+XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
+XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
 XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
 XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
 XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
+XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
 XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
 XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
+XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
+XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
+XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
 XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
+XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
+XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
 XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
+XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
 XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
 XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
 
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 7c45eca..897e660 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -235,6 +235,10 @@ def crunch_numbers(state):
         # subsequent instruction groups may only be VEX encoded.
         AVX: [FMA, FMA4, F16C, AVX2, XOP],
 
+        # AVX-512 is an extension of AVX2 and depends on AVX2 being available.
+        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
+                AVX512BW, AVX512VL, AVX512VBMI],
+
         # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the
         # SAHF/LAHF instructions are reintroduced in Long Mode.  1GB
         # superpages, PCID and PKU are only available in 4 level paging.
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-28  8:46 ` Andrew Cooper
@ 2016-06-28  8:51   ` Kang, Luwei
  0 siblings, 0 replies; 13+ messages in thread
From: Kang, Luwei @ 2016-06-28  8:51 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel; +Cc: Peng, Chao P, Wang, Yong Y, jbeulich

OK, no problem.


-----Original Message-----
From: Andrew Cooper [mailto:andrew.cooper3@citrix.com] 
Sent: Tuesday, June 28, 2016 4:47 PM
To: Kang, Luwei <luwei.kang@intel.com>; xen-devel@lists.xen.org
Cc: jbeulich@suse.com; Wang, Yong Y <yong.y.wang@intel.com>; Peng, Chao P <chao.p.peng@intel.com>
Subject: Re: [PATCH] x86/cpuid: AVX-512 Feature Detection

On 28/06/16 06:51, Luwei Kang wrote:
> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
>          else
> +        {
>              _ecx = cpuid_ecx(1);
> +            cpuid_count(0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
> +

In addition to Jan's comments, having _ecx from one leaf and _ebx from a different leaf collected at the same time is liable to cause confusion.

Please split the cpuid call for leaf 7 out from here, and put it in the next hunk, just like the way the hvm_cpuid() side works.

~Andrew

>          _ecx &= pv_featureset[FEATURESET_1c];
>  
>          if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
> @@ -1157,6 +1164,14 @@ void pv_cpuid(struct cpu_user_regs *regs)
>                                 xstate_sizes[_XSTATE_YMM]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              a = (uint32_t)xfeature_mask;
>              d = (uint32_t)(xfeature_mask >> 32);
>              c = xstate_size;


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-28  5:51 Luwei Kang
  2016-06-28  7:49 ` Jan Beulich
@ 2016-06-28  8:46 ` Andrew Cooper
  2016-06-28  8:51   ` Kang, Luwei
  1 sibling, 1 reply; 13+ messages in thread
From: Andrew Cooper @ 2016-06-28  8:46 UTC (permalink / raw)
  To: Luwei Kang, xen-devel; +Cc: chao.p.peng, yong.y.wang, jbeulich

On 28/06/16 06:51, Luwei Kang wrote:
> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
>          else
> +        {
>              _ecx = cpuid_ecx(1);
> +            cpuid_count(0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
> +

In addition to Jan's comments, having _ecx from one leaf and _ebx from a
different leaf collected at the same time is liable to cause confusion.

Please split the cpuid call for leaf 7 out from here, and put it in the
next hunk, just like the way the hvm_cpuid() side works.

~Andrew

>          _ecx &= pv_featureset[FEATURESET_1c];
>  
>          if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
> @@ -1157,6 +1164,14 @@ void pv_cpuid(struct cpu_user_regs *regs)
>                                 xstate_sizes[_XSTATE_YMM]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              a = (uint32_t)xfeature_mask;
>              d = (uint32_t)(xfeature_mask >> 32);
>              c = xstate_size;


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-28  7:49 ` Jan Beulich
@ 2016-06-28  8:10   ` Kang, Luwei
  0 siblings, 0 replies; 13+ messages in thread
From: Kang, Luwei @ 2016-06-28  8:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, Peng, Chao P, Wang, Yong Y, xen-devel

Thanks for your advice, I will make a change right now.


-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Tuesday, June 28, 2016 3:49 PM
To: Kang, Luwei <luwei.kang@intel.com>
Cc: andrew.cooper3@citrix.com; Peng, Chao P <chao.p.peng@intel.com>; Wang, Yong Y <yong.y.wang@intel.com>; xen-devel@lists.xen.org
Subject: Re: [PATCH] x86/cpuid: AVX-512 Feature Detection

>>> On 28.06.16 at 07:51, <luwei.kang@intel.com> wrote:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3474,6 +3474,14 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                    xstate_sizes[_XSTATE_BNDCSR]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);

I think this would better be three such statements, one per bit.
Otherwise the goal of not putting in assumptions on the relative ordering of bits and save area ranges gets undermined.

> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);

The neighboring line tells you that this should be 7 instead of 0x07.

> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
>  XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
>  XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
>  XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
>  XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
>  XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
>  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
>  XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
>  XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
>  XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
>  XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */

This lacks an adjustment to the dependencies between features in xen/tools/gen-cpuid.py.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] x86/cpuid: AVX-512 Feature Detection
  2016-06-28  5:51 Luwei Kang
@ 2016-06-28  7:49 ` Jan Beulich
  2016-06-28  8:10   ` Kang, Luwei
  2016-06-28  8:46 ` Andrew Cooper
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2016-06-28  7:49 UTC (permalink / raw)
  To: Luwei Kang; +Cc: andrew.cooper3, chao.p.peng, yong.y.wang, xen-devel

>>> On 28.06.16 at 07:51, <luwei.kang@intel.com> wrote:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3474,6 +3474,14 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                    xstate_sizes[_XSTATE_BNDCSR]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);

I think this would better be three such statements, one per bit.
Otherwise the goal of not putting in assumptions on the relative
ordering of bits and save area ranges gets undermined.

> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);

The neighboring line tells you that this should be 7 instead of 0x07.

> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
>  XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
>  XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
>  XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
>  XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
>  XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
>  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
>  XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
>  XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
>  XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
>  XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */

This lacks an adjustment to the dependencies between features in
xen/tools/gen-cpuid.py.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] x86/cpuid: AVX-512 Feature Detection
@ 2016-06-28  5:51 Luwei Kang
  2016-06-28  7:49 ` Jan Beulich
  2016-06-28  8:46 ` Andrew Cooper
  0 siblings, 2 replies; 13+ messages in thread
From: Luwei Kang @ 2016-06-28  5:51 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, chao.p.peng, Luwei Kang, yong.y.wang, jbeulich

AVX-512 is an extension of AVX2. Its spec can be found at:
https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf
This patch detects AVX-512 features by CPUID.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
---
 xen/arch/x86/hvm/hvm.c                      |  8 ++++++++
 xen/arch/x86/traps.c                        | 17 ++++++++++++++++-
 xen/include/public/arch-x86/cpufeatureset.h |  9 +++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c89ab6e..693afd5 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3474,6 +3474,14 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                   xstate_sizes[_XSTATE_BNDCSR]);
             }
 
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             if ( _ecx & cpufeat_mask(X86_FEATURE_PKU) )
             {
                 xfeature_mask |= XSTATE_PKRU;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 767d0b0..1c75e93 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -975,7 +975,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
 
     switch ( leaf )
     {
-        uint32_t tmp, _ecx;
+        uint32_t tmp, _ecx, _ebx;
 
     case 0x00000001:
         c &= pv_featureset[FEATURESET_1c];
@@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
     case XSTATE_CPUID:
 
         if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+        {
             domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
+            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);
+        }
         else
+        {
             _ecx = cpuid_ecx(1);
+            cpuid_count(0x07, 0, &tmp, &_ebx, &tmp, &tmp);
+        }
+
         _ecx &= pv_featureset[FEATURESET_1c];
 
         if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
@@ -1157,6 +1164,14 @@ void pv_cpuid(struct cpu_user_regs *regs)
                                xstate_sizes[_XSTATE_YMM]);
             }
 
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             a = (uint32_t)xfeature_mask;
             d = (uint32_t)(xfeature_mask >> 32);
             c = xstate_size;
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 39acf8c..9320c9e 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
 XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
 XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
 XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
+XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
+XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
 XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
 XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
 XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
+XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
 XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
 XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
+XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
+XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
+XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
 XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
+XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
+XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
 XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
+XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
 XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
 XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-29 14:13 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-29  2:20 [PATCH] x86/cpuid: AVX-512 Feature Detection Luwei Kang
2016-06-29  9:50 ` Andrew Cooper
2016-06-29  9:53   ` Andrew Cooper
2016-06-29 10:03   ` Jan Beulich
2016-06-29 11:37     ` Andrew Cooper
2016-06-29 14:13       ` Jan Beulich
  -- strict thread matches above, loose matches on Subject: below --
2016-06-29  1:57 Luwei Kang
2016-06-29  9:21 ` Jan Beulich
2016-06-28  5:51 Luwei Kang
2016-06-28  7:49 ` Jan Beulich
2016-06-28  8:10   ` Kang, Luwei
2016-06-28  8:46 ` Andrew Cooper
2016-06-28  8:51   ` Kang, Luwei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).