All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v2 3/8] x86: Grant AMX permission for guest
  2022-02-17  6:04 ` [PATCH v2 3/8] x86: Grant AMX permission for guest Yang Zhong
@ 2022-02-17  5:58   ` Yang Zhong
  2022-02-17 13:44     ` Paolo Bonzini
  0 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  5:58 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

On Wed, Feb 16, 2022 at 10:04:29PM -0800, Yang Zhong wrote:
> Kernel allocates 4K xstate buffer by default. For XSAVE features
> which require large state component (e.g. AMX), Linux kernel
> dynamically expands the xstate buffer only after the process has
> acquired the necessary permissions. Those are called dynamically-
> enabled XSAVE features (or dynamic xfeatures).
> 
> There are separate permissions for native tasks and guests.
> 
> Qemu should request the guest permissions for dynamic xfeatures
> which will be exposed to the guest. This only needs to be done
> once before the first vcpu is created.
> 
> KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to
> get host side supported_xcr0 and Qemu can decide if it can request
> dynamically enabled XSAVE features permission.
> https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> ---
>  target/i386/cpu.h         |  7 +++++++
>  target/i386/cpu.c         | 43 +++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/kvm-cpu.c | 12 +++++------
>  target/i386/kvm/kvm.c     | 20 ++++++++++++++++++
>  4 files changed, 76 insertions(+), 6 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 06d2d6bccf..d4ad0f56bd 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -549,6 +549,13 @@ typedef enum X86Seg {
>  #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
>  #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
>  #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
> +#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
> +#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
> +#define XFEATURE_XTILE_MASK             (XSTATE_XTILE_CFG_MASK \
> +                                         | XSTATE_XTILE_DATA_MASK)
> +
> +#define ARCH_GET_XCOMP_GUEST_PERM       0x1024
> +#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
>  
>  #define ESA_FEATURE_ALIGN64_BIT         1
>  
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index ea7e8f9081..377d993438 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -43,6 +43,8 @@
>  #include "disas/capstone.h"
>  #include "cpu-internal.h"
>  
> +#include <sys/syscall.h>
> +
>  /* Helpers for building CPUID[2] descriptors: */
>  
>  struct CPUID2CacheDescriptorInfo {
> @@ -6000,12 +6002,47 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w)
>      }
>  }
>  
> +static void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
> +{
> +    KVMState *s = kvm_state;
> +    uint64_t bitmask;
> +    long rc;
> +
> +    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
> +        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> +        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {

   Paolo, last time you suggested below changes for here:

   rc = kvm_arch_get_supported_cpuid(s, 0xd, 0,
                                  (xdata_bit < 32 ? R_EAX : R_EDX));
   if (!(rc & BIT(xdata_bit & 31)) {
      ...
   }   

  Since I used "mask" as the parameter here, I had to use R_EAX directly.
  Please review, and if it needs to be changed to something like
  "(xdata_bit < 32 ? R_EAX : R_EDX)", I will change this in the next version, thanks!

  Yang


> +            warn_report("no amx support from supported_xcr0, "
> +                        "bitmask:0x%lx", bitmask);
> +            return;
> +        }
> +
> +        rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> +                      XSTATE_XTILE_DATA_BIT);
> +        if (rc) {
> +            /*
> +             * The older kernel version(<5.15) can't support
> +             * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> +             */
> +            return;
> +        }
> +
> +        rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
> +        if (rc) {
> +            warn_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
> +        } else if (!(bitmask & XFEATURE_XTILE_MASK)) {
> +            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> +                        "and bitmask=0x%lx", bitmask);
> +        }
> +    }
> +}
> +
>  /* Calculate XSAVE components based on the configured CPU feature flags */
>  static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>  {
>      CPUX86State *env = &cpu->env;
>      int i;
>      uint64_t mask;
> +    static bool request_perm;
>  
>      if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
>          env->features[FEAT_XSAVE_COMP_LO] = 0;
> @@ -6021,6 +6058,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>          }
>      }
>  
> +    /* Only request permission for first vcpu */
> +    if (kvm_enabled() && !request_perm) {
> +        kvm_request_xsave_components(cpu, mask);
> +        request_perm = true;
> +    }
> +
>      env->features[FEAT_XSAVE_COMP_LO] = mask;
>      env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
>  }
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index ce27d3b1df..a35a1bf9fe 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
>  static void kvm_cpu_xsave_init(void)
>  {
>      static bool first = true;
> -    KVMState *s = kvm_state;
> +    uint32_t eax, ebx, ecx, edx;
>      int i;
>  
>      if (!first) {
> @@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
>          ExtSaveArea *esa = &x86_ext_save_areas[i];
>  
>          if (esa->size) {
> -            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
> -            if (sz != 0) {
> -                assert(esa->size == sz);
> -                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
> -                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
> +            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
> +            if (eax != 0) {
> +                assert(esa->size == eax);
> +                esa->offset = ebx;
> +                esa->ecx = ecx;
>              }
>          }
>      }
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 2c8feb4a6f..3bdcd724c4 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -348,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>      struct kvm_cpuid2 *cpuid;
>      uint32_t ret = 0;
>      uint32_t cpuid_1_edx;
> +    uint64_t bitmask;
>  
>      cpuid = get_supported_cpuid(s);
>  
> @@ -405,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>          if (!has_msr_arch_capabs) {
>              ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
>          }
> +    } else if (function == 0xd && index == 0 &&
> +               (reg == R_EAX || reg == R_EDX)) {
> +        struct kvm_device_attr attr = {
> +            .group = 0,
> +            .attr = KVM_X86_XCOMP_GUEST_SUPP,
> +            .addr = (unsigned long) &bitmask
> +        };
> +
> +        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
> +        if (!sys_attr) {
> +            warn_report("cannot get sys attribute capabilities %d", sys_attr);
> +        }
> +
> +        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
> +        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
> +            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
> +                        "error: %d", rc);
> +        }
> +        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
>      } else if (function == 0x80000001 && reg == R_ECX) {
>          /*
>           * It's safe to enable TOPOEXT even if it's not returned by


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 0/8] AMX support in Qemu
@ 2022-02-17  6:04 Yang Zhong
  2022-02-17  6:04 ` [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
                   ` (7 more replies)
  0 siblings, 8 replies; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

Intel introduces Advanced Matrix Extensions (AMX) [1] feature that
consists of configurable two-dimensional "TILE" registers and new
accelerator instructions that operate on them. TMUL (Tile matrix
MULtiply) is the first accelerator instruction set to use the new
registers.

Since AMX KVM patches have been merged into Linux release, this series
is based on latest Linux release(5.17-rc4).

According to the KVM design, the userspace VMM (e.g. Qemu) is expected
to request guest permission for the dynamically-enabled XSAVE features
only once when the first vCPU is created, and KVM checks guest permission
in KVM_SET_CPUID2.

Intel AMX is XSAVE supported and XSAVE enabled. Those extended features
have large state while the current kvm_xsave only allows 4KB. The AMX KVM has
extended struct kvm_xsave to meet this requirement and added one extra
KVM_GET_XSAVE2 ioctl to handle extended features. From our test, the AMX
live migration works well.

Notice: This version still includes some definitions in the linux-headers;
once Qemu syncs those linux-headers, I will remove those definitions. So
please ignore those changes.

[1] Intel Architecture Instruction Set Extension Programming Reference
    https://software.intel.com/content/dam/develop/external/us/en/documents/\
    architecture-instruction-set-extensions-programming-reference.pdf

Thanks,
Yang
----

Change history
--------------
v1->v2:
   - Patch 1 moved "esa->ecx" into the "if{}"(Paolo).
   - Patch 3, the requirements from Paolo,
     - Moved "esa->ecx" into the "if{}".
     - Used the "mask" as parameter to replace xtiledata bits in
       kvm_request_xsave_components()
     - Used the new defined KVM_X86_XCOMP_GUEST_SUPP from KVM to get
       supported_xcr0 from kvm_arch_get_supported_cpuid().
     - Updated the kvm_request_xsave_components() for future usage.
   - Patch 5 added "case 0x1e:" in kvm_arch_init_vcpu()(Paolo).
   - Patch 6 replaced "if (e->size && e->offset)" with 
     "if (e->size && e->offset && buflen >= e->size + e->offset)"
     for xsave and xrstor(Paolo).
   - Patch 8, which is a newly added patch and is only for linux-headers.
     This patch can be directly dropped once Qemu syncs the linux-headers. 

rfc v1->v1:
   - Patch 1 changed commit message(Kevin and Paolo).
   - Patch 2 changed commit message(Kevin and Paolo).
   - Patch 3, below requirements from Paolo,
     - Called ARCH_REQ_XCOMP_GUEST_PERM from x86_cpu_enable_xsave_components.
       Used kvm_request_xsave_components() to replace x86_xsave_req_perm().
       Replaced syscall(ARCH_GET_XCOMP_GUEST_PERM) with kvm_arch_get_supported_cpuid()
       in kvm_request_xsave_components().
     - Changed kvm_cpu_xsave_init() to use host_cpuid() instead of
       kvm_arch_get_supported_cpuid().
     - Added the "function == 0xd" handle in kvm_arch_get_supported_cpuid().
   - Patch 4, used "uint32_t ecx" to replace "uint32_t need_align, support_xfd".
   - Patch 6, below changes,
     - Changed the commit message(Kevin) and Used the new function
     - kvm_init_xsave() to replace some pieces of code(Wei).
     - Moved KVM_CAP_XSAVE2 extension check to kvm_arch_init_vcpu() to
       make the request permission before KVM_CAP_XSAVE2 extension check(Paolo).
   - Removed RFC prefix.

Jing Liu (5):
  x86: Fix the 64-byte boundary enumeration for extended state
  x86: Add AMX XTILECFG and XTILEDATA components
  x86: Add XFD faulting bit for state components
  x86: Add AMX CPUIDs enumeration
  x86: add support for KVM_CAP_XSAVE2 and AMX state migration

Yang Zhong (2):
  x86: Grant AMX permission for guest
  linux-header: Sync the linux headers

Zeng Guang (1):
  x86: Support XFD and AMX xsave data migration

 linux-headers/asm-x86/kvm.h |  17 ++++++
 linux-headers/linux/kvm.h   |   4 ++
 target/i386/cpu.h           |  46 ++++++++++++++-
 target/i386/cpu.c           | 108 +++++++++++++++++++++++++++++++++++-
 target/i386/kvm/kvm-cpu.c   |  11 ++--
 target/i386/kvm/kvm.c       |  84 ++++++++++++++++++++++------
 target/i386/machine.c       |  42 ++++++++++++++
 target/i386/xsave_helper.c  |  33 +++++++++++
 8 files changed, 320 insertions(+), 25 deletions(-)



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-21 12:51   ` David Edmondson
  2022-02-17  6:04 ` [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components Yang Zhong
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

From: Jing Liu <jing2.liu@intel.com>

The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1]
indicate whether the extended state component is located
on the next 64-byte boundary following the preceding state
component when the compacted format of an XSAVE area is
used.

Right now, they are all zero because no supported component
needed the bit to be set, but the upcoming AMX feature will
use it.  Fix the subleaves value according to KVM's supported
cpuid.

Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 target/i386/cpu.h         | 6 ++++++
 target/i386/cpu.c         | 1 +
 target/i386/kvm/kvm-cpu.c | 1 +
 3 files changed, 8 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 9911d7c871..de1dc124ab 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -548,6 +548,11 @@ typedef enum X86Seg {
 #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
 #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
 
+#define ESA_FEATURE_ALIGN64_BIT         1
+
+#define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
+
+
 /* CPUID feature words */
 typedef enum FeatureWord {
     FEAT_1_EDX,         /* CPUID[1].EDX */
@@ -1354,6 +1359,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
 typedef struct ExtSaveArea {
     uint32_t feature, bits;
     uint32_t offset, size;
+    uint32_t ecx;
 } ExtSaveArea;
 
 #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index aa9e636800..37f06b0b1a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5487,6 +5487,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                 const ExtSaveArea *esa = &x86_ext_save_areas[count];
                 *eax = esa->size;
                 *ebx = esa->offset;
+                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
             }
         }
         break;
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index d95028018e..ce27d3b1df 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void)
             if (sz != 0) {
                 assert(esa->size == sz);
                 esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
+                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
             }
         }
     }


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
  2022-02-17  6:04 ` [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-21 12:53   ` David Edmondson
  2022-02-17  6:04 ` [PATCH v2 3/8] x86: Grant AMX permission for guest Yang Zhong
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

From: Jing Liu <jing2.liu@intel.com>

The AMX TILECFG register and the TMMx tile data registers are
saved/restored via XSAVE, respectively in state component 17
(64 bytes) and state component 18 (8192 bytes).

Add AMX feature bits to x86_ext_save_areas array to set
up AMX components. Add structs that define the layout of
AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the
structs sizes.

Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 target/i386/cpu.h | 18 +++++++++++++++++-
 target/i386/cpu.c |  8 ++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index de1dc124ab..06d2d6bccf 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -537,6 +537,8 @@ typedef enum X86Seg {
 #define XSTATE_ZMM_Hi256_BIT            6
 #define XSTATE_Hi16_ZMM_BIT             7
 #define XSTATE_PKRU_BIT                 9
+#define XSTATE_XTILE_CFG_BIT            17
+#define XSTATE_XTILE_DATA_BIT           18
 
 #define XSTATE_FP_MASK                  (1ULL << XSTATE_FP_BIT)
 #define XSTATE_SSE_MASK                 (1ULL << XSTATE_SSE_BIT)
@@ -845,6 +847,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EDX_TSX_LDTRK         (1U << 16)
 /* AVX512_FP16 instruction */
 #define CPUID_7_0_EDX_AVX512_FP16       (1U << 23)
+/* AMX tile (two-dimensional register) */
+#define CPUID_7_0_EDX_AMX_TILE          (1U << 24)
 /* Speculation Control */
 #define CPUID_7_0_EDX_SPEC_CTRL         (1U << 26)
 /* Single Thread Indirect Branch Predictors */
@@ -1348,6 +1352,16 @@ typedef struct XSavePKRU {
     uint32_t padding;
 } XSavePKRU;
 
+/* Ext. save area 17: AMX XTILECFG state */
+typedef struct XSaveXTILECFG {
+    uint8_t xtilecfg[64];
+} XSaveXTILECFG;
+
+/* Ext. save area 18: AMX XTILEDATA state */
+typedef struct XSaveXTILEDATA {
+    uint8_t xtiledata[8][1024];
+} XSaveXTILEDATA;
+
 QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
 QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
 QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
@@ -1355,6 +1369,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
 QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
 QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
 QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
 
 typedef struct ExtSaveArea {
     uint32_t feature, bits;
@@ -1362,7 +1378,7 @@ typedef struct ExtSaveArea {
     uint32_t ecx;
 } ExtSaveArea;
 
-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
+#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
 
 extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 37f06b0b1a..ea7e8f9081 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
     [XSTATE_PKRU_BIT] =
           { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
             .size = sizeof(XSavePKRU) },
+    [XSTATE_XTILE_CFG_BIT] = {
+        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
+        .size = sizeof(XSaveXTILECFG),
+    },
+    [XSTATE_XTILE_DATA_BIT] = {
+        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
+        .size = sizeof(XSaveXTILEDATA)
+    },
 };
 
 static uint32_t xsave_area_size(uint64_t mask)


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 3/8] x86: Grant AMX permission for guest
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
  2022-02-17  6:04 ` [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
  2022-02-17  6:04 ` [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-17  5:58   ` Yang Zhong
  2022-02-17  6:04 ` [PATCH v2 4/8] x86: Add XFD faulting bit for state components Yang Zhong
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

Kernel allocates 4K xstate buffer by default. For XSAVE features
which require large state component (e.g. AMX), Linux kernel
dynamically expands the xstate buffer only after the process has
acquired the necessary permissions. Those are called dynamically-
enabled XSAVE features (or dynamic xfeatures).

There are separate permissions for native tasks and guests.

Qemu should request the guest permissions for dynamic xfeatures
which will be exposed to the guest. This only needs to be done
once before the first vcpu is created.

KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to
get host side supported_xcr0 and Qemu can decide if it can request
dynamically enabled XSAVE features permission.
https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
---
 target/i386/cpu.h         |  7 +++++++
 target/i386/cpu.c         | 43 +++++++++++++++++++++++++++++++++++++++
 target/i386/kvm/kvm-cpu.c | 12 +++++------
 target/i386/kvm/kvm.c     | 20 ++++++++++++++++++
 4 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 06d2d6bccf..d4ad0f56bd 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -549,6 +549,13 @@ typedef enum X86Seg {
 #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
 #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
 #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
+#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
+#define XFEATURE_XTILE_MASK             (XSTATE_XTILE_CFG_MASK \
+                                         | XSTATE_XTILE_DATA_MASK)
+
+#define ARCH_GET_XCOMP_GUEST_PERM       0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
 
 #define ESA_FEATURE_ALIGN64_BIT         1
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ea7e8f9081..377d993438 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -43,6 +43,8 @@
 #include "disas/capstone.h"
 #include "cpu-internal.h"
 
+#include <sys/syscall.h>
+
 /* Helpers for building CPUID[2] descriptors: */
 
 struct CPUID2CacheDescriptorInfo {
@@ -6000,12 +6002,47 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w)
     }
 }
 
+static void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
+{
+    KVMState *s = kvm_state;
+    uint64_t bitmask;
+    long rc;
+
+    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
+        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {
+            warn_report("no amx support from supported_xcr0, "
+                        "bitmask:0x%lx", bitmask);
+            return;
+        }
+
+        rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
+                      XSTATE_XTILE_DATA_BIT);
+        if (rc) {
+            /*
+             * The older kernel version(<5.15) can't support
+             * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
+             */
+            return;
+        }
+
+        rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+        if (rc) {
+            warn_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+        } else if (!(bitmask & XFEATURE_XTILE_MASK)) {
+            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+                        "and bitmask=0x%lx", bitmask);
+        }
+    }
+}
+
 /* Calculate XSAVE components based on the configured CPU feature flags */
 static void x86_cpu_enable_xsave_components(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
     int i;
     uint64_t mask;
+    static bool request_perm;
 
     if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
         env->features[FEAT_XSAVE_COMP_LO] = 0;
@@ -6021,6 +6058,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
         }
     }
 
+    /* Only request permission for first vcpu */
+    if (kvm_enabled() && !request_perm) {
+        kvm_request_xsave_components(cpu, mask);
+        request_perm = true;
+    }
+
     env->features[FEAT_XSAVE_COMP_LO] = mask;
     env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
 }
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index ce27d3b1df..a35a1bf9fe 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
 static void kvm_cpu_xsave_init(void)
 {
     static bool first = true;
-    KVMState *s = kvm_state;
+    uint32_t eax, ebx, ecx, edx;
     int i;
 
     if (!first) {
@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
         ExtSaveArea *esa = &x86_ext_save_areas[i];
 
         if (esa->size) {
-            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
-            if (sz != 0) {
-                assert(esa->size == sz);
-                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
-                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
+            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+            if (eax != 0) {
+                assert(esa->size == eax);
+                esa->offset = ebx;
+                esa->ecx = ecx;
             }
         }
     }
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 2c8feb4a6f..3bdcd724c4 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -348,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
     struct kvm_cpuid2 *cpuid;
     uint32_t ret = 0;
     uint32_t cpuid_1_edx;
+    uint64_t bitmask;
 
     cpuid = get_supported_cpuid(s);
 
@@ -405,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         if (!has_msr_arch_capabs) {
             ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
         }
+    } else if (function == 0xd && index == 0 &&
+               (reg == R_EAX || reg == R_EDX)) {
+        struct kvm_device_attr attr = {
+            .group = 0,
+            .attr = KVM_X86_XCOMP_GUEST_SUPP,
+            .addr = (unsigned long) &bitmask
+        };
+
+        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+        if (!sys_attr) {
+            warn_report("cannot get sys attribute capabilities %d", sys_attr);
+        }
+
+        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
+            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+                        "error: %d", rc);
+        }
+        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
     } else if (function == 0x80000001 && reg == R_ECX) {
         /*
          * It's safe to enable TOPOEXT even if it's not returned by


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 4/8] x86: Add XFD faulting bit for state components
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
                   ` (2 preceding siblings ...)
  2022-02-17  6:04 ` [PATCH v2 3/8] x86: Grant AMX permission for guest Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-21 13:00   ` David Edmondson
  2022-02-17  6:04 ` [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration Yang Zhong
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

From: Jing Liu <jing2.liu@intel.com>

Intel introduces XFD faulting mechanism for extended
XSAVE features to dynamically enable the features in
runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set
as 1, it indicates support for XFD faulting of this
state component.

Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 target/i386/cpu.h | 2 ++
 target/i386/cpu.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index d4ad0f56bd..f7fc2e97a6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -558,8 +558,10 @@ typedef enum X86Seg {
 #define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
 
 #define ESA_FEATURE_ALIGN64_BIT         1
+#define ESA_FEATURE_XFD_BIT             2
 
 #define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
+#define ESA_FEATURE_XFD_MASK            (1U << ESA_FEATURE_XFD_BIT)
 
 
 /* CPUID feature words */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 377d993438..5a7ee8c7e1 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5497,7 +5497,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                 const ExtSaveArea *esa = &x86_ext_save_areas[count];
                 *eax = esa->size;
                 *ebx = esa->offset;
-                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
+                *ecx = (esa->ecx & ESA_FEATURE_ALIGN64_MASK) |
+                       (esa->ecx & ESA_FEATURE_XFD_MASK);
             }
         }
         break;


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
                   ` (3 preceding siblings ...)
  2022-02-17  6:04 ` [PATCH v2 4/8] x86: Add XFD faulting bit for state components Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-23 11:30   ` David Edmondson
  2022-02-17  6:04 ` [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration Yang Zhong
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

From: Jing Liu <jing2.liu@intel.com>

Add AMX primary feature bits XFD and AMX_TILE to
enumerate the CPU's AMX capability. Meanwhile, add
AMX TILE and TMUL CPUID leaf and subleaves which
exist when AMX TILE is present to provide the maximum
capability of TILE and TMUL.

Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 target/i386/cpu.c     | 55 ++++++++++++++++++++++++++++++++++++++++---
 target/i386/kvm/kvm.c |  4 +++-
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5a7ee8c7e1..2465bed5df 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -576,6 +576,18 @@ static CPUCacheInfo legacy_l3_cache = {
 #define INTEL_PT_CYCLE_BITMAP    0x1fff         /* Support 0,2^(0~11) */
 #define INTEL_PT_PSB_BITMAP      (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
 
+/* CPUID Leaf 0x1D constants: */
+#define INTEL_AMX_TILE_MAX_SUBLEAF     0x1
+#define INTEL_AMX_TOTAL_TILE_BYTES     0x2000
+#define INTEL_AMX_BYTES_PER_TILE       0x400
+#define INTEL_AMX_BYTES_PER_ROW        0x40
+#define INTEL_AMX_TILE_MAX_NAMES       0x8
+#define INTEL_AMX_TILE_MAX_ROWS        0x10
+
+/* CPUID Leaf 0x1E constants: */
+#define INTEL_AMX_TMUL_MAX_K           0x10
+#define INTEL_AMX_TMUL_MAX_N           0x40
+
 void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
                               uint32_t vendor2, uint32_t vendor3)
 {
@@ -845,8 +857,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "avx512-vp2intersect", NULL, "md-clear", NULL,
             NULL, NULL, "serialize", NULL,
             "tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
-            NULL, NULL, NULL, "avx512-fp16",
-            NULL, NULL, "spec-ctrl", "stibp",
+            NULL, NULL, "amx-bf16", "avx512-fp16",
+            "amx-tile", "amx-int8", "spec-ctrl", "stibp",
             NULL, "arch-capabilities", "core-capability", "ssbd",
         },
         .cpuid = {
@@ -911,7 +923,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .type = CPUID_FEATURE_WORD,
         .feat_names = {
             "xsaveopt", "xsavec", "xgetbv1", "xsaves",
-            NULL, NULL, NULL, NULL,
+            "xfd", NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
@@ -5587,6 +5599,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         }
         break;
     }
+    case 0x1D: {
+        /* AMX TILE */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
+            break;
+        }
+
+        if (count == 0) {
+            /* Highest numbered palette subleaf */
+            *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
+        } else if (count == 1) {
+            *eax = INTEL_AMX_TOTAL_TILE_BYTES |
+                   (INTEL_AMX_BYTES_PER_TILE << 16);
+            *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
+            *ecx = INTEL_AMX_TILE_MAX_ROWS;
+        }
+        break;
+    }
+    case 0x1E: {
+        /* AMX TMUL */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
+            break;
+        }
+
+        if (count == 0) {
+            /* Highest numbered palette subleaf */
+            *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
+        }
+        break;
+    }
     case 0x40000000:
         /*
          * CPUID code in kvm_arch_init_vcpu() ignores stuff
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 3bdcd724c4..8562d3d138 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
                 c = &cpuid_data.entries[cpuid_i++];
             }
             break;
-        case 0x14: {
+        case 0x14:
+        case 0x1d:
+        case 0x1e: {
             uint32_t times;
 
             c->function = i;


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
                   ` (4 preceding siblings ...)
  2022-02-17  6:04 ` [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-21 13:25   ` David Edmondson
  2022-02-17  6:04 ` [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration Yang Zhong
  2022-02-17  6:04 ` [PATCH v2 8/8] linux-header: Sync the linux headers Yang Zhong
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

From: Jing Liu <jing2.liu@intel.com>

When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave
area can be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE
under KVM_CAP_XSAVE2 work with an xsave buffer larger than 4KB.
Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports them.

Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Zeng Guang <guang.zeng@intel.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 target/i386/cpu.h          |  4 ++++
 target/i386/kvm/kvm.c      | 42 ++++++++++++++++++++++++--------------
 target/i386/xsave_helper.c | 33 ++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index f7fc2e97a6..de9da38e42 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1528,6 +1528,10 @@ typedef struct CPUX86State {
     uint64_t opmask_regs[NB_OPMASK_REGS];
     YMMReg zmmh_regs[CPU_NB_REGS];
     ZMMReg hi16_zmm_regs[CPU_NB_REGS];
+#ifdef TARGET_X86_64
+    uint8_t xtilecfg[64];
+    uint8_t xtiledata[8192];
+#endif
 
     /* sysenter registers */
     uint32_t sysenter_cs;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 8562d3d138..ff064e3d8f 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -122,6 +122,7 @@ static uint32_t num_architectural_pmu_gp_counters;
 static uint32_t num_architectural_pmu_fixed_counters;
 
 static int has_xsave;
+static int has_xsave2;
 static int has_xcrs;
 static int has_pit_state2;
 static int has_sregs2;
@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker;
 
 #define KVM_MAX_CPUID_ENTRIES  100
 
+static void kvm_init_xsave(CPUX86State *env)
+{
+    if (has_xsave2) {
+        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
+    } else if (has_xsave) {
+        env->xsave_buf_len = sizeof(struct kvm_xsave);
+    } else {
+        return;
+    }
+
+    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+    memset(env->xsave_buf, 0, env->xsave_buf_len);
+     /*
+      * The allocated storage must be large enough for all of the
+      * possible XSAVE state components.
+      */
+    assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
+           env->xsave_buf_len);
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     struct {
@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     cpuid_i = 0;
 
+    has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
+
     r = kvm_arch_set_tsc_khz(cs);
     if (r < 0) {
         return r;
@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     if (r) {
         goto fail;
     }
-
-    if (has_xsave) {
-        env->xsave_buf_len = sizeof(struct kvm_xsave);
-        env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
-        memset(env->xsave_buf, 0, env->xsave_buf_len);
-
-        /*
-         * The allocated storage must be large enough for all of the
-         * possible XSAVE state components.
-         */
-        assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
-               <= env->xsave_buf_len);
-    }
+    kvm_init_xsave(env);
 
     max_nested_state_len = kvm_max_nested_state_length();
     if (max_nested_state_len > 0) {
@@ -3319,13 +3330,14 @@ static int kvm_get_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
     void *xsave = env->xsave_buf;
-    int ret;
+    int type, ret;
 
     if (!has_xsave) {
         return kvm_get_fpu(cpu);
     }
 
-    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
+    type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
+    ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
     if (ret < 0) {
         return ret;
     }
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
index ac61a96344..b6a004505f 100644
--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
@@ -5,6 +5,7 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
+#include <asm/kvm.h>
 
 void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
 {
@@ -126,6 +127,22 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
 
         memcpy(pkru, &env->pkru, sizeof(env->pkru));
     }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+    if (e->size && e->offset) {
+        XSaveXTILECFG *tilecfg = buf + e->offset;
+
+        memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
+    }
+
+    if (buflen > sizeof(struct kvm_xsave)) {
+        e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+        if (e->size && e->offset && buflen >= e->size + e->offset) {
+            XSaveXTILEDATA *tiledata = buf + e->offset;
+
+            memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
+        }
+    }
 #endif
 }
 
@@ -247,5 +264,21 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
         pkru = buf + e->offset;
         memcpy(&env->pkru, pkru, sizeof(env->pkru));
     }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+    if (e->size && e->offset) {
+        const XSaveXTILECFG *tilecfg = buf + e->offset;
+
+        memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
+    }
+
+    if (buflen > sizeof(struct kvm_xsave)) {
+        e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+        if (e->size && e->offset && buflen >= e->size + e->offset) {
+            const XSaveXTILEDATA *tiledata = buf + e->offset;
+
+            memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
+        }
+    }
 #endif
 }


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
                   ` (5 preceding siblings ...)
  2022-02-17  6:04 ` [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  2022-02-21 13:30   ` David Edmondson
  2022-02-17  6:04 ` [PATCH v2 8/8] linux-header: Sync the linux headers Yang Zhong
  7 siblings, 1 reply; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

From: Zeng Guang <guang.zeng@intel.com>

XFD (eXtended Feature Disable) allows enabling a
feature on xsave state while preventing specific
user threads from using the feature.

Support saving and restoring the XFD MSRs if
CPUID.D.1.EAX[4] enumerates them as valid. Likewise,
migrate the MSRs and related xsave state as needed.

Signed-off-by: Zeng Guang <guang.zeng@intel.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 target/i386/cpu.h     |  9 +++++++++
 target/i386/kvm/kvm.c | 18 ++++++++++++++++++
 target/i386/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index de9da38e42..509c16323a 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -505,6 +505,9 @@ typedef enum X86Seg {
 
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+#define MSR_IA32_XFD                    0x000001c4
+#define MSR_IA32_XFD_ERR                0x000001c5
+
 #define MSR_IA32_BNDCFGS                0x00000d90
 #define MSR_IA32_XSS                    0x00000da0
 #define MSR_IA32_UMWAIT_CONTROL         0xe1
@@ -873,6 +876,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_1_EAX_AVX_VNNI          (1U << 4)
 /* AVX512 BFloat16 Instruction */
 #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
+/* XFD Extend Feature Disabled */
+#define CPUID_D_1_EAX_XFD               (1U << 4)
 
 /* Packets which contain IP payload have LIP values */
 #define CPUID_14_0_ECX_LIP              (1U << 31)
@@ -1617,6 +1622,10 @@ typedef struct CPUX86State {
     uint64_t msr_rtit_cr3_match;
     uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
 
+    /* Per-VCPU XFD MSRs */
+    uint64_t msr_xfd;
+    uint64_t msr_xfd_err;
+
     /* exception/interrupt handling */
     int error_code;
     int exception_is_int;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ff064e3d8f..3dd24b6b0e 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3275,6 +3275,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
                               env->msr_ia32_sgxlepubkeyhash[3]);
         }
 
+        if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+            kvm_msr_entry_add(cpu, MSR_IA32_XFD,
+                              env->msr_xfd);
+            kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
+                              env->msr_xfd_err);
+        }
+
         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
          *       kvm_put_msr_feature_control. */
     }
@@ -3667,6 +3674,11 @@ static int kvm_get_msrs(X86CPU *cpu)
         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
     }
 
+    if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+        kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
+    }
+
     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
     if (ret < 0) {
         return ret;
@@ -3963,6 +3975,12 @@ static int kvm_get_msrs(X86CPU *cpu)
             env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
                            msrs[i].data;
             break;
+        case MSR_IA32_XFD:
+            env->msr_xfd = msrs[i].data;
+            break;
+        case MSR_IA32_XFD_ERR:
+            env->msr_xfd_err = msrs[i].data;
+            break;
         }
     }
 
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 6202f47793..1f9d0c46f1 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1483,6 +1483,46 @@ static const VMStateDescription vmstate_pdptrs = {
     }
 };
 
+static bool xfd_msrs_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
+}
+
+static const VMStateDescription vmstate_msr_xfd = {
+    .name = "cpu/msr_xfd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = xfd_msrs_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.msr_xfd, X86CPU),
+        VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool amx_xtile_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
+}
+
+static const VMStateDescription vmstate_amx_xtile = {
+    .name = "cpu/intel_amx_xtile",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = amx_xtile_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
+        VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_x86_cpu = {
     .name = "cpu",
     .version_id = 12,
@@ -1622,6 +1662,8 @@ const VMStateDescription vmstate_x86_cpu = {
         &vmstate_msr_tsx_ctrl,
         &vmstate_msr_intel_sgx,
         &vmstate_pdptrs,
+        &vmstate_msr_xfd,
+        &vmstate_amx_xtile,
         NULL
     }
 };


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 8/8] linux-header: Sync the linux headers
  2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
                   ` (6 preceding siblings ...)
  2022-02-17  6:04 ` [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration Yang Zhong
@ 2022-02-17  6:04 ` Yang Zhong
  7 siblings, 0 replies; 20+ messages in thread
From: Yang Zhong @ 2022-02-17  6:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, wei.w.wang,
	guang.zeng, pbonzini

This patch will be dropped once Qemu syncs the Linux 5.17 headers.
All linux-headers changes are collected here only so that
maintainers can easily remove them once those patches are queued.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 linux-headers/asm-x86/kvm.h | 17 +++++++++++++++++
 linux-headers/linux/kvm.h   |  4 ++++
 2 files changed, 21 insertions(+)

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 5a776a08f7..17735430db 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -375,7 +375,21 @@ struct kvm_debugregs {
 
 /* for KVM_CAP_XSAVE */
 struct kvm_xsave {
+	/*
+	 * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+	 * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+	 * respectively, when invoked on the vm file descriptor.
+	 *
+	 * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+	 * will always be at least 4096. Currently, it is only greater
+	 * than 4096 if a dynamic feature has been enabled with
+	 * ``arch_prctl()``, but this may change in the future.
+	 *
+	 * The offsets of the state save areas in struct kvm_xsave follow
+	 * the contents of CPUID leaf 0xD on the host.
+	 */
 	__u32 region[1024];
+	__u32 extra[0];
 };
 
 #define KVM_MAX_XCRS	16
@@ -438,6 +452,9 @@ struct kvm_sync_regs {
 
 #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE	0x00000001
 
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP       0
+
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 02c5e7b7bb..54ce7e6d90 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1130,6 +1130,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_BINARY_STATS_FD 203
 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 #define KVM_CAP_ARM_MTE 205
+#define KVM_CAP_XSAVE2  208
+#define KVM_CAP_SYS_ATTRIBUTES 209
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1677,6 +1679,8 @@ struct kvm_xen_hvm_attr {
 #define KVM_GET_SREGS2             _IOR(KVMIO,  0xcc, struct kvm_sregs2)
 #define KVM_SET_SREGS2             _IOW(KVMIO,  0xcd, struct kvm_sregs2)
 
+#define KVM_GET_XSAVE2           _IOR(KVMIO,  0xcf, struct kvm_xsave)
+
 struct kvm_xen_vcpu_attr {
 	__u16 type;
 	__u16 pad[3];


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 3/8] x86: Grant AMX permission for guest
  2022-02-17  5:58   ` Yang Zhong
@ 2022-02-17 13:44     ` Paolo Bonzini
  2022-02-25 10:40       ` Yang Zhong
  0 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2022-02-17 13:44 UTC (permalink / raw)
  To: Yang Zhong, qemu-devel
  Cc: seanjc, kevin.tian, jing2.liu, wei.w.wang, guang.zeng

On 2/17/22 06:58, Yang Zhong wrote:
>> +
>> +    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
>> +        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
>> +        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {
>     Paolo, last time you suggested below changes for here:
> 
>     rc = kvm_arch_get_supported_cpuid(s, 0xd, 0,
>                                    (xdata_bit < 32 ? R_EAX : R_EDX));
>     if (!(rc & BIT(xdata_bit & 31)) {
>        ...
>     }
> 
>    Since I used "mask" as parameter here, so I had to directly use R_EAX here.
>    Please review and if need change it to like "(xdata_bit < 32 ? R_EAX : R_EDX)",
>    I will change this in next version, thanks!

I looked at this function more closely because it didn't compile on non-Linux
systems, too.  I think it's better to write it already to plan for more
dynamic features.  In the code below, I'm also relying on
KVM_GET_SUPPORTED_CPUID/KVM_X86_COMP_GUEST_SUPP being executed
before ARCH_REQ_XCOMP_GUEST_PERM, which therefore cannot fail.

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 377d993438..1d0c006077 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -43,8 +43,6 @@
  #include "disas/capstone.h"
  #include "cpu-internal.h"
  
-#include <sys/syscall.h>
-
  /* Helpers for building CPUID[2] descriptors: */
  
  struct CPUID2CacheDescriptorInfo {
@@ -6002,40 +6000,6 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w)
      }
  }
  
-static void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
-{
-    KVMState *s = kvm_state;
-    uint64_t bitmask;
-    long rc;
-
-    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
-        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
-        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {
-            warn_report("no amx support from supported_xcr0, "
-                        "bitmask:0x%lx", bitmask);
-            return;
-        }
-
-        rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
-                      XSTATE_XTILE_DATA_BIT);
-        if (rc) {
-            /*
-             * The older kernel version(<5.15) can't support
-             * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
-             */
-            return;
-        }
-
-        rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
-        if (rc) {
-            warn_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
-        } else if (!(bitmask & XFEATURE_XTILE_MASK)) {
-            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
-                        "and bitmask=0x%lx", bitmask);
-        }
-    }
-}
-
  /* Calculate XSAVE components based on the configured CPU feature flags */
  static void x86_cpu_enable_xsave_components(X86CPU *cpu)
  {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index d4ad0f56bd..de949bd63d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -551,11 +551,8 @@ typedef enum X86Seg {
  #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
  #define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
  #define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
-#define XFEATURE_XTILE_MASK             (XSTATE_XTILE_CFG_MASK \
-                                         | XSTATE_XTILE_DATA_MASK)
  
-#define ARCH_GET_XCOMP_GUEST_PERM       0x1024
-#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
+#define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
  
  #define ESA_FEATURE_ALIGN64_BIT         1
  
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 3bdcd724c4..4b07778970 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -17,6 +17,7 @@
  #include "qapi/error.h"
  #include <sys/ioctl.h>
  #include <sys/utsname.h>
+#include <sys/syscall.h>
  
  #include <linux/kvm.h>
  #include "standard-headers/asm-x86/kvm_para.h"
@@ -5168,3 +5169,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
  {
      return !sev_es_enabled();
  }
+
+#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
+
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
+{
+    KVMState *s = kvm_state;
+    uint64_t supported;
+
+    mask &= XSTATE_DYNAMIC_MASK;
+    if (!mask) {
+	return;
+    }
+    /*
+     * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
+     * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
+     * about them already because they are not supported features.
+     */
+    supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+    supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
+    mask &= ~supported;
+
+    while (mask) {
+        int bit = ctz64(mask);
+        int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+        if (rc) {
+            /*
+             * Older kernel version (<5.17) do not support
+             * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
+             * any dynamic feature from kvm_arch_get_supported_cpuid.
+             */
+            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+                        "for feature bit %d", bit);
+        }
+	mask &= ~BIT_ULL(bit);
+    }
+}
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index a978509d50..4124912c20 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
  uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
  
  bool kvm_enable_sgx_provisioning(KVMState *s);
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
  
  #endif


If this works, the rest of the series is good to go!

Thanks,

Paolo


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state
  2022-02-17  6:04 ` [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
@ 2022-02-21 12:51   ` David Edmondson
  0 siblings, 0 replies; 20+ messages in thread
From: David Edmondson @ 2022-02-21 12:51 UTC (permalink / raw)
  To: Yang Zhong
  Cc: kevin.tian, seanjc, jing2.liu, qemu-devel, wei.w.wang, pbonzini,
	guang.zeng

On Wednesday, 2022-02-16 at 22:04:27 -08, Yang Zhong wrote:

> From: Jing Liu <jing2.liu@intel.com>
>
> The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1]
> indicate whether the extended state component locates
> on the next 64-byte boundary following the preceding state
> component when the compacted format of an XSAVE area is
> used.
>
> Right now, they are all zero because no supported component
> needed the bit to be set, but the upcoming AMX feature will
> use it.  Fix the subleaves value according to KVM's supported
> cpuid.
>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>

Reviewed-by: David Edmondson <david.edmondson@oracle.com>

> ---
>  target/i386/cpu.h         | 6 ++++++
>  target/i386/cpu.c         | 1 +
>  target/i386/kvm/kvm-cpu.c | 1 +
>  3 files changed, 8 insertions(+)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 9911d7c871..de1dc124ab 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -548,6 +548,11 @@ typedef enum X86Seg {
>  #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
>  #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
>
> +#define ESA_FEATURE_ALIGN64_BIT         1
> +
> +#define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
> +
> +
>  /* CPUID feature words */
>  typedef enum FeatureWord {
>      FEAT_1_EDX,         /* CPUID[1].EDX */
> @@ -1354,6 +1359,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
>  typedef struct ExtSaveArea {
>      uint32_t feature, bits;
>      uint32_t offset, size;
> +    uint32_t ecx;
>  } ExtSaveArea;
>
>  #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index aa9e636800..37f06b0b1a 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -5487,6 +5487,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                  const ExtSaveArea *esa = &x86_ext_save_areas[count];
>                  *eax = esa->size;
>                  *ebx = esa->offset;
> +                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
>              }
>          }
>          break;
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index d95028018e..ce27d3b1df 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void)
>              if (sz != 0) {
>                  assert(esa->size == sz);
>                  esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
> +                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
>              }
>          }
>      }

dme.
-- 
We're deep in discussion, the party's on mute.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components
  2022-02-17  6:04 ` [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components Yang Zhong
@ 2022-02-21 12:53   ` David Edmondson
  0 siblings, 0 replies; 20+ messages in thread
From: David Edmondson @ 2022-02-21 12:53 UTC (permalink / raw)
  To: Yang Zhong
  Cc: kevin.tian, seanjc, jing2.liu, qemu-devel, wei.w.wang, pbonzini,
	guang.zeng

On Wednesday, 2022-02-16 at 22:04:28 -08, Yang Zhong wrote:

> From: Jing Liu <jing2.liu@intel.com>
>
> The AMX TILECFG register and the TMMx tile data registers are
> saved/restored via XSAVE, respectively in state component 17
> (64 bytes) and state component 18 (8192 bytes).
>
> Add AMX feature bits to x86_ext_save_areas array to set
> up AMX components. Add structs that define the layout of
> AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the
> structs sizes.
>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>

Reviewed-by: David Edmondson <david.edmondson@oracle.com>

> ---
>  target/i386/cpu.h | 18 +++++++++++++++++-
>  target/i386/cpu.c |  8 ++++++++
>  2 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index de1dc124ab..06d2d6bccf 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -537,6 +537,8 @@ typedef enum X86Seg {
>  #define XSTATE_ZMM_Hi256_BIT            6
>  #define XSTATE_Hi16_ZMM_BIT             7
>  #define XSTATE_PKRU_BIT                 9
> +#define XSTATE_XTILE_CFG_BIT            17
> +#define XSTATE_XTILE_DATA_BIT           18
>
>  #define XSTATE_FP_MASK                  (1ULL << XSTATE_FP_BIT)
>  #define XSTATE_SSE_MASK                 (1ULL << XSTATE_SSE_BIT)
> @@ -845,6 +847,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_7_0_EDX_TSX_LDTRK         (1U << 16)
>  /* AVX512_FP16 instruction */
>  #define CPUID_7_0_EDX_AVX512_FP16       (1U << 23)
> +/* AMX tile (two-dimensional register) */
> +#define CPUID_7_0_EDX_AMX_TILE          (1U << 24)
>  /* Speculation Control */
>  #define CPUID_7_0_EDX_SPEC_CTRL         (1U << 26)
>  /* Single Thread Indirect Branch Predictors */
> @@ -1348,6 +1352,16 @@ typedef struct XSavePKRU {
>      uint32_t padding;
>  } XSavePKRU;
>
> +/* Ext. save area 17: AMX XTILECFG state */
> +typedef struct XSaveXTILECFG {
> +    uint8_t xtilecfg[64];
> +} XSaveXTILECFG;
> +
> +/* Ext. save area 18: AMX XTILEDATA state */
> +typedef struct XSaveXTILEDATA {
> +    uint8_t xtiledata[8][1024];
> +} XSaveXTILEDATA;
> +
>  QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
>  QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
>  QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
> @@ -1355,6 +1369,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
>  QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
>  QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
>  QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
> +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
> +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
>
>  typedef struct ExtSaveArea {
>      uint32_t feature, bits;
> @@ -1362,7 +1378,7 @@ typedef struct ExtSaveArea {
>      uint32_t ecx;
>  } ExtSaveArea;
>
> -#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
> +#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
>
>  extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 37f06b0b1a..ea7e8f9081 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
>      [XSTATE_PKRU_BIT] =
>            { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
>              .size = sizeof(XSavePKRU) },
> +    [XSTATE_XTILE_CFG_BIT] = {
> +        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
> +        .size = sizeof(XSaveXTILECFG),
> +    },
> +    [XSTATE_XTILE_DATA_BIT] = {
> +        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
> +        .size = sizeof(XSaveXTILEDATA)
> +    },
>  };
>
>  static uint32_t xsave_area_size(uint64_t mask)

dme.
-- 
Would you offer your throat to the wolf with the red roses?


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 4/8] x86: Add XFD faulting bit for state components
  2022-02-17  6:04 ` [PATCH v2 4/8] x86: Add XFD faulting bit for state components Yang Zhong
@ 2022-02-21 13:00   ` David Edmondson
  2022-02-25  7:10     ` Yang Zhong
  0 siblings, 1 reply; 20+ messages in thread
From: David Edmondson @ 2022-02-21 13:00 UTC (permalink / raw)
  To: Yang Zhong
  Cc: kevin.tian, seanjc, jing2.liu, qemu-devel, wei.w.wang, pbonzini,
	guang.zeng

On Wednesday, 2022-02-16 at 22:04:30 -08, Yang Zhong wrote:

> From: Jing Liu <jing2.liu@intel.com>
>
> Intel introduces XFD faulting mechanism for extended
> XSAVE features to dynamically enable the features in
> runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set
> as 1, it indicates support for XFD faulting of this
> state component.
>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>

Small comment below...

Reviewed-by: David Edmondson <david.edmondson@oracle.com>

> ---
>  target/i386/cpu.h | 2 ++
>  target/i386/cpu.c | 3 ++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index d4ad0f56bd..f7fc2e97a6 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -558,8 +558,10 @@ typedef enum X86Seg {
>  #define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
>
>  #define ESA_FEATURE_ALIGN64_BIT         1
> +#define ESA_FEATURE_XFD_BIT             2
>
>  #define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
> +#define ESA_FEATURE_XFD_MASK            (1U << ESA_FEATURE_XFD_BIT)
>
>  /* CPUID feature words */
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 377d993438..5a7ee8c7e1 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -5497,7 +5497,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                  const ExtSaveArea *esa = &x86_ext_save_areas[count];
>                  *eax = esa->size;
>                  *ebx = esa->offset;
> -                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
> +                *ecx = (esa->ecx & ESA_FEATURE_ALIGN64_MASK) |
> +                       (esa->ecx & ESA_FEATURE_XFD_MASK);

Is:

                *ecx = esa->ecx &
                       (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);

not more usual?

>              }
>          }
>          break;

dme.
-- 
All of us, we're going out tonight. We're gonna walk all over your cars.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration
  2022-02-17  6:04 ` [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration Yang Zhong
@ 2022-02-21 13:25   ` David Edmondson
  2022-02-25  7:33     ` Yang Zhong
  0 siblings, 1 reply; 20+ messages in thread
From: David Edmondson @ 2022-02-21 13:25 UTC (permalink / raw)
  To: Yang Zhong
  Cc: kevin.tian, seanjc, jing2.liu, qemu-devel, wei.w.wang, pbonzini,
	guang.zeng

On Wednesday, 2022-02-16 at 22:04:32 -08, Yang Zhong wrote:

> From: Jing Liu <jing2.liu@intel.com>
>
> When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave
> area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE
> under KVM_CAP_XSAVE2 works with a xsave buffer larger than 4KB.
> Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it.
>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> Signed-off-by: Zeng Guang <guang.zeng@intel.com>
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> ---
>  target/i386/cpu.h          |  4 ++++
>  target/i386/kvm/kvm.c      | 42 ++++++++++++++++++++++++--------------
>  target/i386/xsave_helper.c | 33 ++++++++++++++++++++++++++++++
>  3 files changed, 64 insertions(+), 15 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index f7fc2e97a6..de9da38e42 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1528,6 +1528,10 @@ typedef struct CPUX86State {
>      uint64_t opmask_regs[NB_OPMASK_REGS];
>      YMMReg zmmh_regs[CPU_NB_REGS];
>      ZMMReg hi16_zmm_regs[CPU_NB_REGS];
> +#ifdef TARGET_X86_64
> +    uint8_t xtilecfg[64];
> +    uint8_t xtiledata[8192];
> +#endif

Can we have defined constants for these sizes? They also appear in patch
2.

>
>      /* sysenter registers */
>      uint32_t sysenter_cs;
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 8562d3d138..ff064e3d8f 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -122,6 +122,7 @@ static uint32_t num_architectural_pmu_gp_counters;
>  static uint32_t num_architectural_pmu_fixed_counters;
>
>  static int has_xsave;
> +static int has_xsave2;
>  static int has_xcrs;
>  static int has_pit_state2;
>  static int has_sregs2;
> @@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker;
>
>  #define KVM_MAX_CPUID_ENTRIES  100
>
> +static void kvm_init_xsave(CPUX86State *env)
> +{
> +    if (has_xsave2) {
> +        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);

Idle curiosity - why do we round this up?

> +    } else if (has_xsave) {
> +        env->xsave_buf_len = sizeof(struct kvm_xsave);
> +    } else {
> +        return;
> +    }
> +
> +    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
> +    memset(env->xsave_buf, 0, env->xsave_buf_len);
> +     /*
> +      * The allocated storage must be large enough for all of the
> +      * possible XSAVE state components.
> +      */
> +    assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
> +           env->xsave_buf_len);
> +}
> +
>  int kvm_arch_init_vcpu(CPUState *cs)
>  {
>      struct {
> @@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
>
>      cpuid_i = 0;
>
> +    has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
> +
>      r = kvm_arch_set_tsc_khz(cs);
>      if (r < 0) {
>          return r;
> @@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>      if (r) {
>          goto fail;
>      }
> -
> -    if (has_xsave) {
> -        env->xsave_buf_len = sizeof(struct kvm_xsave);
> -        env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
> -        memset(env->xsave_buf, 0, env->xsave_buf_len);
> -
> -        /*
> -         * The allocated storage must be large enough for all of the
> -         * possible XSAVE state components.
> -         */
> -        assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
> -               <= env->xsave_buf_len);
> -    }
> +    kvm_init_xsave(env);
>
>      max_nested_state_len = kvm_max_nested_state_length();
>      if (max_nested_state_len > 0) {
> @@ -3319,13 +3330,14 @@ static int kvm_get_xsave(X86CPU *cpu)
>  {
>      CPUX86State *env = &cpu->env;
>      void *xsave = env->xsave_buf;
> -    int ret;
> +    int type, ret;
>
>      if (!has_xsave) {
>          return kvm_get_fpu(cpu);
>      }
>
> -    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
> +    type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
> +    ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
>      if (ret < 0) {
>          return ret;
>      }
> diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
> index ac61a96344..b6a004505f 100644
> --- a/target/i386/xsave_helper.c
> +++ b/target/i386/xsave_helper.c
> @@ -5,6 +5,7 @@
>  #include "qemu/osdep.h"
>
>  #include "cpu.h"
> +#include <asm/kvm.h>
>
>  void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
>  {
> @@ -126,6 +127,22 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
>
>          memcpy(pkru, &env->pkru, sizeof(env->pkru));
>      }
> +
> +    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
> +    if (e->size && e->offset) {
> +        XSaveXTILECFG *tilecfg = buf + e->offset;
> +
> +        memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
> +    }
> +
> +    if (buflen > sizeof(struct kvm_xsave)) {
> +        e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
> +        if (e->size && e->offset && buflen >= e->size + e->offset) {
> +            XSaveXTILEDATA *tiledata = buf + e->offset;
> +
> +            memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
> +        }
> +    }
>  #endif
>  }
>
> @@ -247,5 +264,21 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
>          pkru = buf + e->offset;
>          memcpy(&env->pkru, pkru, sizeof(env->pkru));
>      }
> +
> +    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
> +    if (e->size && e->offset) {
> +        const XSaveXTILECFG *tilecfg = buf + e->offset;
> +
> +        memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
> +    }
> +
> +    if (buflen > sizeof(struct kvm_xsave)) {
> +        e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
> +        if (e->size && e->offset && buflen >= e->size + e->offset) {
> +            const XSaveXTILEDATA *tiledata = buf + e->offset;
> +
> +            memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
> +        }
> +    }
>  #endif
>  }

dme.
-- 
Why does it have to be like this? I can never tell.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration
  2022-02-17  6:04 ` [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration Yang Zhong
@ 2022-02-21 13:30   ` David Edmondson
  0 siblings, 0 replies; 20+ messages in thread
From: David Edmondson @ 2022-02-21 13:30 UTC (permalink / raw)
  To: Yang Zhong
  Cc: kevin.tian, seanjc, jing2.liu, qemu-devel, wei.w.wang, pbonzini,
	guang.zeng

On Wednesday, 2022-02-16 at 22:04:33 -08, Yang Zhong wrote:

> From: Zeng Guang <guang.zeng@intel.com>
>
> XFD(eXtended Feature Disable) allows to enable a
> feature on xsave state while preventing specific
> user threads from using the feature.
>
> Support save and restore XFD MSRs if CPUID.D.1.EAX[4]
> enumerate to be valid. Likewise migrate the MSRs and
> related xsave state necessarily.
>
> Signed-off-by: Zeng Guang <guang.zeng@intel.com>
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>

Reviewed-by: David Edmondson <david.edmondson@oracle.com>

> ---
>  target/i386/cpu.h     |  9 +++++++++
>  target/i386/kvm/kvm.c | 18 ++++++++++++++++++
>  target/i386/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 69 insertions(+)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index de9da38e42..509c16323a 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -505,6 +505,9 @@ typedef enum X86Seg {
>
>  #define MSR_VM_HSAVE_PA                 0xc0010117
>
> +#define MSR_IA32_XFD                    0x000001c4
> +#define MSR_IA32_XFD_ERR                0x000001c5
> +
>  #define MSR_IA32_BNDCFGS                0x00000d90
>  #define MSR_IA32_XSS                    0x00000da0
>  #define MSR_IA32_UMWAIT_CONTROL         0xe1
> @@ -873,6 +876,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_7_1_EAX_AVX_VNNI          (1U << 4)
>  /* AVX512 BFloat16 Instruction */
>  #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
> +/* XFD Extend Feature Disabled */
> +#define CPUID_D_1_EAX_XFD               (1U << 4)
>
>  /* Packets which contain IP payload have LIP values */
>  #define CPUID_14_0_ECX_LIP              (1U << 31)
> @@ -1617,6 +1622,10 @@ typedef struct CPUX86State {
>      uint64_t msr_rtit_cr3_match;
>      uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
>
> +    /* Per-VCPU XFD MSRs */
> +    uint64_t msr_xfd;
> +    uint64_t msr_xfd_err;
> +
>      /* exception/interrupt handling */
>      int error_code;
>      int exception_is_int;
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index ff064e3d8f..3dd24b6b0e 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -3275,6 +3275,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
>                                env->msr_ia32_sgxlepubkeyhash[3]);
>          }
>
> +        if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
> +            kvm_msr_entry_add(cpu, MSR_IA32_XFD,
> +                              env->msr_xfd);
> +            kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
> +                              env->msr_xfd_err);
> +        }
> +
>          /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
>           *       kvm_put_msr_feature_control. */
>      }
> @@ -3667,6 +3674,11 @@ static int kvm_get_msrs(X86CPU *cpu)
>          kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
>      }
>
> +    if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
> +        kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
> +        kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
> +    }
> +
>      ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
>      if (ret < 0) {
>          return ret;
> @@ -3963,6 +3975,12 @@ static int kvm_get_msrs(X86CPU *cpu)
>              env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
>                             msrs[i].data;
>              break;
> +        case MSR_IA32_XFD:
> +            env->msr_xfd = msrs[i].data;
> +            break;
> +        case MSR_IA32_XFD_ERR:
> +            env->msr_xfd_err = msrs[i].data;
> +            break;
>          }
>      }
>
> diff --git a/target/i386/machine.c b/target/i386/machine.c
> index 6202f47793..1f9d0c46f1 100644
> --- a/target/i386/machine.c
> +++ b/target/i386/machine.c
> @@ -1483,6 +1483,46 @@ static const VMStateDescription vmstate_pdptrs = {
>      }
>  };
>
> +static bool xfd_msrs_needed(void *opaque)
> +{
> +    X86CPU *cpu = opaque;
> +    CPUX86State *env = &cpu->env;
> +
> +    return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
> +}
> +
> +static const VMStateDescription vmstate_msr_xfd = {
> +    .name = "cpu/msr_xfd",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = xfd_msrs_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(env.msr_xfd, X86CPU),
> +        VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static bool amx_xtile_needed(void *opaque)
> +{
> +    X86CPU *cpu = opaque;
> +    CPUX86State *env = &cpu->env;
> +
> +    return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
> +}
> +
> +static const VMStateDescription vmstate_amx_xtile = {
> +    .name = "cpu/intel_amx_xtile",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = amx_xtile_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
> +        VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  const VMStateDescription vmstate_x86_cpu = {
>      .name = "cpu",
>      .version_id = 12,
> @@ -1622,6 +1662,8 @@ const VMStateDescription vmstate_x86_cpu = {
>          &vmstate_msr_tsx_ctrl,
>          &vmstate_msr_intel_sgx,
>          &vmstate_pdptrs,
> +        &vmstate_msr_xfd,
> +        &vmstate_amx_xtile,
>          NULL
>      }
>  };

dme.
-- 
All of us, we're going out tonight. We're gonna walk all over your cars.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration
  2022-02-17  6:04 ` [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration Yang Zhong
@ 2022-02-23 11:30   ` David Edmondson
  0 siblings, 0 replies; 20+ messages in thread
From: David Edmondson @ 2022-02-23 11:30 UTC (permalink / raw)
  To: Yang Zhong
  Cc: kevin.tian, seanjc, jing2.liu, qemu-devel, wei.w.wang, pbonzini,
	guang.zeng

On Wednesday, 2022-02-16 at 22:04:31 -08, Yang Zhong wrote:

> From: Jing Liu <jing2.liu@intel.com>
>
> Add AMX primary feature bits XFD and AMX_TILE to
> enumerate the CPU's AMX capability. Meanwhile, add
> AMX TILE and TMUL CPUID leaf and subleaves which
> exist when AMX TILE is present to provide the maximum
> capability of TILE and TMUL.
>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>

Reviewed-by: David Edmondson <david.edmondson@oracle.com>

> ---
>  target/i386/cpu.c     | 55 ++++++++++++++++++++++++++++++++++++++++---
>  target/i386/kvm/kvm.c |  4 +++-
>  2 files changed, 55 insertions(+), 4 deletions(-)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 5a7ee8c7e1..2465bed5df 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -576,6 +576,18 @@ static CPUCacheInfo legacy_l3_cache = {
>  #define INTEL_PT_CYCLE_BITMAP    0x1fff         /* Support 0,2^(0~11) */
>  #define INTEL_PT_PSB_BITMAP      (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
>
> +/* CPUID Leaf 0x1D constants: */
> +#define INTEL_AMX_TILE_MAX_SUBLEAF     0x1
> +#define INTEL_AMX_TOTAL_TILE_BYTES     0x2000
> +#define INTEL_AMX_BYTES_PER_TILE       0x400
> +#define INTEL_AMX_BYTES_PER_ROW        0x40
> +#define INTEL_AMX_TILE_MAX_NAMES       0x8
> +#define INTEL_AMX_TILE_MAX_ROWS        0x10
> +
> +/* CPUID Leaf 0x1E constants: */
> +#define INTEL_AMX_TMUL_MAX_K           0x10
> +#define INTEL_AMX_TMUL_MAX_N           0x40
> +
>  void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
>                                uint32_t vendor2, uint32_t vendor3)
>  {
> @@ -845,8 +857,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>              "avx512-vp2intersect", NULL, "md-clear", NULL,
>              NULL, NULL, "serialize", NULL,
>              "tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
> -            NULL, NULL, NULL, "avx512-fp16",
> -            NULL, NULL, "spec-ctrl", "stibp",
> +            NULL, NULL, "amx-bf16", "avx512-fp16",
> +            "amx-tile", "amx-int8", "spec-ctrl", "stibp",
>              NULL, "arch-capabilities", "core-capability", "ssbd",
>          },
>          .cpuid = {
> @@ -911,7 +923,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>          .type = CPUID_FEATURE_WORD,
>          .feat_names = {
>              "xsaveopt", "xsavec", "xgetbv1", "xsaves",
> -            NULL, NULL, NULL, NULL,
> +            "xfd", NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
> @@ -5587,6 +5599,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          }
>          break;
>      }
> +    case 0x1D: {
> +        /* AMX TILE */
> +        *eax = 0;
> +        *ebx = 0;
> +        *ecx = 0;
> +        *edx = 0;
> +        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
> +            break;
> +        }
> +
> +        if (count == 0) {
> +            /* Highest numbered palette subleaf */
> +            *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
> +        } else if (count == 1) {
> +            *eax = INTEL_AMX_TOTAL_TILE_BYTES |
> +                   (INTEL_AMX_BYTES_PER_TILE << 16);
> +            *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
> +            *ecx = INTEL_AMX_TILE_MAX_ROWS;
> +        }
> +        break;
> +    }
> +    case 0x1E: {
> +        /* AMX TMUL */
> +        *eax = 0;
> +        *ebx = 0;
> +        *ecx = 0;
> +        *edx = 0;
> +        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
> +            break;
> +        }
> +
> +        if (count == 0) {
> +            /* Highest numbered palette subleaf */
> +            *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
> +        }
> +        break;
> +    }
>      case 0x40000000:
>          /*
>           * CPUID code in kvm_arch_init_vcpu() ignores stuff
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 3bdcd724c4..8562d3d138 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
>                  c = &cpuid_data.entries[cpuid_i++];
>              }
>              break;
> -        case 0x14: {
> +        case 0x14:
> +        case 0x1d:
> +        case 0x1e: {
>              uint32_t times;
>
>              c->function = i;

dme.
-- 
Why does it have to be like this? I can never tell.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 4/8] x86: Add XFD faulting bit for state components
  2022-02-21 13:00   ` David Edmondson
@ 2022-02-25  7:10     ` Yang Zhong
  0 siblings, 0 replies; 20+ messages in thread
From: Yang Zhong @ 2022-02-25  7:10 UTC (permalink / raw)
  To: David Edmondson
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, qemu-devel,
	wei.w.wang, pbonzini, guang.zeng

On Mon, Feb 21, 2022 at 01:00:41PM +0000, David Edmondson wrote:
> On Wednesday, 2022-02-16 at 22:04:30 -08, Yang Zhong wrote:
> 
> > From: Jing Liu <jing2.liu@intel.com>
> >
> > Intel introduces XFD faulting mechanism for extended
> > XSAVE features to dynamically enable the features in
> > runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set
> > as 1, it indicates support for XFD faulting of this
> > state component.
> >
> > Signed-off-by: Jing Liu <jing2.liu@intel.com>
> > Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> 
> Small comment below...
> 
> Reviewed-by: David Edmondson <david.edmondson@oracle.com>
> 
> > ---
> >  target/i386/cpu.h | 2 ++
> >  target/i386/cpu.c | 3 ++-
> >  2 files changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index d4ad0f56bd..f7fc2e97a6 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -558,8 +558,10 @@ typedef enum X86Seg {
> >  #define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
> >
> >  #define ESA_FEATURE_ALIGN64_BIT         1
> > +#define ESA_FEATURE_XFD_BIT             2
> >
> >  #define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
> > +#define ESA_FEATURE_XFD_MASK            (1U << ESA_FEATURE_XFD_BIT)
> >
> >  /* CPUID feature words */
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 377d993438..5a7ee8c7e1 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -5497,7 +5497,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >                  const ExtSaveArea *esa = &x86_ext_save_areas[count];
> >                  *eax = esa->size;
> >                  *ebx = esa->offset;
> > -                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
> > +                *ecx = (esa->ecx & ESA_FEATURE_ALIGN64_MASK) |
> > +                       (esa->ecx & ESA_FEATURE_XFD_MASK);
> 
> Is:
> 
>                 *ecx = esa->ecx &
>                        (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);
> 
> not more usual?


  Thanks David, I will update this in the next version.

  Yang

> 
> >              }
> >          }
> >          break;
> 
> dme.
> -- 
> All of us, we're going out tonight. We're gonna walk all over your cars.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration
  2022-02-21 13:25   ` David Edmondson
@ 2022-02-25  7:33     ` Yang Zhong
  0 siblings, 0 replies; 20+ messages in thread
From: Yang Zhong @ 2022-02-25  7:33 UTC (permalink / raw)
  To: David Edmondson
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, qemu-devel,
	wei.w.wang, pbonzini, guang.zeng

On Mon, Feb 21, 2022 at 01:25:53PM +0000, David Edmondson wrote:
> On Wednesday, 2022-02-16 at 22:04:32 -08, Yang Zhong wrote:
> 
> > From: Jing Liu <jing2.liu@intel.com>
> >
> > When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave
> > area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE
> > under KVM_CAP_XSAVE2 works with a xsave buffer larger than 4KB.
> > Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it.
> >
> > Signed-off-by: Jing Liu <jing2.liu@intel.com>
> > Signed-off-by: Zeng Guang <guang.zeng@intel.com>
> > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> > ---
> >  target/i386/cpu.h          |  4 ++++
> >  target/i386/kvm/kvm.c      | 42 ++++++++++++++++++++++++--------------
> >  target/i386/xsave_helper.c | 33 ++++++++++++++++++++++++++++++
> >  3 files changed, 64 insertions(+), 15 deletions(-)
> >
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index f7fc2e97a6..de9da38e42 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1528,6 +1528,10 @@ typedef struct CPUX86State {
> >      uint64_t opmask_regs[NB_OPMASK_REGS];
> >      YMMReg zmmh_regs[CPU_NB_REGS];
> >      ZMMReg hi16_zmm_regs[CPU_NB_REGS];
> > +#ifdef TARGET_X86_64
> > +    uint8_t xtilecfg[64];
> > +    uint8_t xtiledata[8192];
> > +#endif
> 
> Can we have defined constants for these sizes? They also appear in patch
> 2.

  David, the constants used here are kept consistent with those of the other
  members in this struct and file. Thanks!

  Yang


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 3/8] x86: Grant AMX permission for guest
  2022-02-17 13:44     ` Paolo Bonzini
@ 2022-02-25 10:40       ` Yang Zhong
  0 siblings, 0 replies; 20+ messages in thread
From: Yang Zhong @ 2022-02-25 10:40 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: yang.zhong, kevin.tian, seanjc, jing2.liu, qemu-devel,
	wei.w.wang, guang.zeng

On Thu, Feb 17, 2022 at 02:44:10PM +0100, Paolo Bonzini wrote:
> On 2/17/22 06:58, Yang Zhong wrote:
> >>+
> >>+    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
> >>+        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> >>+        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {
> >    Paolo, last time you suggested below changes for here:
> >
> >    rc = kvm_arch_get_supported_cpuid(s, 0xd, 0,
> >                                   (xdata_bit < 32 ? R_EAX : R_EDX));
> >    if (!(rc & BIT(xdata_bit & 31)) {
> >       ...
> >    }
> >
> >   Since I used "mask" as parameter here, so I had to directly use R_EAX here.
> >   Please review and if need change it to like "(xdata_bit < 32 ? R_EAX : R_EDX)",
> >   I will change this in next version, thanks!
> 
> I looked at this function more closely because it didn't compile on non-Linux
> systems, too.  I think it's better to write it already to plan for more
> dynamic features.  In the code below, I'm also relying on
> KVM_GET_SUPPORTED_CPUID/KVM_X86_COMP_GUEST_SUPP being executed
> before ARCH_REQ_XCOMP_GUEST_PERM, which therefore cannot fail.
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 377d993438..1d0c006077 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -43,8 +43,6 @@
>  #include "disas/capstone.h"
>  #include "cpu-internal.h"
> -#include <sys/syscall.h>
> -
>  /* Helpers for building CPUID[2] descriptors: */
>  struct CPUID2CacheDescriptorInfo {
> @@ -6002,40 +6000,6 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w)
>      }
>  }
> -static void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
> -{
> -    KVMState *s = kvm_state;
> -    uint64_t bitmask;
> -    long rc;
> -
> -    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
> -        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> -        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {
> -            warn_report("no amx support from supported_xcr0, "
> -                        "bitmask:0x%lx", bitmask);
> -            return;
> -        }
> -
> -        rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> -                      XSTATE_XTILE_DATA_BIT);
> -        if (rc) {
> -            /*
> -             * The older kernel version(<5.15) can't support
> -             * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> -             */
> -            return;
> -        }
> -
> -        rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
> -        if (rc) {
> -            warn_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
> -        } else if (!(bitmask & XFEATURE_XTILE_MASK)) {
> -            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> -                        "and bitmask=0x%lx", bitmask);
> -        }
> -    }
> -}
> -
>  /* Calculate XSAVE components based on the configured CPU feature flags */
>  static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>  {
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index d4ad0f56bd..de949bd63d 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -551,11 +551,8 @@ typedef enum X86Seg {
>  #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
>  #define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
>  #define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
> -#define XFEATURE_XTILE_MASK             (XSTATE_XTILE_CFG_MASK \
> -                                         | XSTATE_XTILE_DATA_MASK)
> -#define ARCH_GET_XCOMP_GUEST_PERM       0x1024
> -#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
> +#define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
>  #define ESA_FEATURE_ALIGN64_BIT         1
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 3bdcd724c4..4b07778970 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -17,6 +17,7 @@
>  #include "qapi/error.h"
>  #include <sys/ioctl.h>
>  #include <sys/utsname.h>
> +#include <sys/syscall.h>
>  #include <linux/kvm.h>
>  #include "standard-headers/asm-x86/kvm_para.h"
> @@ -5168,3 +5169,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
>  {
>      return !sev_es_enabled();
>  }
> +
> +#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
> +
> +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
> +{
> +    KVMState *s = kvm_state;
> +    uint64_t supported;
> +
> +    mask &= XSTATE_DYNAMIC_MASK;
> +    if (!mask) {
> +	return;
> +    }
> +    /*
> +     * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
> +     * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
> +     * about them already because they are not supported features.
> +     */
> +    supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> +    supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
> +    mask &= ~supported;


  Paolo, thanks for your great help!
  Except for changing "mask &= ~supported" to "mask &= supported", this patch works well.

  Please re-sync the Linux headers, since David has pulled the Linux headers up to 5.17-rc1:
  https://lists.nongnu.org/archive/html/qemu-devel/2022-02/msg03763.html

  Yang


> +
> +    while (mask) {
> +        int bit = ctz64(mask);
> +        int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
> +        if (rc) {
> +            /*
> +             * Older kernel version (<5.17) do not support
> +             * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
> +             * any dynamic feature from kvm_arch_get_supported_cpuid.
> +             */
> +            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> +                        "for feature bit %d", bit);
> +        }
> +	mask &= ~BIT_ULL(bit);
> +    }
> +}
> diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
> index a978509d50..4124912c20 100644
> --- a/target/i386/kvm/kvm_i386.h
> +++ b/target/i386/kvm/kvm_i386.h
> @@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
>  uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
>  bool kvm_enable_sgx_provisioning(KVMState *s);
> +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
>  #endif
> 
> 
> If this works, the rest of the series is good to go!
> 
> Thanks,
> 
> Paolo


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2022-02-25 11:02 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
2022-02-17  6:04 ` [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
2022-02-21 12:51   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components Yang Zhong
2022-02-21 12:53   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 3/8] x86: Grant AMX permission for guest Yang Zhong
2022-02-17  5:58   ` Yang Zhong
2022-02-17 13:44     ` Paolo Bonzini
2022-02-25 10:40       ` Yang Zhong
2022-02-17  6:04 ` [PATCH v2 4/8] x86: Add XFD faulting bit for state components Yang Zhong
2022-02-21 13:00   ` David Edmondson
2022-02-25  7:10     ` Yang Zhong
2022-02-17  6:04 ` [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration Yang Zhong
2022-02-23 11:30   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration Yang Zhong
2022-02-21 13:25   ` David Edmondson
2022-02-25  7:33     ` Yang Zhong
2022-02-17  6:04 ` [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration Yang Zhong
2022-02-21 13:30   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 8/8] linux-header: Sync the linux headers Yang Zhong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.