From: Yang Zhong <yang.zhong@intel.com>
To: qemu-devel@nongnu.org
Cc: yang.zhong@intel.com, kevin.tian@intel.com, seanjc@google.com,
	jing2.liu@linux.intel.com, wei.w.wang@intel.com,
	guang.zeng@intel.com, pbonzini@redhat.com
Subject: Re: [PATCH v2 3/8] x86: Grant AMX permission for guest
Date: Thu, 17 Feb 2022 13:58:36 +0800	[thread overview]
Message-ID: <20220217055836.GA10691@yangzhon-Virtual> (raw)
In-Reply-To: <20220217060434.52460-4-yang.zhong@intel.com>

On Wed, Feb 16, 2022 at 10:04:29PM -0800, Yang Zhong wrote:
> The kernel allocates a 4K xstate buffer by default. For XSAVE features
> that require a large state component (e.g. AMX), the Linux kernel
> dynamically expands the xstate buffer only after the process has
> acquired the necessary permission. Such features are called
> dynamically-enabled XSAVE features (or dynamic xfeatures).
> 
> There are separate permissions for native tasks and guests.
> 
> Qemu should request guest permission for the dynamic xfeatures that
> will be exposed to the guest. This only needs to be done once, before
> the first vcpu is created.
> 
> KVM added a new system attribute, KVM_X86_XCOMP_GUEST_SUPP, for
> reporting the host-side supported_xcr0, so Qemu can decide whether it
> can request permission for dynamically-enabled XSAVE features:
> https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/
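
For reference, a minimal sketch of the permission-request flow described
above, written as a standalone x86-64 Linux program rather than Qemu code.
The prctl numbers and the XTILEDATA bit match the definitions this patch
adds to cpu.h; error handling is trimmed and the program is illustrative
only:

    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #define ARCH_GET_XCOMP_GUEST_PERM 0x1024
    #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
    #define XSTATE_XTILE_DATA_BIT     18

    int main(void)
    {
        uint64_t perm = 0;

        /* Ask the kernel to grant guest permission for AMX tile data. */
        if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
                    XSTATE_XTILE_DATA_BIT)) {
            /* Kernels without guest-permission support fail here. */
            perror("ARCH_REQ_XCOMP_GUEST_PERM");
            return 1;
        }

        /* Read back which dynamic features are now permitted for guests. */
        if (syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &perm)) {
            perror("ARCH_GET_XCOMP_GUEST_PERM");
            return 1;
        }
        printf("guest xstate permission mask: 0x%llx\n",
               (unsigned long long)perm);
        return 0;
    }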
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> ---
>  target/i386/cpu.h         |  7 +++++++
>  target/i386/cpu.c         | 43 +++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/kvm-cpu.c | 12 +++++------
>  target/i386/kvm/kvm.c     | 20 ++++++++++++++++++
>  4 files changed, 76 insertions(+), 6 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 06d2d6bccf..d4ad0f56bd 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -549,6 +549,13 @@ typedef enum X86Seg {
>  #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
>  #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
>  #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
> +#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
> +#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
> +#define XFEATURE_XTILE_MASK             (XSTATE_XTILE_CFG_MASK \
> +                                         | XSTATE_XTILE_DATA_MASK)
> +
> +#define ARCH_GET_XCOMP_GUEST_PERM       0x1024
> +#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
>  
>  #define ESA_FEATURE_ALIGN64_BIT         1
>  
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index ea7e8f9081..377d993438 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -43,6 +43,8 @@
>  #include "disas/capstone.h"
>  #include "cpu-internal.h"
>  
> +#include <sys/syscall.h>
> +
>  /* Helpers for building CPUID[2] descriptors: */
>  
>  struct CPUID2CacheDescriptorInfo {
> @@ -6000,12 +6002,47 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w)
>      }
>  }
>  
> +static void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
> +{
> +    KVMState *s = kvm_state;
> +    uint64_t bitmask;
> +    long rc;
> +
> +    if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
> +        bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> +        if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {

   Paolo, last time you suggested the following change here:

   rc = kvm_arch_get_supported_cpuid(s, 0xd, 0,
                                     (xdata_bit < 32 ? R_EAX : R_EDX));
   if (!(rc & BIT(xdata_bit & 31))) {
      ...
   }

  Since this function takes "mask" as its parameter, I used R_EAX directly
  here. Please review, and if it should be changed to something like
  "(xdata_bit < 32 ? R_EAX : R_EDX)", I will do that in the next version,
  thanks!

  Yang
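
For illustration, one way to generalize the check per dynamic-feature bit
along the lines of that suggestion. The helper below is hypothetical, not
part of this patch, and assumes the same includes and declarations as
target/i386/cpu.c:

    /*
     * Hypothetical helper: check one dynamic xfeature bit against the
     * supported_xcr0 reported via CPUID leaf 0xd, subleaf 0.
     * Bits 0-31 live in EAX, bits 32-63 in EDX.
     */
    static bool xsave_feature_supported(KVMState *s, int xdata_bit)
    {
        uint32_t reg = kvm_arch_get_supported_cpuid(s, 0xd, 0,
                                                    xdata_bit < 32 ? R_EAX
                                                                   : R_EDX);
        return reg & (1U << (xdata_bit & 31));
    }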


> +            warn_report("no amx support from supported_xcr0, "
> +                        "bitmask:0x%lx", bitmask);
> +            return;
> +        }
> +
> +        rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> +                      XSTATE_XTILE_DATA_BIT);
> +        if (rc) {
> +            /*
> +             * The older kernel version(<5.15) can't support
> +             * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> +             */
> +            return;
> +        }
> +
> +        rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
> +        if (rc) {
> +            warn_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
> +        } else if (!(bitmask & XFEATURE_XTILE_MASK)) {
> +            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> +                        "and bitmask=0x%lx", bitmask);
> +        }
> +    }
> +}
> +
>  /* Calculate XSAVE components based on the configured CPU feature flags */
>  static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>  {
>      CPUX86State *env = &cpu->env;
>      int i;
>      uint64_t mask;
> +    static bool request_perm;
>  
>      if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
>          env->features[FEAT_XSAVE_COMP_LO] = 0;
> @@ -6021,6 +6058,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>          }
>      }
>  
> +    /* Only request permission for first vcpu */
> +    if (kvm_enabled() && !request_perm) {
> +        kvm_request_xsave_components(cpu, mask);
> +        request_perm = true;
> +    }
> +
>      env->features[FEAT_XSAVE_COMP_LO] = mask;
>      env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
>  }
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index ce27d3b1df..a35a1bf9fe 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
>  static void kvm_cpu_xsave_init(void)
>  {
>      static bool first = true;
> -    KVMState *s = kvm_state;
> +    uint32_t eax, ebx, ecx, edx;
>      int i;
>  
>      if (!first) {
> @@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
>          ExtSaveArea *esa = &x86_ext_save_areas[i];
>  
>          if (esa->size) {
> -            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
> -            if (sz != 0) {
> -                assert(esa->size == sz);
> -                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
> -                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
> +            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
> +            if (eax != 0) {
> +                assert(esa->size == eax);
> +                esa->offset = ebx;
> +                esa->ecx = ecx;
>              }
>          }
>      }
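
For readers following along outside the Qemu tree: host_cpuid() ultimately
just executes the CPUID instruction, so the enumeration above can be
reproduced with a small standalone program using the compiler's <cpuid.h>
helpers. A rough sketch only, not Qemu code; the subleaf range and field
interpretation follow CPUID leaf 0xd:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Enumerate XSAVE state components: leaf 0xd, subleaves 2..18. */
        for (unsigned int i = 2; i < 19; i++) {
            if (!__get_cpuid_count(0xd, i, &eax, &ebx, &ecx, &edx) || !eax) {
                continue;
            }
            /*
             * EAX = size of the component, EBX = offset in the XSAVE area,
             * ECX bit 1 = 64-byte alignment in the compacted format.
             */
            printf("component %2u: size %u offset %u ecx 0x%x\n",
                   i, eax, ebx, ecx);
        }
        return 0;
    }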
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 2c8feb4a6f..3bdcd724c4 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -348,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>      struct kvm_cpuid2 *cpuid;
>      uint32_t ret = 0;
>      uint32_t cpuid_1_edx;
> +    uint64_t bitmask;
>  
>      cpuid = get_supported_cpuid(s);
>  
> @@ -405,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>          if (!has_msr_arch_capabs) {
>              ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
>          }
> +    } else if (function == 0xd && index == 0 &&
> +               (reg == R_EAX || reg == R_EDX)) {
> +        struct kvm_device_attr attr = {
> +            .group = 0,
> +            .attr = KVM_X86_XCOMP_GUEST_SUPP,
> +            .addr = (unsigned long) &bitmask
> +        };
> +
> +        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
> +        if (!sys_attr) {
> +            warn_report("cannot get sys attribute capabilities %d", sys_attr);
> +        }
> +
> +        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
> +        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
> +            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
> +                        "error: %d", rc);
> +        }
> +        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
>      } else if (function == 0x80000001 && reg == R_ECX) {
>          /*
>           * It's safe to enable TOPOEXT even if it's not returned by

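As an aside, the new KVM_GET_DEVICE_ATTR branch above can be exercised
outside Qemu with a small test program against /dev/kvm. This is a sketch
only; it assumes uapi headers new enough to define KVM_X86_XCOMP_GUEST_SUPP
and KVM_CAP_SYS_ATTRIBUTES (e.g. the sync in patch 8/8):

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        uint64_t supp = 0;
        struct kvm_device_attr attr = {
            .group = 0,
            .attr  = KVM_X86_XCOMP_GUEST_SUPP,
            .addr  = (uintptr_t)&supp,
        };
        int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

        if (kvm < 0) {
            perror("/dev/kvm");
            return 1;
        }
        /* System attributes need KVM_CAP_SYS_ATTRIBUTES on the KVM fd. */
        if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_SYS_ATTRIBUTES) <= 0 ||
            ioctl(kvm, KVM_GET_DEVICE_ATTR, &attr) < 0) {
            fprintf(stderr, "KVM_X86_XCOMP_GUEST_SUPP not available\n");
            close(kvm);
            return 1;
        }
        /*
         * Same layout as CPUID leaf 0xd subleaf 0: EAX in the low half,
         * EDX in the high half of supported_xcr0.
         */
        printf("guest supported_xcr0: 0x%llx\n", (unsigned long long)supp);
        close(kvm);
        return 0;
    }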

Thread overview: 20+ messages
2022-02-17  6:04 [PATCH v2 0/8] AMX support in Qemu Yang Zhong
2022-02-17  6:04 ` [PATCH v2 1/8] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
2022-02-21 12:51   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 2/8] x86: Add AMX XTILECFG and XTILEDATA components Yang Zhong
2022-02-21 12:53   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 3/8] x86: Grant AMX permission for guest Yang Zhong
2022-02-17  5:58   ` Yang Zhong [this message]
2022-02-17 13:44     ` Paolo Bonzini
2022-02-25 10:40       ` Yang Zhong
2022-02-17  6:04 ` [PATCH v2 4/8] x86: Add XFD faulting bit for state components Yang Zhong
2022-02-21 13:00   ` David Edmondson
2022-02-25  7:10     ` Yang Zhong
2022-02-17  6:04 ` [PATCH v2 5/8] x86: Add AMX CPUIDs enumeration Yang Zhong
2022-02-23 11:30   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 6/8] x86: add support for KVM_CAP_XSAVE2 and AMX state migration Yang Zhong
2022-02-21 13:25   ` David Edmondson
2022-02-25  7:33     ` Yang Zhong
2022-02-17  6:04 ` [PATCH v2 7/8] x86: Support XFD and AMX xsave data migration Yang Zhong
2022-02-21 13:30   ` David Edmondson
2022-02-17  6:04 ` [PATCH v2 8/8] linux-header: Sync the linux headers Yang Zhong
