* [PATCH] x86/HVM: support emulated UMIP
@ 2021-01-29 11:45 Jan Beulich
  2021-01-29 11:46 ` Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Jan Beulich @ 2021-01-29 11:45 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, Kevin Tian, Jun Nakajima

There are three noteworthy drawbacks:
1) The intercepts we need to enable here are CPL-independent, i.e. we
   now have to emulate certain instructions for ring 0.
2) On VMX there's no intercept for SMSW, so the emulation isn't really
   complete there.
3) The CR4 write intercept on SVM is lower priority than all exception
   checks, so we need to intercept #GP.
Therefore this emulation doesn't get offered to guests by default.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Don't offer emulation by default. Re-base.
v2: Split off the x86 insn emulator part. Re-base. Use hvm_featureset
    in hvm_cr4_guest_reserved_bits().

--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -453,6 +453,13 @@ static void __init calculate_hvm_max_pol
     __set_bit(X86_FEATURE_X2APIC, hvm_featureset);
 
     /*
+     * Xen can often provide UMIP emulation to HVM guests even if the host
+     * doesn't have such functionality.
+     */
+    if ( hvm_funcs.set_descriptor_access_exiting )
+        __set_bit(X86_FEATURE_UMIP, hvm_featureset);
+
+    /*
      * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in
      * long mode (and init_amd() has cleared it out of host capabilities), but
      * HVM guests are able if running in protected mode.
@@ -504,6 +511,10 @@ static void __init calculate_hvm_def_pol
     for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i )
         hvm_featureset[i] &= hvm_featuremask[i];
 
+    /* Don't offer UMIP emulation by default. */
+    if ( !cpu_has_umip )
+        __clear_bit(X86_FEATURE_UMIP, hvm_featureset);
+
     guest_common_feature_adjustments(hvm_featureset);
     guest_common_default_feature_adjustments(hvm_featureset);
 
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -991,7 +991,8 @@ unsigned long hvm_cr4_guest_valid_bits(c
                                 X86_CR4_PCE                    |
             (p->basic.fxsr    ? X86_CR4_OSFXSR            : 0) |
             (p->basic.sse     ? X86_CR4_OSXMMEXCPT        : 0) |
-            (p->feat.umip     ? X86_CR4_UMIP              : 0) |
+            ((p == &host_cpuid_policy ? &hvm_max_cpuid_policy : p)->feat.umip
+                              ? X86_CR4_UMIP              : 0) |
             (vmxe             ? X86_CR4_VMXE              : 0) |
             (p->feat.fsgsbase ? X86_CR4_FSGSBASE          : 0) |
             (p->basic.pcid    ? X86_CR4_PCIDE             : 0) |
@@ -3731,6 +3732,13 @@ int hvm_descriptor_access_intercept(uint
     struct vcpu *curr = current;
     struct domain *currd = curr->domain;
 
+    if ( (is_write || curr->arch.hvm.guest_cr[4] & X86_CR4_UMIP) &&
+         hvm_get_cpl(curr) )
+    {
+        hvm_inject_hw_exception(TRAP_gp_fault, 0);
+        return X86EMUL_OKAY;
+    }
+
     if ( currd->arch.monitor.descriptor_access_enabled )
     {
         ASSERT(curr->arch.vm_event);
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -547,6 +547,28 @@ void svm_update_guest_cr(struct vcpu *v,
             value &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
         }
 
+        if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
+        {
+            u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb);
+
+            if ( v->arch.hvm.guest_cr[4] & X86_CR4_UMIP )
+            {
+                value &= ~X86_CR4_UMIP;
+                ASSERT(vmcb_get_cr_intercepts(vmcb) & CR_INTERCEPT_CR0_READ);
+                general1_intercepts |= GENERAL1_INTERCEPT_IDTR_READ |
+                                       GENERAL1_INTERCEPT_GDTR_READ |
+                                       GENERAL1_INTERCEPT_LDTR_READ |
+                                       GENERAL1_INTERCEPT_TR_READ;
+            }
+            else if ( !v->domain->arch.monitor.descriptor_access_enabled )
+                general1_intercepts &= ~(GENERAL1_INTERCEPT_IDTR_READ |
+                                         GENERAL1_INTERCEPT_GDTR_READ |
+                                         GENERAL1_INTERCEPT_LDTR_READ |
+                                         GENERAL1_INTERCEPT_TR_READ);
+
+            vmcb_set_general1_intercepts(vmcb, general1_intercepts);
+        }
+
         vmcb_set_cr4(vmcb, value);
         break;
     default:
@@ -883,7 +905,14 @@ static void svm_set_descriptor_access_ex
     if ( enable )
         general1_intercepts |= mask;
     else
+    {
         general1_intercepts &= ~mask;
+        if ( (v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) && !cpu_has_umip )
+            general1_intercepts |= GENERAL1_INTERCEPT_IDTR_READ |
+                                   GENERAL1_INTERCEPT_GDTR_READ |
+                                   GENERAL1_INTERCEPT_LDTR_READ |
+                                   GENERAL1_INTERCEPT_TR_READ;
+    }
 
     vmcb_set_general1_intercepts(vmcb, general1_intercepts);
 }
@@ -1781,6 +1810,16 @@ static void svm_vmexit_do_cr_access(
         __update_guest_eip(regs, vmcb->nextrip - vmcb->rip);
 }
 
+static bool is_cr4_write(const struct x86_emulate_state *state,
+                         const struct x86_emulate_ctxt *ctxt)
+{
+    unsigned int cr;
+
+    return ctxt->opcode == X86EMUL_OPC(0x0f, 0x22) &&
+           x86_insn_modrm(state, NULL, &cr) == 3 &&
+           cr == 4;
+}
+
 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
 {
     struct vmcb_struct *vmcb = vcpu_nestedhvm(v).nv_n1vmcx;
@@ -2728,6 +2767,14 @@ void svm_vmexit_handler(struct cpu_user_
         svm_fpu_dirty_intercept();
         break;
 
+    case VMEXIT_EXCEPTION_GP:
+        HVMTRACE_1D(TRAP, TRAP_gp_fault);
+        /* We only care about ring 0 faults with error code zero. */
+        if ( vmcb->exitinfo1 || vmcb_get_cpl(vmcb) ||
+             !hvm_emulate_one_insn(is_cr4_write, "CR4 write") )
+            hvm_inject_hw_exception(TRAP_gp_fault, vmcb->exitinfo1);
+        break;
+
     case VMEXIT_EXCEPTION_PF:
     {
         unsigned long va;
@@ -2873,7 +2920,16 @@ void svm_vmexit_handler(struct cpu_user_
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         break;
 
-    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+    case VMEXIT_CR0_READ:
+        if ( (v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) &&
+             vmcb_get_cpl(vmcb) )
+        {
+            ASSERT(!cpu_has_umip);
+            hvm_inject_hw_exception(TRAP_gp_fault, 0);
+            break;
+        }
+        /* fall through */
+    case VMEXIT_CR1_READ ... VMEXIT_CR15_READ:
     case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
         if ( cpu_has_svm_decode && (vmcb->exitinfo1 & (1ULL << 63)) )
             svm_vmexit_do_cr_access(vmcb, regs);
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -141,6 +141,10 @@ static int construct_vmcb(struct vcpu *v
         HVM_TRAP_MASK |
         (v->arch.fully_eager_fpu ? 0 : (1U << TRAP_no_device));
 
+    /* For UMIP emulation intercept #GP to catch faulting CR4 writes. */
+    if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
+        vmcb->_exception_intercepts |= 1U << TRAP_gp_fault;
+
     if ( paging_mode_hap(v->domain) )
     {
         vmcb->_np_enable = 1; /* enable nested paging */
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1074,7 +1074,8 @@ static int construct_vmcs(struct vcpu *v
 
     /*
      * Disable features which we don't want active by default:
-     *  - Descriptor table exiting only if wanted by introspection
+     *  - Descriptor table exiting only if needed for CR4.UMIP write
+     *    emulation or wanted by introspection
      *  - x2APIC - default is xAPIC mode
      *  - VPID settings chosen at VMEntry time
      *  - VMCS Shadowing only when in nested VMX mode
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1264,7 +1264,7 @@ static void vmx_set_descriptor_access_ex
     if ( enable )
         v->arch.hvm.vmx.secondary_exec_control |=
             SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
-    else
+    else if ( !(v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) || cpu_has_umip )
         v->arch.hvm.vmx.secondary_exec_control &=
             ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
 
@@ -1514,6 +1514,21 @@ static void vmx_update_guest_cr(struct v
             v->arch.hvm.hw_cr[4] &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
         }
 
+        if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
+        {
+            if ( (v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) )
+            {
+                ASSERT(cpu_has_vmx_dt_exiting);
+                v->arch.hvm.hw_cr[4] &= ~X86_CR4_UMIP;
+                v->arch.hvm.vmx.secondary_exec_control |=
+                    SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
+            }
+            else if ( !v->domain->arch.monitor.descriptor_access_enabled )
+                v->arch.hvm.vmx.secondary_exec_control &=
+                    ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
+            vmx_update_secondary_exec_control(v);
+        }
+
         __vmwrite(GUEST_CR4, v->arch.hvm.hw_cr[4]);
 
         /*
@@ -1537,6 +1552,7 @@ static void vmx_update_guest_cr(struct v
                                              (X86_CR4_PSE | X86_CR4_SMEP |
                                               X86_CR4_SMAP)
                                              : 0;
+            v->arch.hvm.vmx.cr4_host_mask |= cpu_has_umip ? 0 : X86_CR4_UMIP;
             if ( v->domain->arch.monitor.write_ctrlreg_enabled &
                  monitor_ctrlreg_bitmask(VM_EVENT_X86_CR4) )
                 v->arch.hvm.vmx.cr4_host_mask |=
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -110,6 +110,7 @@
 
 /* CPUID level 0x00000007:0.ecx */
 #define cpu_has_avx512_vbmi     boot_cpu_has(X86_FEATURE_AVX512_VBMI)
+#define cpu_has_umip            boot_cpu_has(X86_FEATURE_UMIP)
 #define cpu_has_avx512_vbmi2    boot_cpu_has(X86_FEATURE_AVX512_VBMI2)
 #define cpu_has_gfni            boot_cpu_has(X86_FEATURE_GFNI)
 #define cpu_has_vaes            boot_cpu_has(X86_FEATURE_VAES)



* Re: [PATCH] x86/HVM: support emulated UMIP
  2021-01-29 11:45 [PATCH] x86/HVM: support emulated UMIP Jan Beulich
@ 2021-01-29 11:46 ` Jan Beulich
  2021-01-29 11:47 ` [PATCH XTF RFC] force-enable UMIP for UMIP testing Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-01-29 11:46 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, Kevin Tian, Jun Nakajima

On 29.01.2021 12:45, Jan Beulich wrote:
> There are three noteworthy drawbacks:
> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>    now have to emulate certain instructions for ring 0.
> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>    complete there.
> 3) The CR4 write intercept on SVM is lower priority than all exception
>    checks, so we need to intercept #GP.
> Therefore this emulation doesn't get offered to guests by default.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3: Don't offer emulation by default. Re-base.
> v2: Split off the x86 insn emulator part. Re-base. Use hvm_featureset
>     in hvm_cr4_guest_reserved_bits().

As per this, the title was meant to start [PATCH v3]; sorry.
The v2 submission was quite some time ago, though.

Jan



* [PATCH XTF RFC] force-enable UMIP for UMIP testing
  2021-01-29 11:45 [PATCH] x86/HVM: support emulated UMIP Jan Beulich
  2021-01-29 11:46 ` Jan Beulich
@ 2021-01-29 11:47 ` Jan Beulich
  2021-01-29 11:59   ` Andrew Cooper
  2021-02-03  7:36 ` [PATCH] x86/HVM: support emulated UMIP Tian, Kevin
  2021-02-04 14:10 ` Andrew Cooper
  3 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2021-01-29 11:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

Enable UMIP even if underlying hardware doesn't support it (assuming
the respective change supporting its emulation is in place). Obviously,
as explained in that patch, the SMSW test is then expected to fail on
Intel hardware.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tests/umip/Makefile
+++ b/tests/umip/Makefile
@@ -4,6 +4,8 @@ NAME      := umip
 CATEGORY  := functional
 TEST-ENVS := hvm32 hvm64
 
+TEST-EXTRA-CFG := extra.cfg.in
+
 obj-perenv += main.o
 
 include $(ROOT)/build/gen.mk
--- /dev/null
+++ b/tests/umip/extra.cfg.in
@@ -0,0 +1 @@
+cpuid = "host,umip=1"




* Re: [PATCH XTF RFC] force-enable UMIP for UMIP testing
  2021-01-29 11:47 ` [PATCH XTF RFC] force-enable UMIP for UMIP testing Jan Beulich
@ 2021-01-29 11:59   ` Andrew Cooper
  0 siblings, 0 replies; 8+ messages in thread
From: Andrew Cooper @ 2021-01-29 11:59 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 29/01/2021 11:47, Jan Beulich wrote:
> Enable UMIP even if underlying hardware doesn't support it (assuming
> the respective change supporting its emulation is in place). Obviously,
> as explained in that patch, the SMSW test is then expected to fail on
> Intel hardware.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/tests/umip/Makefile
> +++ b/tests/umip/Makefile
> @@ -4,6 +4,8 @@ NAME      := umip
>  CATEGORY  := functional
>  TEST-ENVS := hvm32 hvm64
>  
> +TEST-EXTRA-CFG := extra.cfg.in
> +
>  obj-perenv += main.o
>  
>  include $(ROOT)/build/gen.mk
> --- /dev/null
> +++ b/tests/umip/extra.cfg.in
> @@ -0,0 +1 @@
> +cpuid = "host,umip=1"

So while I agree in principle that having UMIP emulation is a good
thing, this particular change in XTF would be rejected by the OSSTest
bisector.

The only reason it doesn't fail straight away for the PV guests is
that there's no error handling for problematic CPUID requests, which
is something still to be fixed.

Given that SMSW is a known (and acceptable) hole in UMIP emulation under
Intel, it should be converted into a skip.  However, that is also a
logical change to the test, and will cause other problems for bisection.

This does need the test-revision logic (as does one other bug in the
UMIP test IIRC), which I need to get around to finishing.

I'll see about trying to do that early next release cycle, because the
"tests are logically immutable to avoid the bisector saying no"
restriction is getting in the way of a lot of people, myself included.

~Andrew



* RE: [PATCH] x86/HVM: support emulated UMIP
  2021-01-29 11:45 [PATCH] x86/HVM: support emulated UMIP Jan Beulich
  2021-01-29 11:46 ` Jan Beulich
  2021-01-29 11:47 ` [PATCH XTF RFC] force-enable UMIP for UMIP testing Jan Beulich
@ 2021-02-03  7:36 ` Tian, Kevin
  2021-02-04 14:10 ` Andrew Cooper
  3 siblings, 0 replies; 8+ messages in thread
From: Tian, Kevin @ 2021-02-03  7:36 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Cooper, Andrew, Wei Liu, Roger Pau Monné, Nakajima, Jun

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, January 29, 2021 7:45 PM
> 
> There are three noteworthy drawbacks:
> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>    now have to emulate certain instructions for ring 0.
> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>    complete there.
> 3) The CR4 write intercept on SVM is lower priority than all exception
>    checks, so we need to intercept #GP.
> Therefore this emulation doesn't get offered to guests by default.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>


* Re: [PATCH] x86/HVM: support emulated UMIP
  2021-01-29 11:45 [PATCH] x86/HVM: support emulated UMIP Jan Beulich
                   ` (2 preceding siblings ...)
  2021-02-03  7:36 ` [PATCH] x86/HVM: support emulated UMIP Tian, Kevin
@ 2021-02-04 14:10 ` Andrew Cooper
  2021-02-05 13:45   ` Jan Beulich
  2021-02-05 14:02   ` Jan Beulich
  3 siblings, 2 replies; 8+ messages in thread
From: Andrew Cooper @ 2021-02-04 14:10 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Wei Liu, Roger Pau Monné, Kevin Tian, Jun Nakajima, Tamas K Lengyel

On 29/01/2021 11:45, Jan Beulich wrote:
> There are three noteworthy drawbacks:
> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>    now have to emulate certain instructions for ring 0.
> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>    complete there.
> 3) The CR4 write intercept on SVM is lower priority than all exception
>    checks, so we need to intercept #GP.
> Therefore this emulation doesn't get offered to guests by default.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I wonder if it would be helpful for this to be 3 patches, simply because
of the differing complexity of the VT-x and SVM pieces.

> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -453,6 +453,13 @@ static void __init calculate_hvm_max_pol
>      __set_bit(X86_FEATURE_X2APIC, hvm_featureset);
>  
>      /*
> +     * Xen can often provide UMIP emulation to HVM guests even if the host
> +     * doesn't have such functionality.
> +     */
> +    if ( hvm_funcs.set_descriptor_access_exiting )

No need for this check.  Exiting is available on all generations and
vendors.

Also, the header file probably wants a ! annotation for UMIP to signify
that we are doing something special with it.
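
E.g. (a sketch only; the exact set of annotation characters wants
double-checking against the key at the top of cpufeatureset.h):

    XEN_CPUFEATURE(UMIP,          6*32+ 2) /*!S User Mode Instruction Prevention */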

> +        __set_bit(X86_FEATURE_UMIP, hvm_featureset);
> +
> +    /*
>       * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in
>       * long mode (and init_amd() has cleared it out of host capabilities), but
>       * HVM guests are able if running in protected mode.
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -991,7 +991,8 @@ unsigned long hvm_cr4_guest_valid_bits(c
>                                  X86_CR4_PCE                    |
>              (p->basic.fxsr    ? X86_CR4_OSFXSR            : 0) |
>              (p->basic.sse     ? X86_CR4_OSXMMEXCPT        : 0) |
> -            (p->feat.umip     ? X86_CR4_UMIP              : 0) |
> +            ((p == &host_cpuid_policy ? &hvm_max_cpuid_policy : p)->feat.umip
> +                              ? X86_CR4_UMIP              : 0) |

This hunk wants dropping.  p can't alias host_cpuid_policy any more.

(and for future changes which do look like this, a local bool please,
per the comment.)

>              (vmxe             ? X86_CR4_VMXE              : 0) |
>              (p->feat.fsgsbase ? X86_CR4_FSGSBASE          : 0) |
>              (p->basic.pcid    ? X86_CR4_PCIDE             : 0) |
> @@ -3731,6 +3732,13 @@ int hvm_descriptor_access_intercept(uint
>      struct vcpu *curr = current;
>      struct domain *currd = curr->domain;
>  
> +    if ( (is_write || curr->arch.hvm.guest_cr[4] & X86_CR4_UMIP) &&

Brackets for & expression?

> +         hvm_get_cpl(curr) )
> +    {
> +        hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +        return X86EMUL_OKAY;
> +    }

I believe this is a logical change for monitor - previously, non-ring0
events would go all the way to userspace.

I don't expect this to be an issue - monitoring agents really shouldn't
be interested in userspace actions the guest kernel is trying to turn
into #GP.

CC'ing Tamas for his opinion.

> +
>      if ( currd->arch.monitor.descriptor_access_enabled )
>      {
>          ASSERT(curr->arch.vm_event);
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -547,6 +547,28 @@ void svm_update_guest_cr(struct vcpu *v,
>              value &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
>          }
>  
> +        if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )

Throughout the series, examples like this should have the !cpu_has_umip
clause first.  It is static per host, rather than variable per VM, and
will improve the branch prediction.

Where the logic is equivalent, it is best to have the clauses in
stability order, as this will prevent a modern CPU from even evaluating
the CPUID policy.
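
I.e., simply with the clauses swapped (sketch):

    if ( !cpu_has_umip && v->domain->arch.cpuid->feat.umip )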

> +        {
> +            u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb);
> +
> +            if ( v->arch.hvm.guest_cr[4] & X86_CR4_UMIP )
> +            {
> +                value &= ~X86_CR4_UMIP;
> +                ASSERT(vmcb_get_cr_intercepts(vmcb) & CR_INTERCEPT_CR0_READ);

It occurs to me that adding CR0 read exiting adds a lot of complexity
for very little gain.

From a practical standpoint, UMIP exists to block SIDT/SGDT which are
the two instructions which confer an attacker with useful information
(linear addresses of the IDT/GDT respectively).  SLDT/STR only confer a
16 bit index within the GDT (fixed per OS), and SMSW is as good as a
constant these days.

Given that Intel cannot intercept SMSW at all and we've already accepted
that as a limitation vs architectural UMIP, I don't think extra
complexity on AMD is worth the gain.
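
(For context: without UMIP, the leak is trivial from user mode; a
purely illustrative program, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        struct { uint16_t limit; uint64_t base; } __attribute__((packed)) gdtr;

        asm volatile ( "sgdt %0" : "=m" (gdtr) );
        /* gdtr.base is the GDT's linear address, obtained at CPL 3. */
        printf("GDT base: %#jx\n", (uintmax_t)gdtr.base);

        return 0;
    }

SIDT behaves the same way for the IDT.)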

> @@ -2728,6 +2767,14 @@ void svm_vmexit_handler(struct cpu_user_
>          svm_fpu_dirty_intercept();
>          break;
>  
> +    case VMEXIT_EXCEPTION_GP:
> +        HVMTRACE_1D(TRAP, TRAP_gp_fault);
> +        /* We only care about ring 0 faults with error code zero. */
> +        if ( vmcb->exitinfo1 || vmcb_get_cpl(vmcb) ||
> +             !hvm_emulate_one_insn(is_cr4_write, "CR4 write") )
> +            hvm_inject_hw_exception(TRAP_gp_fault, vmcb->exitinfo1);

I should post one of my pending SVM cleanup patches, which further
deconstructs exitinfo into more usefully named fields.

The comment should include *why* we only care about this state.  It
needs to mention emulated UMIP, and the priority order of #GP and VMExit.

> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1537,6 +1552,7 @@ static void vmx_update_guest_cr(struct v
>                                               (X86_CR4_PSE | X86_CR4_SMEP |
>                                                X86_CR4_SMAP)
>                                               : 0;
> +            v->arch.hvm.vmx.cr4_host_mask |= cpu_has_umip ? 0 : X86_CR4_UMIP;

if ( !cpu_has_umip )
     v->arch.hvm.vmx.cr4_host_mask |= X86_CR4_UMIP;

~Andrew



* Re: [PATCH] x86/HVM: support emulated UMIP
  2021-02-04 14:10 ` Andrew Cooper
@ 2021-02-05 13:45   ` Jan Beulich
  2021-02-05 14:02   ` Jan Beulich
  1 sibling, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-02-05 13:45 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Roger Pau Monné,
	Kevin Tian, Jun Nakajima, Tamas K Lengyel, xen-devel

On 04.02.2021 15:10, Andrew Cooper wrote:
> On 29/01/2021 11:45, Jan Beulich wrote:
>> There are three noteworthy drawbacks:
>> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>>    now have to emulate certain instructions for ring 0.
>> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>>    complete there.
>> 3) The CR4 write intercept on SVM is lower priority than all exception
>>    checks, so we need to intercept #GP.
>> Therefore this emulation doesn't get offered to guests by default.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> I wonder if it would be helpful for this to be 3 patches, simply because
> of the differing complexity of the VT-x and SVM pieces.

If so, then three or even four. One each for SVM/VMX, and
a final one for the enabling in vendor independent code.
For the possible 4th one, see below in the
hvm_descriptor_access_intercept() related part.

>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -453,6 +453,13 @@ static void __init calculate_hvm_max_pol
>>      __set_bit(X86_FEATURE_X2APIC, hvm_featureset);
>>  
>>      /*
>> +     * Xen can often provide UMIP emulation to HVM guests even if the host
>> +     * doesn't have such functionality.
>> +     */
>> +    if ( hvm_funcs.set_descriptor_access_exiting )
> 
> No need for this check.  Exiting is available on all generations and
> vendors.

VMX code treats this as optional, and I think it validly
does so at least in case we're running virtualized ourselves
and the lower hypervisor doesn't emulate this.

> Also, the header file probably wants a ! annotation for UMIP to signify
> that we are doing something special with it.

I can do so, sure. I'm never really sure how wide the scope
of "special" is here.

>> @@ -3731,6 +3732,13 @@ int hvm_descriptor_access_intercept(uint
>>      struct vcpu *curr = current;
>>      struct domain *currd = curr->domain;
>>  
>> +    if ( (is_write || curr->arch.hvm.guest_cr[4] & X86_CR4_UMIP) &&
> 
> Brackets for & expression?

Oops.

>> +         hvm_get_cpl(curr) )
>> +    {
>> +        hvm_inject_hw_exception(TRAP_gp_fault, 0);
>> +        return X86EMUL_OKAY;
>> +    }
> 
> I believe this is a logical change for monitor - previously, non-ring0
> events would go all the way to userspace.
> 
> I don't expect this to be an issue - monitoring agents really shouldn't
> be interested in userspace actions the guest kernel is trying to turn
> into #GP.

Isn't the present behavior flawed, in that UMIP (if supported
by hardware and enabled by the guest) doesn't get honored,
and the access _instead_ gets forwarded to the monitor?
Looking at public/vm_event.h I can't spot any means by which the
monitor could say "I want an exception here" in response. IOW -
shouldn't this hunk be split out as a prereq bug fix (i.e. the
aforementioned 4th patch)?

>> --- a/xen/arch/x86/hvm/svm/svm.c
>> +++ b/xen/arch/x86/hvm/svm/svm.c
>> @@ -547,6 +547,28 @@ void svm_update_guest_cr(struct vcpu *v,
>>              value &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
>>          }
>>  
>> +        if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
> 
> Throughout the series, examples like this should have the !cpu_has_umip
> clause first.  It is static per host, rather than variable per VM, and
> will improve the branch prediction.
> 
> Where the logic is equivalent, it is best to have the clauses in
> stability order, as this will prevent a modern CPU from even evaluating
> the CPUID policy.
> 
>> +        {
>> +            u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb);
>> +
>> +            if ( v->arch.hvm.guest_cr[4] & X86_CR4_UMIP )
>> +            {
>> +                value &= ~X86_CR4_UMIP;
>> +                ASSERT(vmcb_get_cr_intercepts(vmcb) & CR_INTERCEPT_CR0_READ);
> 
> It occurs to me that adding CR0 read exiting adds a lot of complexity
> for very little gain.
> 
> From a practical standpoint, UMIP exists to block SIDT/SGDT which are
> the two instructions which confer an attacker with useful information
> (linear addresses of the IDT/GDT respectively).  SLDT/STR only confer a
> 16 bit index within the GDT (fixed per OS), and SMSW is as good as a
> constant these days.
> 
> Given that Intel cannot intercept SMSW at all and we've already accepted
> that as a limitation vs architectural UMIP, I don't think extra
> complexity on AMD is worth the gain.

Hmm, I didn't want to make this emulation any less complete
than is necessary because of external restrictions. As an
intermediate solution, how about hiding this behind a
default-off command line option, e.g. "svm=full-umip"?
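
(The wiring could probably be as simple as a plain boolean, sketch
only, with the exact option name and parsing still to be decided:

    static bool __read_mostly opt_svm_full_umip;
    boolean_param("svm-full-umip", opt_svm_full_umip);

consulted wherever the CR0 read intercept handling gets enabled.)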

>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>> @@ -1537,6 +1552,7 @@ static void vmx_update_guest_cr(struct v
>>                                               (X86_CR4_PSE | X86_CR4_SMEP |
>>                                                X86_CR4_SMAP)
>>                                               : 0;
>> +            v->arch.hvm.vmx.cr4_host_mask |= cpu_has_umip ? 0 : X86_CR4_UMIP;
> 
> if ( !cpu_has_umip )
>      v->arch.hvm.vmx.cr4_host_mask |= X86_CR4_UMIP;

This wouldn't be in line with the immediately preceding code
(visible in context), and the subsequent code uses if(), aiui,
because the conditions there are (textually) more complex. So if
I were to make the change, I'd at least like to understand why
the adjacent code is fine doing things differently, the more so
since iirc it was often you who introduced such constructs in
favor of if()-s.

Jan



* Re: [PATCH] x86/HVM: support emulated UMIP
  2021-02-04 14:10 ` Andrew Cooper
  2021-02-05 13:45   ` Jan Beulich
@ 2021-02-05 14:02   ` Jan Beulich
  1 sibling, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-02-05 14:02 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Roger Pau Monné,
	Kevin Tian, Jun Nakajima, Tamas K Lengyel, xen-devel

On 04.02.2021 15:10, Andrew Cooper wrote:
> On 29/01/2021 11:45, Jan Beulich wrote:
>> +        {
>> +            u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb);
>> +
>> +            if ( v->arch.hvm.guest_cr[4] & X86_CR4_UMIP )
>> +            {
>> +                value &= ~X86_CR4_UMIP;
>> +                ASSERT(vmcb_get_cr_intercepts(vmcb) & CR_INTERCEPT_CR0_READ);
> 
> It occurs to me that adding CR0 read exiting adds a lot of complexity
> for very little gain.

Actually, upon second look - why do you say "adding CR0 read
exiting"? This is only an assertion. No other changes are
being made to CR0 interception (apart from the few lines in
the handling of the respective #VMEXIT).

Jan


