[PATCH RFC] x86/Intel: virtualize support for cpuid faulting

* [PATCH RFC] x86/Intel: virtualize support for cpuid faulting
@ 2016-10-03 22:38 Kyle Huey
  2016-10-04  7:25 ` Jan Beulich
  2016-10-04 10:31 ` Andrew Cooper
  0 siblings, 2 replies; 9+ messages in thread
From: Kyle Huey @ 2016-10-03 22:38 UTC (permalink / raw)
  To: xen-devel
  Cc: Jun Nakajima, Andrew Cooper, Kevin Tian, Robert O'Callahan,
	Jan Beulich

rr (http://rr-project.org/), a Linux userspace record-and-replay reverse-
execution debugger, would like to trap and emulate the CPUID instruction.
This would allow us to a) mask away certain hardware features that rr does
not support (e.g. RDRAND) and b) enable trace portability across machines
by providing constant results. Patches for support in the Linux kernel are in
flight, and we'd like to be able to use this feature on virtualized Linux
instances as well.

On HVM guests, the CPUID instruction triggers a VM exit, so we can check the
emulated faulting state in vmx_do_cpuid and inject a GP(0) if CPL > 0. Notably,
no hardware support for CPUID faulting is necessary to emulate it for an HVM
guest.

On PV guests, hardware support is required so that a userspace CPUID will trap
to Xen. Xen already enables CPUID faulting on supported CPUs for PV guests
(other than the control domain; see the comment in
intel_ctxt_switch_levelling). Every userspace CPUID will trap via a GP(0) to
emulate_privileged_op (via do_general_protection). Once there, we can simply
decline to emulate the CPUID and instead pass the GP(0) along to the guest
kernel, thus emulating the CPUID faulting behavior. PV guest kernels enter
pv_cpuid via a different path, so we do not need to check the CPL here.

Signed-off-by: Kyle Huey <khuey@kylehuey.com>
---
 xen/arch/x86/domain.c        |  2 ++
 xen/arch/x86/hvm/vmx/vmx.c   | 24 +++++++++++++++++++-----
 xen/arch/x86/traps.c         | 31 ++++++++++++++++++++++++++++++-
 xen/include/asm-x86/domain.h |  3 +++
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 3c4b094..f2bac10 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1095,6 +1095,8 @@ int arch_set_info_guest(
     for ( i = 0; i < 8; i++ )
         (void)set_debugreg(v, i, c(debugreg[i]));
 
+    v->arch.cpuid_fault = 0;
+
     if ( v->is_initialised )
         goto out;
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 50cbfed..f1e04c1 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2430,11 +2430,16 @@ static void vmx_cpuid_intercept(
     HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
 }
 
-static int vmx_do_cpuid(struct cpu_user_regs *regs)
+static int vmx_do_cpuid(struct vcpu *v, struct cpu_user_regs *regs)
 {
     unsigned int eax, ebx, ecx, edx;
     unsigned int leaf, subleaf;
 
+    if ( v->arch.cpuid_fault && !ring_0(regs) ) {
+        hvm_inject_hw_exception(TRAP_gp_fault, 0);
+        return 0;
+    }
+
     eax = regs->eax;
     ebx = regs->ebx;
     ecx = regs->ecx;
@@ -2701,9 +2706,13 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         break;
 
     case MSR_INTEL_PLATFORM_INFO:
-        if ( rdmsr_safe(MSR_INTEL_PLATFORM_INFO, *msr_content) )
-            goto gp_fault;
-        *msr_content = 0;
+        if ( is_pvh_vcpu(current) && !cpu_has_cpuid_faulting )
+            *msr_content = 0;
+        else
+            *msr_content = 0x80000000ULL;
+        break;
+    case MSR_INTEL_MISC_FEATURES_ENABLES:
+        *msr_content = current->arch.cpuid_fault ? 1ULL : 0;
         break;
 
     default:
@@ -2931,6 +2940,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
              rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msr_content) )
             goto gp_fault;
         break;
+    case MSR_INTEL_MISC_FEATURES_ENABLES:
+        if ( msr_content > 1 )
+            goto gp_fault;
+        v->arch.cpuid_fault = msr_content;
+        break;
 
     default:
         if ( passive_domain_do_wrmsr(msr, msr_content) )
@@ -3605,7 +3619,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
             rc = 0;
         }
         else
-            rc = vmx_do_cpuid(regs);
+            rc = vmx_do_cpuid(v, regs);
 
         /*
          * rc < 0 error in monitor/vm_event, crash
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 24d173f..d5a348e 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2945,6 +2945,16 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
                  rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msr_content) )
                 goto fail;
             break;
+        case MSR_INTEL_MISC_FEATURES_ENABLES:
+            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+                 rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, val) ||
+                 msr_content > 1 )
+                goto fail;
+            if ( msr_content == 1 &&
+                 (!cpu_has_cpuid_faulting || is_control_domain(v->domain)) )
+                goto fail;
+            v->arch.cpuid_fault = msr_content;
+            break;
 
         case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
         case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
@@ -3079,7 +3089,22 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
             if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
                  rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val) )
                 goto fail;
-            regs->eax = regs->edx = 0;
+            /*
+             * See the comment in intel_ctxt_switch_levelling about cpuid
+             * faulting in the control domain.
+             */
+            if ( cpu_has_cpuid_faulting && !is_control_domain(v->domain) )
+                regs->eax = 0x80000000;
+            else
+                regs->eax = 0;
+            regs->edx = 0;
+            break;
+        case MSR_INTEL_MISC_FEATURES_ENABLES:
+            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+                 rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES, val))
+                goto fail;
+            regs->eax = v->arch.cpuid_fault ? 1UL : 0;
+            regs->edx = 0;
             break;
 
         case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
@@ -3137,6 +3162,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         break;
 
     case 0xa2: /* CPUID */
+        /* Let the guest have this one */
+        if ( v->arch.cpuid_fault )
+            goto fail;
+
         pv_cpuid(regs);
         break;
 
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 5807a1f..9a52ea6 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -573,6 +573,9 @@ struct arch_vcpu
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
 
     struct arch_vm_event *vm_event;
+
+    /* Has the guest enabled CPUID faulting? */
+    bool_t cpuid_fault;
 };
 
 smap_check_policy_t smap_policy_change(struct vcpu *v,
-- 
2.10.0

base-commit: b982a5bea4273a4b9fc007d5046bed8d1669c07f

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread