[PATCH RFC for-4.15] x86/msr: introduce an option for legacy MSR behavior selection

* [PATCH RFC for-4.15] x86/msr: introduce an option for legacy MSR behavior selection
@ 2021-03-01 16:23 Roger Pau Monne
  2021-03-01 17:16 ` Ian Jackson
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Roger Pau Monne @ 2021-03-01 16:23 UTC (permalink / raw)
  To: xen-devel
  Cc: Roger Pau Monne, Ian Jackson, Wei Liu, Anthony PERARD,
	Jan Beulich, Andrew Cooper, Jun Nakajima, Kevin Tian,
	Boris Ostrovsky

Introduce an option to allow selecting the legacy behavior for
accesses to MSRs not explicitly handled. Since commit
84e848fd7a162f669 and 322ec7c89f6640e accesses to MSRs not explicitly
handled by Xen result in the injection of a #GP to the guest. This is
a behavior change since previously a #GP was only injected if
accessing the MSR on the real hardware will also trigger a #GP.

This seems to be problematic for some guests, so introduce an option
to fallback to this legacy behavior. The main difference between what
was previously done is that the hardware MSR value is not leaked to
the guests on reads.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Note that this option is not made available to dom0. I'm not sure
whether it makes sense to do so, since anyone updating Xen to such
newer version will also likely pair it with a newish kernel that
doesn't require such workarounds.

RFC because there's still some debate as to how we should solve the
MSR issue, this is one possible way, but IMO we need to make a
decision soon-ish because of the release timeline.

Boris, could you please test with Solaris to see if this fixes the
issue?
---
 docs/man/xl.cfg.5.pod.in          | 11 +++++++++++
 tools/include/libxl.h             |  8 ++++++++
 tools/libs/light/libxl_types.idl  |  2 ++
 tools/libs/light/libxl_x86.c      |  5 +++++
 tools/xl/xl_parse.c               |  3 +++
 xen/arch/x86/domain.c             |  3 +++
 xen/arch/x86/hvm/svm/svm.c        |  9 +++++++++
 xen/arch/x86/hvm/vmx/vmx.c        | 11 +++++++++++
 xen/arch/x86/pv/emul-priv-op.c    | 10 ++++++++++
 xen/include/asm-x86/domain.h      |  3 +++
 xen/include/public/arch-x86/xen.h |  3 +++
 11 files changed, 68 insertions(+)

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index 040374dcd6..78dadcdfdd 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -2861,6 +2861,17 @@ No MCA capabilities in above list are enabled.
 
 =back
 
+=item B<msr_legacy_handling=BOOLEAN>
+
+Select whether to use the legacy behaviour for accesses to MSRs not explicitly
+handled by Xen instead of injecting a #GP to the guest.  Such legacy access
+mode will force Xen to replicate the behaviour from the hardware it's currently
+running on in order to decide whether a #GP is injected to the guest.  Note
+that the guest is never allowed access to unhandled MSRs, this option only
+changes whether a #GP might be injected or not.
+
+=back
+
 =back
 
 =head1 SEE ALSO
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index a7b673e89d..3bf6aded97 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -495,6 +495,14 @@
  */
 #define LIBXL_HAVE_VMTRACE_BUF_KB 1
 
+/*
+ * LIBXL_HAVE_MSR_LEGACY_HANDLING indicates the toolstack has support for
+ * switching the MSR handling in the hypervisor to legacy mode, where #GP is
+ * only injected to guests for unhandled MSRs if accessing the MSR on the
+ * physical hardware also triggers a #GP.
+ */
+#define LIBXL_HAVE_MSR_LEGACY_HANDLING 1
+
 /*
  * libxl ABI compatibility
  *
diff --git a/tools/libs/light/libxl_types.idl b/tools/libs/light/libxl_types.idl
index 5b85a7419f..5bb12bc70d 100644
--- a/tools/libs/light/libxl_types.idl
+++ b/tools/libs/light/libxl_types.idl
@@ -644,6 +644,8 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("arch_arm", Struct(None, [("gic_version", libxl_gic_version),
                                ("vuart", libxl_vuart_type),
                               ])),
+    ("arch_x86", Struct(None, [("msr_legacy_handling", libxl_defbool),
+                              ])),
     # Alternate p2m is not bound to any architecture or guest type, as it is
     # supported by x86 HVM and ARM support is planned.
     ("altp2m", libxl_altp2m_mode),
diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
index 58187ed760..4b2b5d69a6 100644
--- a/tools/libs/light/libxl_x86.c
+++ b/tools/libs/light/libxl_x86.c
@@ -19,6 +19,10 @@ int libxl__arch_domain_prepare_config(libxl__gc *gc,
         abort();
     }
 
+    config->arch.domain_flags = 0;
+    if (libxl_defbool_val(d_config->b_info.arch_x86.msr_legacy_handling))
+        config->arch.domain_flags |= XEN_X86_LEGACY_MSR_HANDLING;
+
     return 0;
 }
 
@@ -809,6 +813,7 @@ void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
                                               libxl_domain_build_info *b_info)
 {
     libxl_defbool_setdefault(&b_info->acpi, true);
+    libxl_defbool_setdefault(&b_info->arch_x86.msr_legacy_handling, false);
 }
 
 int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 1893cfc086..fc79b6cc83 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -2741,6 +2741,9 @@ skip_usbdev:
     xlu_cfg_get_defbool(config, "xend_suspend_evtchn_compat",
                         &c_info->xend_suspend_evtchn_compat, 0);
 
+    xlu_cfg_get_defbool(config, "msr_legacy_handling",
+                        &b_info->arch_x86.msr_legacy_handling, 0);
+
     xlu_cfg_destroy(config);
 }
 
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 6c7ee25f3b..28805d50b8 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -852,6 +852,9 @@ int arch_domain_create(struct domain *d,
 
     domain_cpu_policy_changed(d);
 
+    d->arch.msr_legacy_handling = config->arch.domain_flags &
+                                  XEN_X86_LEGACY_MSR_HANDLING;
+
     return 0;
 
  fail:
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index b819897a4a..b535c5927a 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1795,6 +1795,7 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
     const struct domain *d = v->domain;
     struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
     const struct nestedsvm *nsvm = &vcpu_nestedsvm(v);
+    uint64_t tmp;
 
     switch ( msr )
     {
@@ -1965,6 +1966,11 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         break;
 
     default:
+        if ( d->arch.msr_legacy_handling && !rdmsr_safe(msr, tmp) )
+        {
+            *msr_content = 0;
+            break;
+        }
         gdprintk(XENLOG_WARNING, "RDMSR 0x%08x unimplemented\n", msr);
         goto gpf;
     }
@@ -2151,6 +2157,9 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         break;
 
     default:
+        if ( d->arch.msr_legacy_handling && !rdmsr_safe(msr, msr_content) )
+            break;
+
         gdprintk(XENLOG_WARNING,
                  "WRMSR 0x%08x val 0x%016"PRIx64" unimplemented\n",
                  msr, msr_content);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index faba95d057..7707ae8cbc 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3114,6 +3114,7 @@ static int is_last_branch_msr(u32 ecx)
 static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 {
     struct vcpu *curr = current;
+    uint64_t tmp;
 
     HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x", msr);
 
@@ -3195,6 +3196,12 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
             break;
         }
 
+        if ( curr->domain->arch.msr_legacy_handling && !rdmsr_safe(msr, tmp) )
+        {
+            *msr_content = 0;
+            break;
+        }
+
         gdprintk(XENLOG_WARNING, "RDMSR 0x%08x unimplemented\n", msr);
         goto gp_fault;
     }
@@ -3497,6 +3504,10 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
              is_last_branch_msr(msr) )
             break;
 
+        if ( v->domain->arch.msr_legacy_handling &&
+             !rdmsr_safe(msr, msr_content) )
+            break;
+
         gdprintk(XENLOG_WARNING,
                  "WRMSR 0x%08x val 0x%016"PRIx64" unimplemented\n",
                  msr, msr_content);
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index e5a22b9347..ecf98c4a05 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -875,6 +875,7 @@ static int read_msr(unsigned int reg, uint64_t *val,
     const struct domain *currd = curr->domain;
     const struct cpuid_policy *cp = currd->arch.cpuid;
     bool vpmu_msr = false;
+    uint64_t tmp;
     int ret;
 
     if ( (ret = guest_rdmsr(curr, reg, val)) != X86EMUL_UNHANDLEABLE )
@@ -986,6 +987,12 @@ static int read_msr(unsigned int reg, uint64_t *val,
         }
         /* fall through */
     default:
+        if ( currd->arch.msr_legacy_handling && !rdmsr_safe(reg, tmp) )
+        {
+            *val = 0;
+            return X86EMUL_OKAY;
+        }
+
         gdprintk(XENLOG_WARNING, "RDMSR 0x%08x unimplemented\n", reg);
         break;
 
@@ -1148,6 +1155,9 @@ static int write_msr(unsigned int reg, uint64_t val,
         }
         /* fall through */
     default:
+        if ( currd->arch.msr_legacy_handling && !rdmsr_safe(reg, val) )
+            return X86EMUL_OKAY;
+
         gdprintk(XENLOG_WARNING,
                  "WRMSR 0x%08x val 0x%016"PRIx64" unimplemented\n",
                  reg, val);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 3900d7b48b..213557fc2c 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -437,6 +437,9 @@ struct arch_domain
     /* Mem_access emulation control */
     bool_t mem_access_emulate_each_rep;
 
+    /* Legacy handling of MSR accesses. */
+    bool msr_legacy_handling;
+
     /* Emulated devices enabled bitmap. */
     uint32_t emulation_flags;
 } __cacheline_aligned;
diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h
index 629cb2ba40..0a6b40bb89 100644
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -304,6 +304,9 @@ struct xen_arch_domainconfig {
                                      XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ |\
                                      XEN_X86_EMU_VPCI)
     uint32_t emulation_flags;
+
+#define XEN_X86_LEGACY_MSR_HANDLING (1u << 0)
+    uint32_t domain_flags;
 };
 
 /* Location of online VCPU bitmap. */
-- 
2.30.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread