All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Ross Lagerwall <ross.lagerwall@citrix.com>, xen-devel@lists.xen.org
Cc: Sergey Dyasli <sergey.dyasli@citrix.com>,
	Kevin Tian <kevin.tian@intel.com>,
	Jan Beulich <jbeulich@suse.com>,
	Jun Nakajima <jun.nakajima@intel.com>
Subject: Re: [PATCH] x86/vmx: Fix vmentry failure because of invalid LER on Broadwell
Date: Fri, 26 May 2017 10:02:27 +0100	[thread overview]
Message-ID: <571331c1-126f-7039-f354-5f62f2ee3535@citrix.com> (raw)
In-Reply-To: <20170525121533.32409-1-ross.lagerwall@citrix.com>

On 25/05/17 13:15, Ross Lagerwall wrote:
> Occasionally, the top three bits of MSR_IA32_LASTINTTOIP
> (MSR_LER_TO_LIP) may be incorrect, as though the MSR is using the
> LBR_FORMAT_EIP_FLAGS_TSX format. The MSR should contain an offset into
> the current code segment according to the Intel documentation. It is not
> clear why this happens. It may be due to erratum BDF14, or some other
> erratum.  The result is a vmentry failure.
>
> Work around the issue by sign-extending into bits 61:63 for this MSR on
> Broadwell CPUs.
> ---
>  xen/arch/x86/hvm/vmx/vmx.c         | 29 +++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/vmx/vmcs.h |  1 +
>  2 files changed, 30 insertions(+)
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c8ef18a..7d729af 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2434,6 +2434,7 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)
>  }
>  
>  static void __init lbr_tsx_fixup_check(void);
> +static void __init ler_bdw_fixup_check(void);
>  
>  const struct hvm_function_table * __init start_vmx(void)
>  {
> @@ -2499,6 +2500,7 @@ const struct hvm_function_table * __init start_vmx(void)
>      setup_vmcs_dump();
>  
>      lbr_tsx_fixup_check();
> +    ler_bdw_fixup_check();
>  
>      return &vmx_function_table;
>  }
> @@ -2790,8 +2792,10 @@ enum
>  };
>  
>  #define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
> +#define LER_TO_SIGNEXT_3MSB  (LBR_FROM_SIGNEXT_2MSB | (1ULL << 58))
>  
>  static bool __read_mostly lbr_tsx_fixup_needed;
> +static bool __read_mostly ler_bdw_fixup_needed;
>  static uint32_t __read_mostly lbr_from_start;
>  static uint32_t __read_mostly lbr_from_end;
>  static uint32_t __read_mostly lbr_lastint_from;
> @@ -2828,6 +2832,13 @@ static void __init lbr_tsx_fixup_check(void)
>      }
>  }
>  
> +static void __init ler_bdw_fixup_check(void)
> +{
> +    /* Broadwell E5-2600 v4 processors need to work around erratum BDF14. */
> +    if ( boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 79 )
> +        ler_bdw_fixup_needed = true;
> +}
> +
>  static int is_last_branch_msr(u32 ecx)
>  {
>      const struct lbr_info *lbr = last_branch_msr_get();
> @@ -3089,6 +3100,8 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
>                          v->arch.hvm_vmx.lbr_tsx_fixup_enabled =
>                              lbr_tsx_fixup_needed;
> +                        v->arch.hvm_vmx.ler_bdw_fixup_enabled =
> +                            ler_bdw_fixup_needed;
>                      }
>          }
>  
> @@ -4174,6 +4187,20 @@ static void lbr_tsx_fixup(void)
>          msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
>  }
>  
> +static void ler_bdw_fixup(void)

Could we work the erratum identifier into the name?  How about
bdw_erratum_bdf14_fixup() ?

> +{
> +    struct vmx_msr_entry *msr;
> +
> +    /*
> +     * Occasionally, the top three bits of MSR_IA32_LASTINTTOIP may be
> +     * incorrect (possibly due to BDF14), as though the MSR is using the
> +     * LBR_FORMAT_EIP_FLAGS_TSX format. This is incorrect and causes a vmentry
> +     * failure -- the MSR should contain an offset into the current code
> +     * segment. Fix it up by sign-extending into bits 61:63. */

*/ on newline.

> +    if ( (msr = vmx_find_msr(MSR_IA32_LASTINTTOIP, VMX_GUEST_MSR)) != NULL )
> +        msr->data |= ((LER_TO_SIGNEXT_3MSB & msr->data) << 3);

Thinking more about this logic, use

diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
index 1a6cae6..4025e3c 100644
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -28,6 +28,9 @@
 #define PADDR_MASK              ((1UL << PADDR_BITS)-1)
 #define VADDR_MASK              ((1UL << VADDR_BITS)-1)
 
+#define VADDR_TOP_BIT           (1UL << (VADDR_BITS - 1))
+#define CANONICAL_MASK          (((1UL << 63) - 1) & ~((1UL << VADDR_BITS) - 1))
+
 #define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63))
 
 #ifndef __ASSEMBLY__

BDF14 says that the values are unreliable, and we are only guessing at
LBR_FORMAT_EIP_FLAGS_TSX.  As a result, when re-canonicalising them, we
should do it properly, to cover other eventualities.  Something like this:

if ( msr->data & VADDR_TOP_BIT )
    msr->data |= CANONICAL_MASK;
else
    msr->data &= ~CANONICAL_MASK;

Also, per BDF14, this adjustment needs to happen to
MSR_IA32_LASTINTFROMIP, even if we haven't actually observed a specific
failure there.

~Andrew

> +}
> +
>  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *curr = current;
> @@ -4232,6 +4259,8 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>   out:
>      if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
>          lbr_tsx_fixup();
> +    if ( unlikely(curr->arch.hvm_vmx.ler_bdw_fixup_enabled) )
> +        ler_bdw_fixup();
>  
>      HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
>  
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index 9507bd2..aedef82 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -137,6 +137,7 @@ struct arch_vmx_struct {
>      uint8_t              vmx_emulate;
>  
>      bool                 lbr_tsx_fixup_enabled;
> +    bool                 ler_bdw_fixup_enabled;
>  
>      /* Bitmask of segments that we can't safely use in virtual 8086 mode */
>      uint16_t             vm86_segment_mask;


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

      parent reply	other threads:[~2017-05-26  9:02 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-25 12:15 [PATCH] x86/vmx: Fix vmentry failure because of invalid LER on Broadwell Ross Lagerwall
2017-05-26  7:24 ` Sergey Dyasli
2017-05-26  9:02 ` Andrew Cooper [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=571331c1-126f-7039-f354-5f62f2ee3535@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=ross.lagerwall@citrix.com \
    --cc=sergey.dyasli@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.