Re: [Xen-devel] [PATCH v2 1/2] nvmx: implement support for MSR bitmaps

From: "Tian, Kevin" <kevin.tian@intel.com>
To: Roger Pau Monne <roger.pau@citrix.com>,
	"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>, Wei Liu <wl@xen.org>,
	"Nakajima, Jun" <jun.nakajima@intel.com>
Subject: Re: [Xen-devel] [PATCH v2 1/2] nvmx: implement support for MSR bitmaps
Date: Mon, 3 Feb 2020 08:05:48 +0000	[thread overview]
Message-ID: <AADFC41AFE54684AB9EE6CBC0274A5D19D75F89F@SHSMSX104.ccr.corp.intel.com> (raw)
In-Reply-To: <20200129144514.96686-2-roger.pau@citrix.com>

> From: Roger Pau Monne <roger.pau@citrix.com>
> Sent: Wednesday, January 29, 2020 10:45 PM
> 
> Current implementation of nested VMX has a half baked handling of MSR
> bitmaps for the L1 VMM: it maps the L1 VMM provided MSR bitmap, but
> doesn't actually load it into the nested vmcs, and thus the nested
> guest vmcs ends up using the same MSR bitmap as the L1 VMM.
> 
> This is wrong as there's no assurance that the set of features enabled
> for the L1 vmcs are the same that L1 itself is going to use in the
> nested vmcs, and thus can lead to misconfigurations.
> 
> For example L1 vmcs can use x2APIC virtualization and virtual
> interrupt delivery, and thus some x2APIC MSRs won't be trapped so that
> they can be handled directly by the hardware using virtualization
> extensions. On the other hand, the nested vmcs created by L1 VMM might
> not use any of such features, so using a MSR bitmap that doesn't trap
> accesses to the x2APIC MSRs will be leaking them to the underlying
> hardware.
> 
> Fix this by crafting a merged MSR bitmap between the one used by L1
> and the nested guest.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> This seems better than what's done currently, but TBH there's a lot of
> work to be done in nvmx in order to make it functional and secure that
> I'm not sure whether building on top of the current implementation is
> something sane to do, or it would be better to start from scratch and
> re-implement nvmx to just support the minimum required set of VTx
> features in a sane and safe way.

without knowing what "a lot of work" actually means, it's difficult to 
judge which way is better. But from the listed changes in this series,
I think they are reasonable.

> ---
> Changes since v1:
>  - Split the x2APIC MSR fix into a separate patch.
>  - Move setting MSR_BITMAP vmcs field into load_vvmcs_host_state for
>    virtual vmexit.
>  - Allocate memory with MEMF_no_owner.
>  - Use tabs to align comment of the nestedvmx struct field.
> ---
>  xen/arch/x86/hvm/vmx/vvmx.c        | 63 ++++++++++++++++++++++++++++--
>  xen/include/asm-x86/hvm/vmx/vvmx.h |  3 +-
>  2 files changed, 62 insertions(+), 4 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 47eee1e5b9..c35b4bab84 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -128,6 +128,16 @@ int nvmx_vcpu_initialise(struct vcpu *v)
>          unmap_domain_page(vw);
>      }
> 
> +    if ( cpu_has_vmx_msr_bitmap )
> +    {
> +        nvmx->msr_merged = alloc_domheap_page(d, MEMF_no_owner);
> +        if ( !nvmx->msr_merged )
> +        {
> +            gdprintk(XENLOG_ERR, "nest: allocation for MSR bitmap failed\n");
> +            return -ENOMEM;
> +        }
> +    }
> +
>      nvmx->ept.enabled = 0;
>      nvmx->guest_vpid = 0;
>      nvmx->vmxon_region_pa = INVALID_PADDR;
> @@ -182,6 +192,11 @@ void nvmx_vcpu_destroy(struct vcpu *v)
>          free_domheap_page(v->arch.hvm.vmx.vmwrite_bitmap);
>          v->arch.hvm.vmx.vmwrite_bitmap = NULL;
>      }
> +    if ( nvmx->msr_merged )
> +    {
> +        free_domheap_page(nvmx->msr_merged);
> +        nvmx->msr_merged = NULL;
> +    }
>  }
> 
>  void nvmx_domain_relinquish_resources(struct domain *d)
> @@ -548,6 +563,37 @@ unsigned long *_shadow_io_bitmap(struct vcpu *v)
>      return nestedhvm_vcpu_iomap_get(port80, portED);
>  }
> 
> +static void update_msrbitmap(struct vcpu *v)
> +{
> +    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> +    struct vmx_msr_bitmap *msr_bitmap;
> +    unsigned int msr;
> +
> +    ASSERT(__n2_exec_control(v) & CPU_BASED_ACTIVATE_MSR_BITMAP);

what about passing shadow_cntrl and also moving the outer 
condition check into this function? It is not good to assume
that __n2_exec_control always has the same setting as the
local variable shadow_cntrl.

> +
> +    if ( !nvmx->msrbitmap )
> +        return;
> +
> +    msr_bitmap = __map_domain_page(nvmx->msr_merged);
> +
> +    bitmap_or(msr_bitmap->read_low, nvmx->msrbitmap->read_low,
> +              v->arch.hvm.vmx.msr_bitmap->read_low,
> +              sizeof(msr_bitmap->read_low) * 8);
> +    bitmap_or(msr_bitmap->read_high, nvmx->msrbitmap->read_high,
> +              v->arch.hvm.vmx.msr_bitmap->read_high,
> +              sizeof(msr_bitmap->read_high) * 8);
> +    bitmap_or(msr_bitmap->write_low, nvmx->msrbitmap->write_low,
> +              v->arch.hvm.vmx.msr_bitmap->write_low,
> +              sizeof(msr_bitmap->write_low) * 8);
> +    bitmap_or(msr_bitmap->write_high, nvmx->msrbitmap->write_high,
> +              v->arch.hvm.vmx.msr_bitmap->write_high,
> +              sizeof(msr_bitmap->write_high) * 8);
> +
> +    unmap_domain_page(msr_bitmap);
> +
> +    __vmwrite(MSR_BITMAP, page_to_maddr(nvmx->msr_merged));
> +}
> +
>  void nvmx_update_exec_control(struct vcpu *v, u32 host_cntrl)
>  {
>      u32 pio_cntrl = (CPU_BASED_ACTIVATE_IO_BITMAP
> @@ -558,10 +604,15 @@ void nvmx_update_exec_control(struct vcpu *v,
> u32 host_cntrl)
>      shadow_cntrl = __n2_exec_control(v);
>      pio_cntrl &= shadow_cntrl;
>      /* Enforce the removed features */
> -    shadow_cntrl &= ~(CPU_BASED_ACTIVATE_MSR_BITMAP
> -                      | CPU_BASED_ACTIVATE_IO_BITMAP
> +    shadow_cntrl &= ~(CPU_BASED_ACTIVATE_IO_BITMAP
>                        | CPU_BASED_UNCOND_IO_EXITING);
> -    shadow_cntrl |= host_cntrl;
> +    /*
> +     * Do NOT enforce the MSR bitmap currently used by L1, as certain
> hardware
> +     * virtualization features require specific MSR bitmap settings, but
> +     * without the guest also using these same features the bitmap could be
> +     * leaking through unwanted MSR accesses.
> +     */
> +    shadow_cntrl |= (host_cntrl & ~CPU_BASED_ACTIVATE_MSR_BITMAP);

what about msr bitmap is disabled in host_cntrl? We'd better use AND-ed
value from both shadow/host_cntrl for this bit, instead of assuming the
policy of current Xen version which enables msr bitmap by default. 

>      if ( pio_cntrl == CPU_BASED_UNCOND_IO_EXITING ) {
>          /* L1 VMM intercepts all I/O instructions */
>          shadow_cntrl |= CPU_BASED_UNCOND_IO_EXITING;
> @@ -584,6 +635,9 @@ void nvmx_update_exec_control(struct vcpu *v, u32
> host_cntrl)
>          __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
>      }
> 
> +    if ( shadow_cntrl & CPU_BASED_ACTIVATE_MSR_BITMAP )
> +        update_msrbitmap(v);
> +
>      /* TODO: change L0 intr window to MTF or NMI window */
>      __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
>  }
> @@ -1278,6 +1332,9 @@ static void load_vvmcs_host_state(struct vcpu *v)
>      hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, 0);
> 
>      set_vvmcs(v, VM_ENTRY_INTR_INFO, 0);
> +
> +    if ( v->arch.hvm.vmx.exec_control &
> CPU_BASED_ACTIVATE_MSR_BITMAP )
> +        __vmwrite(MSR_BITMAP, virt_to_maddr(v-
> >arch.hvm.vmx.msr_bitmap));
>  }
> 
>  static void sync_exception_state(struct vcpu *v)
> diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-
> x86/hvm/vmx/vvmx.h
> index 6b9c4ae0b2..c8d5600fdd 100644
> --- a/xen/include/asm-x86/hvm/vmx/vvmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
> @@ -37,7 +37,8 @@ struct nestedvmx {
>       */
>      paddr_t    vmxon_region_pa;
>      void       *iobitmap[2];		/* map (va) of L1 guest I/O bitmap */
> -    void       *msrbitmap;		/* map (va) of L1 guest MSR bitmap
> */
> +    struct vmx_msr_bitmap *msrbitmap;	/* map (va) of L1 guest MSR
> bitmap */
> +    struct page_info *msr_merged;	/* merged L1 and L1 guest MSR
> bitmap */

L1 and L2

>      /* deferred nested interrupt */
>      struct {
>          unsigned long intr_info;
> --
> 2.25.0

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel