kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [kvm-unit-tests PATCH v2] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE
@ 2020-08-19 20:56 Peter Shier
  2020-09-01 20:31 ` Peter Shier
  2020-09-12  6:20 ` Paolo Bonzini
  0 siblings, 2 replies; 3+ messages in thread
From: Peter Shier @ 2020-08-19 20:56 UTC (permalink / raw)
  To: kvm; +Cc: pbonzini, Peter Shier, Jim Mattson

Verify that when an L2 guest enables PAE paging and L0's intercept of the
L2 MOV to CR0 reflects an MTF exit to L1, a subsequent resume to L2
correctly preserves the PDPTE array specified by L2's CR3.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by:   Peter Shier <pshier@google.com>
Signed-off-by: Peter Shier <pshier@google.com>
---
 lib/x86/asm/page.h |   8 +++
 x86/vmx_tests.c    | 173 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+)

diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
index 7e2a3dd4b90a..1359eb74cde4 100644
--- a/lib/x86/asm/page.h
+++ b/lib/x86/asm/page.h
@@ -36,10 +36,18 @@ typedef unsigned long pgd_t;
 #define PT64_NX_MASK		(1ull << 63)
 #define PT_ADDR_MASK		GENMASK_ULL(51, 12)
 
+#define PDPTE64_PAGE_SIZE_MASK	  (1ull << 7)
+#define PDPTE64_RSVD_MASK	  GENMASK_ULL(51, cpuid_maxphyaddr())
+
 #define PT_AD_MASK              (PT_ACCESSED_MASK | PT_DIRTY_MASK)
 
+#define PAE_PDPTE_RSVD_MASK     (GENMASK_ULL(63, cpuid_maxphyaddr()) |	\
+				 GENMASK_ULL(8, 5) | GENMASK_ULL(2, 1))
+
+
 #ifdef __x86_64__
 #define	PAGE_LEVEL	4
+#define	PDPT_LEVEL	3
 #define	PGDIR_WIDTH	9
 #define	PGDIR_MASK	511
 #else
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 32e3d4f47b33..22f0c7b975be 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -5250,6 +5250,178 @@ static void vmx_mtf_test(void)
 	enter_guest();
 }
 
+extern char vmx_mtf_pdpte_guest_begin;
+extern char vmx_mtf_pdpte_guest_end;
+
+asm("vmx_mtf_pdpte_guest_begin:\n\t"
+    "mov %cr0, %rax\n\t"    /* save CR0 with PG=1                 */
+    "vmcall\n\t"            /* on return from this CR0.PG=0       */
+    "mov %rax, %cr0\n\t"    /* restore CR0.PG=1 to enter PAE mode */
+    "vmcall\n\t"
+    "retq\n\t"
+    "vmx_mtf_pdpte_guest_end:");
+
+static void vmx_mtf_pdpte_test(void)
+{
+	void *test_mtf_pdpte_guest;
+	pteval_t *pdpt;
+	u32 guest_ar_cs;
+	u64 guest_efer;
+	pteval_t *pte;
+	u64 guest_cr0;
+	u64 guest_cr3;
+	u64 guest_cr4;
+	u64 ent_ctls;
+	int i;
+
+	if (setup_ept(false))
+		return;
+
+	if (!(ctrl_cpu_rev[0].clr & CPU_MTF)) {
+		printf("CPU does not support 'monitor trap flag.'\n");
+		return;
+	}
+
+	if (!(ctrl_cpu_rev[1].clr & CPU_URG)) {
+		printf("CPU does not support 'unrestricted guest.'\n");
+		return;
+	}
+
+	vmcs_write(EXC_BITMAP, ~0);
+	vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG);
+
+	/*
+	 * Copy the guest code to an identity-mapped page.
+	 */
+	test_mtf_pdpte_guest = alloc_page();
+	memcpy(test_mtf_pdpte_guest, &vmx_mtf_pdpte_guest_begin,
+	       &vmx_mtf_pdpte_guest_end - &vmx_mtf_pdpte_guest_begin);
+
+	test_set_guest(test_mtf_pdpte_guest);
+
+	enter_guest();
+	skip_exit_vmcall();
+
+	/*
+	 * Put the guest in non-paged 32-bit protected mode, ready to enter
+	 * PAE mode when CR0.PG is set. CR4.PAE will already have been set
+	 * when the guest started out in long mode.
+	 */
+	ent_ctls = vmcs_read(ENT_CONTROLS);
+	vmcs_write(ENT_CONTROLS, ent_ctls & ~ENT_GUEST_64);
+
+	guest_efer = vmcs_read(GUEST_EFER);
+	vmcs_write(GUEST_EFER, guest_efer & ~(EFER_LMA | EFER_LME));
+
+	/*
+	 * Set CS access rights bits for 32-bit protected mode:
+	 * 3:0    B execute/read/accessed
+	 * 4      1 code or data
+	 * 6:5    0 descriptor privilege level
+	 * 7      1 present
+	 * 11:8   0 reserved
+	 * 12     0 available for use by system software
+	 * 13     0 64 bit mode not active
+	 * 14     1 default operation size 32-bit segment
+	 * 15     1 page granularity: segment limit in 4K units
+	 * 16     0 segment usable
+	 * 31:17  0 reserved
+	 */
+	guest_ar_cs = vmcs_read(GUEST_AR_CS);
+	vmcs_write(GUEST_AR_CS, 0xc09b);
+
+	guest_cr0 = vmcs_read(GUEST_CR0);
+	vmcs_write(GUEST_CR0, guest_cr0 & ~X86_CR0_PG);
+
+	guest_cr4 = vmcs_read(GUEST_CR4);
+	vmcs_write(GUEST_CR4, guest_cr4 & ~X86_CR4_PCIDE);
+
+	guest_cr3 = vmcs_read(GUEST_CR3);
+
+	/*
+	 * Turn the 4-level page table into a PAE page table by following the 0th
+	 * PML4 entry to a PDPT page, and grab the first four PDPTEs from that
+	 * page.
+	 *
+	 * Why does this work?
+	 *
+	 * PAE uses 32-bit addressing which implies:
+	 * Bits 11:0   page offset
+	 * Bits 20:12  entry into 512-entry page table
+	 * Bits 29:21  entry into a 512-entry directory table
+	 * Bits 31:30  entry into the page directory pointer table.
+	 * Bits 63:32  zero
+	 *
+	 * As only 2 bits are needed to select the PDPTEs for the entire
+	 * 32-bit address space, take the first 4 PDPTEs in the level 3 page
+	 * directory pointer table. It doesn't matter which of these PDPTEs
+	 * are present because they must cover the guest code given that it
+	 * has already run successfully.
+	 *
+	 * Get a pointer to the PDPTE for GVA=0, i.e. entry 0 of that PDPT page.
+	 */
+	pte = get_pte_level(
+            (pgd_t *)phys_to_virt(guest_cr3 & ~X86_CR3_PCID_MASK), 0,
+            PDPT_LEVEL);
+
+	/*
+	 * Need some memory for the 4-entry PAE page directory pointer
+	 * table. Use the end of the identity-mapped page where the guest code
+	 * is stored. There is definitely space as the guest code is only a
+	 * few bytes.
+	 */
+	pdpt = test_mtf_pdpte_guest + PAGE_SIZE - 4 * sizeof(pteval_t);
+
+	/*
+	 * Copy the first four PDPTEs into the PAE page table with reserved
+	 * bits cleared. Note that permission bits from the PML4E and PDPTE
+	 * are not propagated.
+	 */
+	for (i = 0; i < 4; i++) {
+		TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_RSVD_MASK),
+				   "PDPTE has invalid reserved bits");
+		TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_PAGE_SIZE_MASK),
+				   "Cannot use 1GB super pages for PAE");
+		pdpt[i] = pte[i] & ~(PAE_PDPTE_RSVD_MASK);
+	}
+	vmcs_write(GUEST_CR3, virt_to_phys(pdpt));
+
+	enable_mtf();
+	enter_guest();
+	assert_exit_reason(VMX_MTF);
+	disable_mtf();
+
+	/*
+	 * The four PDPTEs should have been loaded into the VMCS when
+	 * the guest set CR0.PG to enter PAE mode.
+	 */
+	for (i = 0; i < 4; i++) {
+		u64 pdpte = vmcs_read(GUEST_PDPTE + 2 * i);
+
+		report(pdpte == pdpt[i], "PDPTE%d is 0x%lx (expected 0x%lx)",
+		       i, pdpte, pdpt[i]);
+	}
+
+	/*
+	 * Now, try to enter the guest in PAE mode. If the PDPTEs in the
+	 * VMCS are wrong, this will fail.
+	 */
+	enter_guest();
+	skip_exit_vmcall();
+
+	/*
+	 * Return guest to 64-bit mode and wrap up.
+	 */
+	vmcs_write(ENT_CONTROLS, ent_ctls);
+	vmcs_write(GUEST_EFER, guest_efer);
+	vmcs_write(GUEST_AR_CS, guest_ar_cs);
+	vmcs_write(GUEST_CR0, guest_cr0);
+	vmcs_write(GUEST_CR4, guest_cr4);
+	vmcs_write(GUEST_CR3, guest_cr3);
+
+	enter_guest();
+}
+
 /*
  * Tests for VM-execution control fields
  */
@@ -10112,6 +10284,6 @@ struct vmx_test vmx_tests[] = {
 	TEST(atomic_switch_overflow_msrs_test),
 	TEST(rdtsc_vmexit_diff_test),
 	TEST(vmx_mtf_test),
+	TEST(vmx_mtf_pdpte_test),
 	{ NULL, NULL, NULL, NULL, NULL, {0} },
 };
-- 


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [kvm-unit-tests PATCH v2] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE
  2020-08-19 20:56 [kvm-unit-tests PATCH v2] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE Peter Shier
@ 2020-09-01 20:31 ` Peter Shier
  2020-09-12  6:20 ` Paolo Bonzini
  1 sibling, 0 replies; 3+ messages in thread
From: Peter Shier @ 2020-09-01 20:31 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Jim Mattson, Andrew Jones

On Wed, Aug 19, 2020 at 1:56 PM Peter Shier <pshier@google.com> wrote:
>
> Verify that when L2 guest enables PAE paging and L0 intercept of L2
> MOV to CR0 reflects MTF exit to L1, subsequent resume to L2 correctly
> preserves PDPTE array specified by L2 CR3.
>
> Signed-off-by: Jim Mattson <jmattson@google.com>
> Reviewed-by:   Peter Shier <pshier@google.com>
> Signed-off-by: Peter Shier <pshier@google.com>
> ---
>  lib/x86/asm/page.h |   8 +++
>  x86/vmx_tests.c    | 173 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 181 insertions(+)
>
> diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
> index 7e2a3dd4b90a..1359eb74cde4 100644
> --- a/lib/x86/asm/page.h
> +++ b/lib/x86/asm/page.h
> @@ -36,10 +36,18 @@ typedef unsigned long pgd_t;
>  #define PT64_NX_MASK           (1ull << 63)
>  #define PT_ADDR_MASK           GENMASK_ULL(51, 12)
>
> +#define PDPTE64_PAGE_SIZE_MASK   (1ull << 7)
> +#define PDPTE64_RSVD_MASK        GENMASK_ULL(51, cpuid_maxphyaddr())
> +
>  #define PT_AD_MASK              (PT_ACCESSED_MASK | PT_DIRTY_MASK)
>
> +#define PAE_PDPTE_RSVD_MASK     (GENMASK_ULL(63, cpuid_maxphyaddr()) | \
> +                                GENMASK_ULL(8, 5) | GENMASK_ULL(2, 1))
> +
> +
>  #ifdef __x86_64__
>  #define        PAGE_LEVEL      4
> +#define        PDPT_LEVEL      3
>  #define        PGDIR_WIDTH     9
>  #define        PGDIR_MASK      511
>  #else
> diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
> index 32e3d4f47b33..22f0c7b975be 100644
> --- a/x86/vmx_tests.c
> +++ b/x86/vmx_tests.c
> @@ -5250,6 +5250,178 @@ static void vmx_mtf_test(void)
>         enter_guest();
>  }
>
> +extern char vmx_mtf_pdpte_guest_begin;
> +extern char vmx_mtf_pdpte_guest_end;
> +
> +asm("vmx_mtf_pdpte_guest_begin:\n\t"
> +    "mov %cr0, %rax\n\t"    /* save CR0 with PG=1                 */
> +    "vmcall\n\t"            /* on return from this CR0.PG=0       */
> +    "mov %rax, %cr0\n\t"    /* restore CR0.PG=1 to enter PAE mode */
> +    "vmcall\n\t"
> +    "retq\n\t"
> +    "vmx_mtf_pdpte_guest_end:");
> +
> +static void vmx_mtf_pdpte_test(void)
> +{
> +       void *test_mtf_pdpte_guest;
> +       pteval_t *pdpt;
> +       u32 guest_ar_cs;
> +       u64 guest_efer;
> +       pteval_t *pte;
> +       u64 guest_cr0;
> +       u64 guest_cr3;
> +       u64 guest_cr4;
> +       u64 ent_ctls;
> +       int i;
> +
> +       if (setup_ept(false))
> +               return;
> +
> +       if (!(ctrl_cpu_rev[0].clr & CPU_MTF)) {
> +               printf("CPU does not support 'monitor trap flag.'\n");
> +               return;
> +       }
> +
> +       if (!(ctrl_cpu_rev[1].clr & CPU_URG)) {
> +               printf("CPU does not support 'unrestricted guest.'\n");
> +               return;
> +       }
> +
> +       vmcs_write(EXC_BITMAP, ~0);
> +       vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG);
> +
> +       /*
> +        * Copy the guest code to an identity-mapped page.
> +        */
> +       test_mtf_pdpte_guest = alloc_page();
> +       memcpy(test_mtf_pdpte_guest, &vmx_mtf_pdpte_guest_begin,
> +              &vmx_mtf_pdpte_guest_end - &vmx_mtf_pdpte_guest_begin);
> +
> +       test_set_guest(test_mtf_pdpte_guest);
> +
> +       enter_guest();
> +       skip_exit_vmcall();
> +
> +       /*
> +        * Put the guest in non-paged 32-bit protected mode, ready to enter
> +        * PAE mode when CR0.PG is set. CR4.PAE will already have been set
> +        * when the guest started out in long mode.
> +        */
> +       ent_ctls = vmcs_read(ENT_CONTROLS);
> +       vmcs_write(ENT_CONTROLS, ent_ctls & ~ENT_GUEST_64);
> +
> +       guest_efer = vmcs_read(GUEST_EFER);
> +       vmcs_write(GUEST_EFER, guest_efer & ~(EFER_LMA | EFER_LME));
> +
> +       /*
> +        * Set CS access rights bits for 32-bit protected mode:
> +        * 3:0    B execute/read/accessed
> +        * 4      1 code or data
> +        * 6:5    0 descriptor privilege level
> +        * 7      1 present
> +        * 11:8   0 reserved
> +        * 12     0 available for use by system software
> +        * 13     0 64 bit mode not active
> +        * 14     1 default operation size 32-bit segment
> +        * 15     1 page granularity: segment limit in 4K units
> +        * 16     0 segment usable
> +        * 31:17  0 reserved
> +        */
> +       guest_ar_cs = vmcs_read(GUEST_AR_CS);
> +       vmcs_write(GUEST_AR_CS, 0xc09b);
> +
> +       guest_cr0 = vmcs_read(GUEST_CR0);
> +       vmcs_write(GUEST_CR0, guest_cr0 & ~X86_CR0_PG);
> +
> +       guest_cr4 = vmcs_read(GUEST_CR4);
> +       vmcs_write(GUEST_CR4, guest_cr4 & ~X86_CR4_PCIDE);
> +
> +       guest_cr3 = vmcs_read(GUEST_CR3);
> +
> +       /*
> +        * Turn the 4-level page table into a PAE page table by following the 0th
> +        * PML4 entry to a PDPT page, and grab the first four PDPTEs from that
> +        * page.
> +        *
> +        * Why does this work?
> +        *
> +        * PAE uses 32-bit addressing which implies:
> +        * Bits 11:0   page offset
> +        * Bits 20:12  entry into 512-entry page table
> +        * Bits 29:21  entry into a 512-entry directory table
> +        * Bits 31:30  entry into the page directory pointer table.
> +        * Bits 63:32  zero
> +        *
> +        * As only 2 bits are needed to select the PDPTEs for the entire
> +        * 32-bit address space, take the first 4 PDPTEs in the level 3 page
> +        * directory pointer table. It doesn't matter which of these PDPTEs
> +        * are present because they must cover the guest code given that it
> +        * has already run successfully.
> +        *
> +        * Get a pointer to PTE for GVA=0 in the page directory pointer table
> +        */
> +       pte = get_pte_level(
> +            (pgd_t *)phys_to_virt(guest_cr3 & ~X86_CR3_PCID_MASK), 0,
> +            PDPT_LEVEL);
> +
> +       /*
> +        * Need some memory for the 4-entry PAE page directory pointer
> +        * table. Use the end of the identity-mapped page where the guest code
> +        * is stored. There is definitely space as the guest code is only a
> +        * few bytes.
> +        */
> +       pdpt = test_mtf_pdpte_guest + PAGE_SIZE - 4 * sizeof(pteval_t);
> +
> +       /*
> +        * Copy the first four PDPTEs into the PAE page table with reserved
> +        * bits cleared. Note that permission bits from the PML4E and PDPTE
> +        * are not propagated.
> +        */
> +       for (i = 0; i < 4; i++) {
> +               TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_RSVD_MASK),
> +                                  "PDPTE has invalid reserved bits");
> +               TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_PAGE_SIZE_MASK),
> +                                  "Cannot use 1GB super pages for PAE");
> +               pdpt[i] = pte[i] & ~(PAE_PDPTE_RSVD_MASK);
> +       }
> +       vmcs_write(GUEST_CR3, virt_to_phys(pdpt));
> +
> +       enable_mtf();
> +       enter_guest();
> +       assert_exit_reason(VMX_MTF);
> +       disable_mtf();
> +
> +       /*
> +        * The four PDPTEs should have been loaded into the VMCS when
> +        * the guest set CR0.PG to enter PAE mode.
> +        */
> +       for (i = 0; i < 4; i++) {
> +               u64 pdpte = vmcs_read(GUEST_PDPTE + 2 * i);
> +
> +               report(pdpte == pdpt[i], "PDPTE%d is 0x%lx (expected 0x%lx)",
> +                      i, pdpte, pdpt[i]);
> +       }
> +
> +       /*
> +        * Now, try to enter the guest in PAE mode. If the PDPTEs in the
> +        * vmcs are wrong, this will fail.
> +        */
> +       enter_guest();
> +       skip_exit_vmcall();
> +
> +       /*
> +        * Return guest to 64-bit mode and wrap up.
> +        */
> +       vmcs_write(ENT_CONTROLS, ent_ctls);
> +       vmcs_write(GUEST_EFER, guest_efer);
> +       vmcs_write(GUEST_AR_CS, guest_ar_cs);
> +       vmcs_write(GUEST_CR0, guest_cr0);
> +       vmcs_write(GUEST_CR4, guest_cr4);
> +       vmcs_write(GUEST_CR3, guest_cr3);
> +
> +       enter_guest();
> +}
> +
>  /*
>   * Tests for VM-execution control fields
>   */
> @@ -10112,6 +10284,6 @@ struct vmx_test vmx_tests[] = {
>         TEST(atomic_switch_overflow_msrs_test),
>         TEST(rdtsc_vmexit_diff_test),
>         TEST(vmx_mtf_test),
> +       TEST(vmx_mtf_pdpte_test),
>         { NULL, NULL, NULL, NULL, NULL, {0} },
>  };
> --
>

Ping. Thx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [kvm-unit-tests PATCH v2] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE
  2020-08-19 20:56 [kvm-unit-tests PATCH v2] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE Peter Shier
  2020-09-01 20:31 ` Peter Shier
@ 2020-09-12  6:20 ` Paolo Bonzini
  1 sibling, 0 replies; 3+ messages in thread
From: Paolo Bonzini @ 2020-09-12  6:20 UTC (permalink / raw)
  To: Peter Shier, kvm; +Cc: Jim Mattson

On 19/08/20 22:56, Peter Shier wrote:
> Verify that when L2 guest enables PAE paging and L0 intercept of L2
> MOV to CR0 reflects MTF exit to L1, subsequent resume to L2 correctly
> preserves PDPTE array specified by L2 CR3.
> 
> Signed-off-by: Jim Mattson <jmattson@google.com>
> Reviewed-by:   Peter Shier <pshier@google.com>
> Signed-off-by: Peter Shier <pshier@google.com>
> ---
>  lib/x86/asm/page.h |   8 +++
>  x86/vmx_tests.c    | 173 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 181 insertions(+)
> 
> diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
> index 7e2a3dd4b90a..1359eb74cde4 100644
> --- a/lib/x86/asm/page.h
> +++ b/lib/x86/asm/page.h
> @@ -36,10 +36,18 @@ typedef unsigned long pgd_t;
>  #define PT64_NX_MASK		(1ull << 63)
>  #define PT_ADDR_MASK		GENMASK_ULL(51, 12)
>  
> +#define PDPTE64_PAGE_SIZE_MASK	  (1ull << 7)
> +#define PDPTE64_RSVD_MASK	  GENMASK_ULL(51, cpuid_maxphyaddr())
> +
>  #define PT_AD_MASK              (PT_ACCESSED_MASK | PT_DIRTY_MASK)
>  
> +#define PAE_PDPTE_RSVD_MASK     (GENMASK_ULL(63, cpuid_maxphyaddr()) |	\
> +				 GENMASK_ULL(8, 5) | GENMASK_ULL(2, 1))
> +
> +
>  #ifdef __x86_64__
>  #define	PAGE_LEVEL	4
> +#define	PDPT_LEVEL	3
>  #define	PGDIR_WIDTH	9
>  #define	PGDIR_MASK	511
>  #else
> diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
> index 32e3d4f47b33..22f0c7b975be 100644
> --- a/x86/vmx_tests.c
> +++ b/x86/vmx_tests.c
> @@ -5250,6 +5250,178 @@ static void vmx_mtf_test(void)
>  	enter_guest();
>  }
>  
> +extern char vmx_mtf_pdpte_guest_begin;
> +extern char vmx_mtf_pdpte_guest_end;
> +
> +asm("vmx_mtf_pdpte_guest_begin:\n\t"
> +    "mov %cr0, %rax\n\t"    /* save CR0 with PG=1                 */
> +    "vmcall\n\t"            /* on return from this CR0.PG=0       */
> +    "mov %rax, %cr0\n\t"    /* restore CR0.PG=1 to enter PAE mode */
> +    "vmcall\n\t"
> +    "retq\n\t"
> +    "vmx_mtf_pdpte_guest_end:");
> +
> +static void vmx_mtf_pdpte_test(void)
> +{
> +	void *test_mtf_pdpte_guest;
> +	pteval_t *pdpt;
> +	u32 guest_ar_cs;
> +	u64 guest_efer;
> +	pteval_t *pte;
> +	u64 guest_cr0;
> +	u64 guest_cr3;
> +	u64 guest_cr4;
> +	u64 ent_ctls;
> +	int i;
> +
> +	if (setup_ept(false))
> +		return;
> +
> +	if (!(ctrl_cpu_rev[0].clr & CPU_MTF)) {
> +		printf("CPU does not support 'monitor trap flag.'\n");
> +		return;
> +	}
> +
> +	if (!(ctrl_cpu_rev[1].clr & CPU_URG)) {
> +		printf("CPU does not support 'unrestricted guest.'\n");
> +		return;
> +	}
> +
> +	vmcs_write(EXC_BITMAP, ~0);
> +	vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG);
> +
> +	/*
> +	 * Copy the guest code to an identity-mapped page.
> +	 */
> +	test_mtf_pdpte_guest = alloc_page();
> +	memcpy(test_mtf_pdpte_guest, &vmx_mtf_pdpte_guest_begin,
> +	       &vmx_mtf_pdpte_guest_end - &vmx_mtf_pdpte_guest_begin);
> +
> +	test_set_guest(test_mtf_pdpte_guest);
> +
> +	enter_guest();
> +	skip_exit_vmcall();
> +
> +	/*
> +	 * Put the guest in non-paged 32-bit protected mode, ready to enter
> +	 * PAE mode when CR0.PG is set. CR4.PAE will already have been set
> +	 * when the guest started out in long mode.
> +	 */
> +	ent_ctls = vmcs_read(ENT_CONTROLS);
> +	vmcs_write(ENT_CONTROLS, ent_ctls & ~ENT_GUEST_64);
> +
> +	guest_efer = vmcs_read(GUEST_EFER);
> +	vmcs_write(GUEST_EFER, guest_efer & ~(EFER_LMA | EFER_LME));
> +
> +	/*
> +	 * Set CS access rights bits for 32-bit protected mode:
> +	 * 3:0    B execute/read/accessed
> +	 * 4      1 code or data
> +	 * 6:5    0 descriptor privilege level
> +	 * 7      1 present
> +	 * 11:8   0 reserved
> +	 * 12     0 available for use by system software
> +	 * 13     0 64 bit mode not active
> +	 * 14     1 default operation size 32-bit segment
> +	 * 15     1 page granularity: segment limit in 4K units
> +	 * 16     0 segment usable
> +	 * 31:17  0 reserved
> +	 */
> +	guest_ar_cs = vmcs_read(GUEST_AR_CS);
> +	vmcs_write(GUEST_AR_CS, 0xc09b);
> +
> +	guest_cr0 = vmcs_read(GUEST_CR0);
> +	vmcs_write(GUEST_CR0, guest_cr0 & ~X86_CR0_PG);
> +
> +	guest_cr4 = vmcs_read(GUEST_CR4);
> +	vmcs_write(GUEST_CR4, guest_cr4 & ~X86_CR4_PCIDE);
> +
> +	guest_cr3 = vmcs_read(GUEST_CR3);
> +
> +	/*
> +	 * Turn the 4-level page table into a PAE page table by following the 0th
> +	 * PML4 entry to a PDPT page, and grab the first four PDPTEs from that
> +	 * page.
> +	 *
> +	 * Why does this work?
> +	 *
> +	 * PAE uses 32-bit addressing which implies:
> +	 * Bits 11:0   page offset
> +	 * Bits 20:12  entry into 512-entry page table
> +	 * Bits 29:21  entry into a 512-entry directory table
> +	 * Bits 31:30  entry into the page directory pointer table.
> +	 * Bits 63:32  zero
> +	 *
> +	 * As only 2 bits are needed to select the PDPTEs for the entire
> +	 * 32-bit address space, take the first 4 PDPTEs in the level 3 page
> +	 * directory pointer table. It doesn't matter which of these PDPTEs
> +	 * are present because they must cover the guest code given that it
> +	 * has already run successfully.
> +	 *
> +	 * Get a pointer to PTE for GVA=0 in the page directory pointer table
> +	 */
> +	pte = get_pte_level(
> +            (pgd_t *)phys_to_virt(guest_cr3 & ~X86_CR3_PCID_MASK), 0,
> +            PDPT_LEVEL);
> +
> +	/*
> +	 * Need some memory for the 4-entry PAE page directory pointer
> +	 * table. Use the end of the identity-mapped page where the guest code
> +	 * is stored. There is definitely space as the guest code is only a
> +	 * few bytes.
> +	 */
> +	pdpt = test_mtf_pdpte_guest + PAGE_SIZE - 4 * sizeof(pteval_t);
> +
> +	/*
> +	 * Copy the first four PDPTEs into the PAE page table with reserved
> +	 * bits cleared. Note that permission bits from the PML4E and PDPTE
> +	 * are not propagated.
> +	 */
> +	for (i = 0; i < 4; i++) {
> +		TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_RSVD_MASK),
> +				   "PDPTE has invalid reserved bits");
> +		TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_PAGE_SIZE_MASK),
> +				   "Cannot use 1GB super pages for PAE");
> +		pdpt[i] = pte[i] & ~(PAE_PDPTE_RSVD_MASK);
> +	}
> +	vmcs_write(GUEST_CR3, virt_to_phys(pdpt));
> +
> +	enable_mtf();
> +	enter_guest();
> +	assert_exit_reason(VMX_MTF);
> +	disable_mtf();
> +
> +	/*
> +	 * The four PDPTEs should have been loaded into the VMCS when
> +	 * the guest set CR0.PG to enter PAE mode.
> +	 */
> +	for (i = 0; i < 4; i++) {
> +		u64 pdpte = vmcs_read(GUEST_PDPTE + 2 * i);
> +
> +		report(pdpte == pdpt[i], "PDPTE%d is 0x%lx (expected 0x%lx)",
> +		       i, pdpte, pdpt[i]);
> +	}
> +
> +	/*
> +	 * Now, try to enter the guest in PAE mode. If the PDPTEs in the
> +	 * vmcs are wrong, this will fail.
> +	 */
> +	enter_guest();
> +	skip_exit_vmcall();
> +
> +	/*
> +	 * Return guest to 64-bit mode and wrap up.
> +	 */
> +	vmcs_write(ENT_CONTROLS, ent_ctls);
> +	vmcs_write(GUEST_EFER, guest_efer);
> +	vmcs_write(GUEST_AR_CS, guest_ar_cs);
> +	vmcs_write(GUEST_CR0, guest_cr0);
> +	vmcs_write(GUEST_CR4, guest_cr4);
> +	vmcs_write(GUEST_CR3, guest_cr3);
> +
> +	enter_guest();
> +}
> +
>  /*
>   * Tests for VM-execution control fields
>   */
> @@ -10112,6 +10284,6 @@ struct vmx_test vmx_tests[] = {
>  	TEST(atomic_switch_overflow_msrs_test),
>  	TEST(rdtsc_vmexit_diff_test),
>  	TEST(vmx_mtf_test),
> +	TEST(vmx_mtf_pdpte_test),
>  	{ NULL, NULL, NULL, NULL, NULL, {0} },
>  };
> 

Queued, thanks.

Paolo


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-09-12  6:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-19 20:56 [kvm-unit-tests PATCH v2] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE Peter Shier
2020-09-01 20:31 ` Peter Shier
2020-09-12  6:20 ` Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).