From: "Dong, Eddie" <eddie.dong@intel.com>
To: Avi Kivity <avi@redhat.com>
Cc: "kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	"Dong, Eddie" <eddie.dong@intel.com>
Subject: RE: RFC: Add reserved bits check
Date: Fri, 27 Mar 2009 22:42:49 +0800
Message-ID: <9832F13BD22FB94A829F798DA4A8280501A21071B0@pdsmsx503.ccr.corp.intel.com>
In-Reply-To: <49CCE275.9070608@redhat.com>

> 
> Need to make sure rsvd_bits_mask[] is maintained on ept and npt, then.

Sure, that will be in the next patch; posting the current modified one here.

Thx, eddie



Current KVM doesn't check the reserved bits of guest page table entries, but it does use reserved bits to bypass guest #PF in VMX.

This patch adds a reserved bits check, leaving the shadow pte unconstructed when the guest sets RSVD=1.
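
As a worked illustration of the rsvd_bits() helper the patch introduces, here is a minimal standalone sketch. MAXPHYADDR = 36 is only an example value; the guest's real value comes from CPUID leaf 0x80000008, as in cpuid_maxphyaddr() below:

#include <stdio.h>
#include <stdint.h>

/* Build a mask with bits s..e (inclusive) set -- same helper the
 * patch adds to mmu.c. */
static inline uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	int maxphyaddr = 36;	/* example value for illustration only */

	/* Long-mode 4KB PTE: address bits from MAXPHYADDR up to 51
	 * are reserved. */
	printf("PTE mask:     0x%016llx\n",
	       (unsigned long long)rsvd_bits(maxphyaddr, 51));

	/* Long-mode 2MB PDE: additionally, bits 13..20 must be zero
	 * inside a large-page frame. */
	printf("2MB PDE mask: 0x%016llx\n",
	       (unsigned long long)(rsvd_bits(maxphyaddr, 51) |
				    rsvd_bits(13, 20)));
	return 0;
}

With maxphyaddr = 36 this prints 0x000ffff000000000 for the PTE mask and 0x000ffff0001fe000 for the 2MB PDE mask.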


commit dd1d697edf42953d407c10f4d38c650aafd3d3d5
Author: root <root@eddie-wb.localdomain>
Date:   Fri Mar 27 23:35:27 2009 +0800

    Emulate #PF error code of reserved bits violation.
    
    Signed-off-by: Eddie Dong <eddie.dong@intel.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
+	u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..35af90a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
 	shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,15 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int ps = 0;
+
+	if (level == PT_DIRECTORY_LEVEL)
+		ps = !!(gpte & PT_PAGE_SIZE_MASK);
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2183,6 +2198,22 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2221,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0][0] = 0;
+	context->rsvd_bits_mask[0][1] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2245,21 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 62);		/* PDE */
+	context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62); 	/* PTE */
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..3bf1345 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault = 0;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -153,10 +154,13 @@ walk:
 			 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		if (rsvd_fault)
+			goto access_error;
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -233,6 +237,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e96edda..2c6f180 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2899,6 +2899,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 	return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 32;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;
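
For context, a condensed, hypothetical sketch of how the walker consumes these masks; check_gpte() is a made-up helper, the real flow lives in FNAME(walk_addr) in paging_tmpl.h above, and this only restates its ordering:

/* Pseudo-helper: decide the #PF error code for one guest pte.
 * Not-present wins over reserved-bit, matching the walk_addr order. */
static int check_gpte(struct kvm_vcpu *vcpu, u64 gpte, int level,
		      u32 *error_code)
{
	if (!is_present_pte(gpte)) {
		*error_code = 0;	/* P=0: not-present fault */
		return -1;
	}
	if (is_rsvd_bits_set(vcpu, gpte, level)) {
		/* P=1, RSVD=1: reserved-bit violation */
		*error_code = PFERR_PRESENT_MASK | PFERR_RSVD_MASK;
		return -1;
	}
	return 0;			/* walk may continue */
}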

Thread overview: 20+ messages
     [not found] <9832F13BD22FB94A829F798DA4A8280501A21068EF@pdsmsx503.ccr.corp.intel.com>
2009-03-27  4:19 ` RFC: Add reserved bits check Dong, Eddie
2009-03-27  9:34   ` Avi Kivity
2009-03-27 13:46     ` Dong, Eddie
2009-03-27 13:59       ` Dong, Eddie
2009-03-27 14:28       ` Avi Kivity
2009-03-27 14:42         ` Dong, Eddie [this message]
2009-03-29 10:23           ` Avi Kivity
2009-03-30  1:53             ` Dong, Eddie
2009-03-30  2:38               ` Cleanup to reuse is_long_mode() Dong, Eddie
2009-03-30  7:43                 ` Avi Kivity
2009-03-30  8:24                   ` Dong, Eddie
2009-03-30 11:46                     ` Avi Kivity
2009-03-30  2:49               ` Use rsvd_bits_mask in load_pdptrs for cleanup and considing EXB bit Dong, Eddie
2009-03-30  8:27                 ` Dong, Eddie
2009-03-30 12:13                   ` Avi Kivity
2009-03-30 13:46                     ` Dong, Eddie
2009-03-30  5:12               ` RFC: Add reserved bits check Avi Kivity
2009-03-30  8:21                 ` Dong, Eddie
2009-03-30 12:05                   ` Avi Kivity
2009-03-31  8:40                   ` Avi Kivity
