kvm.vger.kernel.org archive mirror
* RFC: Add reserved bits check
       [not found] <9832F13BD22FB94A829F798DA4A8280501A21068EF@pdsmsx503.ccr.corp.intel.com>
@ 2009-03-27  4:19 ` Dong, Eddie
  2009-03-27  9:34   ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-27  4:19 UTC (permalink / raw)
  To: Dong, Eddie, kvm, Avi Kivity; +Cc: Dong, Eddie

Current KVM doesn't check the reserved bits of guest page table entries, although it may use reserved bits to bypass guest #PF in VMX.

 

This patch adds this check, leaving the shadow pte unconstructed when a guest entry has a reserved bit set (RSVD=1).

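For example, with MAXPHYADDR=40, bits 51:40 of a 4-level guest pte are
reserved; a guest that sets any of them should see #PF with bit 3 (RSVD)
set in the error code.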
 

Comments?

Thx, eddie

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..9370ff0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,8 @@ struct kvm_mmu {
  union kvm_mmu_page_role base_role;
 
  u64 *pae_root;
+ u64 rsvd_bits_mask[4];
+ u64 large_page_rsvd_mask;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 31ba3cb..7f55c4a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -127,6 +127,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
+extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(
+ struct kvm_vcpu *vcpu, u32 function, u32 index);
+
+static inline u64 rsvd_bits(int s, int e)
+{
+ return ((1ULL << (e - s + 1)) - 1) << s;
+}
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -251,6 +259,18 @@ static int is_rmap_pte(u64 pte)
  return is_shadow_present_pte(pte);
 }
 
+static int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	u32 function = 0x80000008;
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, function, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 40;
+}
+
 static pfn_t spte_to_pfn(u64 pte)
 {
  return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -2156,6 +2176,17 @@ static void paging_free(struct kvm_vcpu *vcpu)
  nonpaging_free(vcpu);
 }
 
+static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, unsigned long pte, int level)
+{
+	if (level == PT_DIRECTORY_LEVEL && (pte & PT_PAGE_SIZE_MASK)) {
+		/* large page */
+		return (pte & vcpu->arch.mmu.large_page_rsvd_mask) != 0;
+	} else {
+		/* 4K page */
+		return (pte & vcpu->arch.mmu.rsvd_bits_mask[level-1]) != 0;
+	}
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+ struct kvm_mmu *context = &vcpu->arch.mmu;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+ context->rsvd_bits_mask[3] =
+  rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+ context->rsvd_bits_mask[2] =
+  rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+ context->rsvd_bits_mask[1] =
+  rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+ context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
+ context->large_page_rsvd_mask =  /* 2MB PDE */
+  rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
  return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2191,6 +2234,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
  struct kvm_mmu *context = &vcpu->arch.mmu;
 
+ /* no rsvd bits for 2 level 4K page table entries */
+ context->rsvd_bits_mask[0] = 0;
+ context->rsvd_bits_mask[1] = 0;
+ if (is_cpuid_PSE36())
+  /* 36bits PSE 4MB page */
+  context->large_page_rsvd_mask = rsvd_bits(17, 21);
+ else
+  /* 32 bits PSE 4MB page */
+  context->large_page_rsvd_mask = rsvd_bits(13, 21);
  context->new_cr3 = paging_new_cr3;
  context->page_fault = paging32_page_fault;
  context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+ struct kvm_mmu *context = &vcpu->arch.mmu;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+ /* 3 levels */
+ context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
+  rsvd_bits(7, 8) | rsvd_bits(1,2); /* PDPTE */
+ context->rsvd_bits_mask[1] = rsvd_bits(maxphyaddr, 63); /* PDE */
+ context->rsvd_bits_mask[0] =   /* PTE */
+  rsvd_bits(maxphyaddr, 63) | rsvd_bits(7, 8) | rsvd_bits(1, 2);
+ context->large_page_rsvd_mask =   /* 2M page */
+   rsvd_bits(maxphyaddr, 63) | rsvd_bits(13, 20);
+
  return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..844efe9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
  gfn_t table_gfn;
  unsigned index, pt_access, pte_access;
  gpa_t pte_gpa;
+ int rsvd_fault;
 
  pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -153,10 +154,13 @@ walk:
     walker->level - 1, table_gfn);
 
   kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+  rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 
   if (!is_present_pte(pte))
    goto not_present;
 
+  if (rsvd_fault)
+   goto access_error;
   if (write_fault && !is_writeble_pte(pte))
    if (user_fault || is_write_protection(vcpu))
     goto access_error;
@@ -233,6 +237,8 @@ err:
   walker->error_code |= PFERR_USER_MASK;
  if (fetch_fault)
   walker->error_code |= PFERR_FETCH_MASK;
+ if (rsvd_fault)
+  walker->error_code |= PFERR_RSVD_MASK;
  return 0;
 }
 



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: RFC: Add reserved bits check
  2009-03-27  4:19 ` RFC: Add reserved bits check Dong, Eddie
@ 2009-03-27  9:34   ` Avi Kivity
  2009-03-27 13:46     ` Dong, Eddie
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-27  9:34 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
>  
>
>
>
> Current KVM doesn't check the reserved bits of guest page table entries, although it may use reserved bits to bypass guest #PF in VMX.
>
>  
>
> This patch adds this check, leaving the shadow pte unconstructed when a guest entry has a reserved bit set (RSVD=1).
>
>   


> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -261,6 +261,8 @@ struct kvm_mmu {
>   union kvm_mmu_page_role base_role;
>  
>   u64 *pae_root;
> + u64 rsvd_bits_mask[4];
> + u64 large_page_rsvd_mask;
>  }; 

Make large_page_rsvd_mask an array too, in preparation for 1GB pages?

Perhaps u64 rsvd_bits_mask[2][4].  First index is bit 7 of the pte, 
second index is the level.  Makes for faster run time too.
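
Roughly (a sketch only, reusing the names above):

static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
{
	/* bit 7 selects the large-page row of the mask table */
	int bit7 = (gpte >> 7) & 1;

	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level - 1]) != 0;
}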

>  #define PT_DIRECTORY_LEVEL 2
> @@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask;
>  static u64 __read_mostly shadow_accessed_mask;
>  static u64 __read_mostly shadow_dirty_mask;
>  static u64 __read_mostly shadow_mt_mask;
> +extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(
> + struct kvm_vcpu *vcpu, u32 function, u32 index);
>   

This needs to be in a header file, so we don't get random breakage when 
the signature changes.
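
E.g. just lift the existing declaration into a header (sketch):

struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index);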

>  
> +static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, unsigned long pte, int level)
>   

u64 pte... (and bool for return type)

s/pte/gpte/ to make it clear.

> @@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
>  
>  static int paging64_init_context(struct kvm_vcpu *vcpu)
>  {
> + struct kvm_mmu *context = &vcpu->arch.mmu;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> + context->rsvd_bits_mask[3] =
> +  rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> + context->rsvd_bits_mask[2] =
> +  rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> + context->rsvd_bits_mask[1] =
> +  rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> + context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
> + context->large_page_rsvd_mask =  /* 2MB PDE */
> +  rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
>   return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
>  }
>   

Isn't bit 63 reserved if NX is disabled?

>  
> @@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
>  
>  static int paging32E_init_context(struct kvm_vcpu *vcpu)
>  {
> + struct kvm_mmu *context = &vcpu->arch.mmu;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> + /* 3 levels */
> + context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
> +  rsvd_bits(7, 8) | rsvd_bits(1,2); /* PDPTE */
>   

Will never be used; PDPTEs are loaded by set_cr3(), not walk_addr().

>  
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 7314c09..844efe9 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
>   gfn_t table_gfn;
>   unsigned index, pt_access, pte_access;
>   gpa_t pte_gpa;
> + int rsvd_fault;
>  
>   pgprintk("%s: addr %lx\n", __func__, addr);
>  walk:
> @@ -153,10 +154,13 @@ walk:
>      walker->level - 1, table_gfn);
>  
>    kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
> +  rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
>   

Does a not-present pte set PFERR_RSVD?

>  
>    if (!is_present_pte(pte))
>     goto not_present;
>  
> +  if (rsvd_fault)
> +   goto access_error;
>    if (write_fault && !is_writeble_pte(pte))
>     if (user_fault || is_write_protection(vcpu))
>      goto access_error;
> @@ -233,6 +237,8 @@ err:
>    walker->error_code |= PFERR_USER_MASK;
>   if (fetch_fault)
>    walker->error_code |= PFERR_FETCH_MASK;
> + if (rsvd_fault)
> +  walker->error_code |= PFERR_RSVD_MASK;
>   return 0;
>  }
>  
>
>
>   


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: RFC: Add reserved bits check
  2009-03-27  9:34   ` Avi Kivity
@ 2009-03-27 13:46     ` Dong, Eddie
  2009-03-27 13:59       ` Dong, Eddie
  2009-03-27 14:28       ` Avi Kivity
  0 siblings, 2 replies; 25+ messages in thread
From: Dong, Eddie @ 2009-03-27 13:46 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie

>> + context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
>> + context->large_page_rsvd_mask =  /* 2MB PDE */
>> +  rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
>>   return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);  }
>> 
> 
> Isn't bit 63 reserved if NX is disabled?

Sure.

> 
>> 
>> @@ -2206,6 +2258,18 @@ static int paging32_init_context(struct
>> kvm_vcpu *vcpu) 
>> 
>>  static int paging32E_init_context(struct kvm_vcpu *vcpu)  {
>> + struct kvm_mmu *context = &vcpu->arch.mmu;
>> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
>> +
>> + /* 3 levels */
>> + context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
>> +  rsvd_bits(7, 8) | rsvd_bits(1,2); /* PDPTE */
>> 
> 
> Will never be used; PDPTEs are loaded by set_cr3(), not walk_addr().
> 

I see; then how about replacing the CR3_PAE_RESERVED_BITS check at cr3 load with
rsvd_bits_mask[2]? The current code seems to lack sufficient reserved bits checks too.

>> @@ -153,10 +154,13 @@ walk:
>>      walker->level - 1, table_gfn);
>> 
>>    kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
>> +  rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
>> 
> 
> Does a not-present pte set PFERR_RSVD?

Yes, though most commercial OSes don't use it.
I plan to post a follow-up patch to fix the potential RSVD fault error code mismatch when bypass_guest_pf=1.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: RFC: Add reserved bits check
  2009-03-27 13:46     ` Dong, Eddie
@ 2009-03-27 13:59       ` Dong, Eddie
  2009-03-27 14:28       ` Avi Kivity
  1 sibling, 0 replies; 25+ messages in thread
From: Dong, Eddie @ 2009-03-27 13:59 UTC (permalink / raw)
  To: Dong, Eddie, Avi Kivity; +Cc: kvm, Dong, Eddie


>> Will never be used; PDPTEs are loaded by set_cr3(), not walk_addr().
>> 
> 
> I see; then how about replacing the CR3_PAE_RESERVED_BITS check at cr3
> load with rsvd_bits_mask[2]? The current code seems to lack sufficient
> reserved bits checks too.
> 

Typo; I meant this:

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -233,7 +233,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
                goto out;
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-               if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+               if ((pdpte[i] & 1) && (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
                        ret = 0;
                        goto out;
                }

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: RFC: Add reserved bits check
  2009-03-27 13:46     ` Dong, Eddie
  2009-03-27 13:59       ` Dong, Eddie
@ 2009-03-27 14:28       ` Avi Kivity
  2009-03-27 14:42         ` Dong, Eddie
  1 sibling, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-27 14:28 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
>> Will never be used; PDPTEs are loaded by set_cr3(), not walk_addr().
>>
>>     
>
> I see; then how about replacing the CR3_PAE_RESERVED_BITS check at cr3 load with
> rsvd_bits_mask[2]? The current code seems to lack sufficient reserved bits checks too.
>
>   

Need to make sure rsvd_bits_mask[] is maintained on ept and npt, then.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: RFC: Add reserved bits check
  2009-03-27 14:28       ` Avi Kivity
@ 2009-03-27 14:42         ` Dong, Eddie
  2009-03-29 10:23           ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-27 14:42 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie

> 
> Need to make sure rsvd_bits_mask[] is maintained on ept and npt, then.

Sure, will be in the next patch; here is the current modified one.

Thx, eddie



Current KVM doesn't check reserved bits of guest page table entries, but uses reserved bits to bypass guest #PF in VMX.

 

This patch adds a reserved bit check, leaving the shadow pte unconstructed if a guest entry has RSVD=1.


commit dd1d697edf42953d407c10f4d38c650aafd3d3d5
Author: root <root@eddie-wb.localdomain>
Date:   Fri Mar 27 23:35:27 2009 +0800

    Emulate #PF error code of reserved bits violation.
    
    Signed-off-by: Eddie Dong <eddie.dong@intel.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
+	u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..35af90a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
 	shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,15 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int ps = 0;
+
+	if (level == PT_DIRECTORY_LEVEL)
+		ps = !!(gpte & PT_PAGE_SIZE_MASK);
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2183,6 +2198,22 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2221,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0][0] = 0;
+	context->rsvd_bits_mask[0][1] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2245,21 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 62);		/* PDE */
+	context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62); 	/* PTE */
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..3bf1345 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault = 0;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -153,10 +154,13 @@ walk:
 			 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		if (rsvd_fault)
+			goto access_error;
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -233,6 +237,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e96edda..2c6f180 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2899,6 +2899,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 	return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 32;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: RFC: Add reserved bits check
  2009-03-27 14:42         ` Dong, Eddie
@ 2009-03-29 10:23           ` Avi Kivity
  2009-03-30  1:53             ` Dong, Eddie
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-29 10:23 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
>  
> +static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
> +{
> +	int ps = 0;
> +
> +	if (level == PT_DIRECTORY_LEVEL)
> +		ps = !!(gpte & PT_PAGE_SIZE_MASK);
>   

No need for this.  If you set rsvd_bits_mask[1][0] == 
rsvd_bits_mask[0][0], then you get the same behaviour.  The first index 
is not the page size, it's just bit 7.

You'll need to fill all the indexes for bit 7 == 1, but it's worth it, 
with the 1GB pages patch.
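
Concretely, the 4K rows can simply alias each other (sketch; for a 4K pte
bit 7 is PAT, so the reserved bits are identical either way):

	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];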

> +	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
> +}
> +
>  #define PTTYPE 64
>  #include "paging_tmpl.h"
>  #undef PTTYPE
>  
> +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
> +	if (best)
> +		return best->eax & 0xff;
> +	return 32;
> +}
> +
>   

Best to return 36 if the cpu doesn't support cpuid 80000008 but does 
support pae.
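
I.e. something like (sketch):

	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
	if (best)
		return best->eax & 0xff;
	return 36;	/* PAE implies at least 36 physical address bits */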


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: RFC: Add reserved bits check
  2009-03-29 10:23           ` Avi Kivity
@ 2009-03-30  1:53             ` Dong, Eddie
  2009-03-30  2:38               ` Cleanup to reuse is_long_mode() Dong, Eddie
                                 ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30  1:53 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie

>> +static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
>> +{
>> +	int ps = 0;
>> +
>> +	if (level == PT_DIRECTORY_LEVEL)
>> +		ps = !!(gpte & PT_PAGE_SIZE_MASK);
>> 
> 
> No need for this.  If you set rsvd_bits_mask[1][0] ==
> rsvd_bits_mask[0][0], then you get the same behaviour.  The first
> index is not the page size, it's just bit 7.

Sure, fixed.

> 
> You'll need to fill all the indexes for bit 7 == 1, but it's worth it,
> with the 1GB pages patch.
> 
>> +	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
>> +}
>> +
>>  #define PTTYPE 64
>>  #include "paging_tmpl.h"
>>  #undef PTTYPE
>> 
>> +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm_cpuid_entry2 *best;
>> +
>> +	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
>> +	if (best)
>> +		return best->eax & 0xff;
>> +	return 32;
>> +}
>> +
>> 
> 
> Best to return 36 if the cpu doesn't support cpuid 80000008 but does
> support pae.

Mmm, noticed conflicting information in the SDM, but you are right :)

One more modification: after double-checking with an internal architect, the RSVD bit is not set in the error code if P=0, so walk_addr() now checks reserved bits only after the present check.

Thanks and reposted.
Eddie




    Emulate #PF error code of reserved bits violation.
    
    Signed-off-by: Eddie Dong <eddie.dong@intel.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
+	u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..0a6f109 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
 	shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2183,6 +2197,25 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+	context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2223,16 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0][1] = 0;
+	context->rsvd_bits_mask[0][0] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+	context->rsvd_bits_mask[1][0] = 0;
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2248,22 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 62);		/* PDE */
+	context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62); 	/* PTE */
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..0d9110a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault = 0;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -157,6 +158,10 @@ walk:
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
+		if (rsvd_fault)
+			goto access_error;
+
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -233,6 +238,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e96edda..bf6683a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2899,6 +2899,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 	return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 36;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Cleanup to reuse is_long_mode()
  2009-03-30  1:53             ` Dong, Eddie
@ 2009-03-30  2:38               ` Dong, Eddie
  2009-03-30  7:43                 ` Avi Kivity
  2009-03-30  2:49               ` Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit Dong, Eddie
  2009-03-30  5:12               ` RFC: Add reserved bits check Avi Kivity
  2 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30  2:38 UTC (permalink / raw)
  To: Dong, Eddie, Avi Kivity; +Cc: kvm, Dong, Eddie

Thanks, Eddie




commit 6688a1fbc37330f2c4e16d1a78050b64e1ce5dcc
Author: root <root@eddie-wb.localdomain>
Date:   Mon Mar 30 11:31:10 2009 +0800

    cleanup to reuse is_long_mode(vcpu)
    
    Signed-off-by: Eddie Dong <eddie.dong@intel.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index db5021b..affc31d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -859,7 +859,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.shadow_efer & EFER_LME) {
+	if (is_long_mode(vcpu)) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 			vcpu->arch.shadow_efer |= EFER_LMA;
 			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9913a1d..b1f1458 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1543,7 +1543,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.shadow_efer & EFER_LME) {
+	if (is_long_mode(vcpu)) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
 			enter_lmode(vcpu);
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf6683a..961bd2b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -289,7 +289,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 #ifdef CONFIG_X86_64
-		if ((vcpu->arch.shadow_efer & EFER_LME)) {
+		if (is_long_mode(vcpu)) {
 			int cs_db, cs_l;
 
 			if (!is_pae(vcpu)) {


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-30  1:53             ` Dong, Eddie
  2009-03-30  2:38               ` Cleanup to reuse is_long_mode() Dong, Eddie
@ 2009-03-30  2:49               ` Dong, Eddie
  2009-03-30  8:27                 ` Dong, Eddie
  2009-03-30  5:12               ` RFC: Add reserved bits check Avi Kivity
  2 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30  2:49 UTC (permalink / raw)
  To: Dong, Eddie, Avi Kivity; +Cc: kvm, Dong, Eddie

This is a follow-up to the rsvd_bits emulation patch.

thx, eddie




commit 171eb2b2d8282dd913a5d5c6c695fd64e1ddcf4c
Author: root <root@eddie-wb.localdomain>
Date:   Mon Mar 30 11:39:50 2009 +0800

    Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit.
    
    Signed-off-by: Eddie Dong <Eddie.Dong@intel.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0a6f109..b0bf8b2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2255,6 +2255,9 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
 	if (!is_nx(vcpu))
 		exb_bit_rsvd = rsvd_bits(63, 63);
 
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 		rsvd_bits(maxphyaddr, 62);		/* PDE */
 	context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -2270,6 +2273,17 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
 static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
+		if (!is_nx(vcpu))
+			exb_bit_rsvd = rsvd_bits(63, 63);
+
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
+	}
 
 	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = tdp_page_fault;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 961bd2b..ff178fd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -233,7 +233,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if ((pdpte[i] & PT_PRESENT_MASK) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: RFC: Add reserved bits check
  2009-03-30  1:53             ` Dong, Eddie
  2009-03-30  2:38               ` Cleanup to reuse is_long_mode() Dong, Eddie
  2009-03-30  2:49               ` Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit Dong, Eddie
@ 2009-03-30  5:12               ` Avi Kivity
  2009-03-30  8:21                 ` Dong, Eddie
  2 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-30  5:12 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
> @@ -2183,6 +2197,25 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
>  
>  static int paging64_init_context(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_mmu *context = &vcpu->arch.mmu;
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +	u64 exb_bit_rsvd = 0;
> +
> +	if (!is_nx(vcpu))
> +		exb_bit_rsvd = rsvd_bits(63, 63);
> +
> +	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
> +	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
> +	context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
> +	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
> +	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
>  	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
>  }
>   

Just noticed that walk_addr() can be called from the tdp context too, so we
need to make sure rsvd_bits_mask is initialized in init_kvm_tdp_mmu() as
well.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Cleanup to reuse is_long_mode()
  2009-03-30  2:38               ` Cleanup to reuse is_long_mode() Dong, Eddie
@ 2009-03-30  7:43                 ` Avi Kivity
  2009-03-30  8:24                   ` Dong, Eddie
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-30  7:43 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
>  #ifdef CONFIG_X86_64
> -	if (vcpu->arch.shadow_efer & EFER_LME) {
> +	if (is_long_mode(vcpu)) {
>   

is_long_mode() actually tests EFER_LMA, so this is incorrect.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: RFC: Add reserved bits check
  2009-03-30  5:12               ` RFC: Add reserved bits check Avi Kivity
@ 2009-03-30  8:21                 ` Dong, Eddie
  2009-03-30 12:05                   ` Avi Kivity
  2009-03-31  8:40                   ` Avi Kivity
  0 siblings, 2 replies; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30  8:21 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie

> 
> Just noticed that walk_addr() can be called from the tdp context too, so we
> need to make sure rsvd_bits_mask is initialized in init_kvm_tdp_mmu()
> as well.

Yes, fixed.
Thx, eddie


commit b282565503a78e75af643de42fe7bf495e2213ec
Author: root <root@eddie-wb.localdomain>
Date:   Mon Mar 30 16:57:39 2009 +0800

    Emulate #PF error code of reserved bits violation.
    
    Signed-off-by: Eddie Dong <eddie.dong@intel.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
+	u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..2eab758 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
 	shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2163,6 +2177,54 @@ static void paging_free(struct kvm_vcpu *vcpu)
 #include "paging_tmpl.h"
 #undef PTTYPE
 
+void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+	switch (level) {
+	case PT32_ROOT_LEVEL:
+		/* no rsvd bits for 2 level 4K page table entries */
+		context->rsvd_bits_mask[0][1] = 0;
+		context->rsvd_bits_mask[0][0] = 0;
+		if (is_cpuid_PSE36())
+			/* 36bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+		else
+			/* 32 bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+		context->rsvd_bits_mask[1][0] = 0;
+		break;
+	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62);		/* PDE */
+		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62); 	/* PTE */
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		break;
+	case PT64_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+		context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		break;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -2183,6 +2245,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2253,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2269,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
@@ -2225,12 +2290,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
 		context->root_level = 0;
 	} else if (is_long_mode(vcpu)) {
+		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 		context->root_level = PT64_ROOT_LEVEL;
 	} else if (is_pae(vcpu)) {
+		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 		context->root_level = PT32E_ROOT_LEVEL;
 	} else {
+		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 		context->gva_to_gpa = paging32_gva_to_gpa;
 		context->root_level = PT32_ROOT_LEVEL;
 	}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..0d9110a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault = 0;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -157,6 +158,10 @@ walk:
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
+		if (rsvd_fault)
+			goto access_error;
+
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -233,6 +238,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e96edda..bf6683a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2899,6 +2899,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 	return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 36;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;
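
To make the masks in this patch concrete, here is a standalone sketch (not
part of the patch; rsvd_bits() is restated locally so it compiles on its
own) showing what the long-mode masks evaluate to for a guest whose CPUID
reports MAXPHYADDR = 36, the default used when leaf 0x80000008 is absent:

#include <stdio.h>
#include <stdint.h>

/* Set bits s..e inclusive, as in the patch. */
static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	int maxphyaddr = 36;

	/* 4K PTE: only the address bits above MAXPHYADDR are reserved */
	printf("PTE mask: %#018llx\n",
	       (unsigned long long)rsvd_bits(maxphyaddr, 51));
	/* higher levels additionally reserve bits 7-8 */
	printf("PDE mask: %#018llx\n",
	       (unsigned long long)(rsvd_bits(maxphyaddr, 51) |
				    rsvd_bits(7, 8)));
	/* prints 0x000ffff000000000 and 0x000ffff000000180 */
	return 0;
}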


* RE: Cleanup to reuse is_long_mode()
  2009-03-30  7:43                 ` Avi Kivity
@ 2009-03-30  8:24                   ` Dong, Eddie
  2009-03-30 11:46                     ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30  8:24 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie

Avi Kivity wrote:
> Dong, Eddie wrote:
>>  	struct vcpu_svm *svm = to_svm(vcpu);
>> 
>>  #ifdef CONFIG_X86_64
>> -	if (vcpu->arch.shadow_efer & EFER_LME) {
>> +	if (is_long_mode(vcpu)) {
>> 
> 
> is_long_mode() actually tests EFER_LMA, so this is incorrect.

Is something missing? Here is the definition of is_long_mode; the patch is just an equivalent replacement.
thx, eddie


static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
        return vcpu->arch.shadow_efer & EFER_LME;
#else
        return 0;
#endif
}


* RE: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-30  2:49               ` Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit Dong, Eddie
@ 2009-03-30  8:27                 ` Dong, Eddie
  2009-03-30 12:13                   ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30  8:27 UTC (permalink / raw)
  To: Dong, Eddie, Avi Kivity; +Cc: kvm, Dong, Eddie

Dong, Eddie wrote:
> This is a followup of the rsvd_bits emulation.
> 
Based on the new rsvd_bits emulation patch.
thx, eddie


commit 2c1472ef2b9fd87a261e8b58a7db11afd6a111dc
Author: root <root@eddie-wb.localdomain>
Date:   Mon Mar 30 17:05:47 2009 +0800

    Use rsvd_bits_mask in load_pdptrs for cleanup with EXB bit considered.
    
    Signed-off-by: Eddie Dong <Eddie.Dong@intel.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2eab758..eaf41c0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -225,11 +225,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
 	return vcpu->arch.shadow_efer & EFER_NX;
 }
 
-static int is_present_pte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
 static int is_shadow_present_pte(u64 pte)
 {
 	return pte != shadow_trap_nonpresent_pte
@@ -2199,6 +2194,9 @@ void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[1][0] = 0;
 		break;
 	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);		/* PDE */
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d5..2a6eb50 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+static inline int is_present_pte(unsigned long pte)
+{
+	return pte & PT_PRESENT_MASK;
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 961bd2b..b449ff0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -233,7 +233,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if (is_present_pte(pdpte[i]) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
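
As a cross-check, the magic constant being replaced in load_pdptrs decodes
exactly into rsvd_bits() pieces, assuming MAXPHYADDR = 36:

    0xfffffff0000001e6 = rsvd_bits(36, 63) | rsvd_bits(5, 8) | rsvd_bits(1, 2)
                       = 0xfffffff000000000 | 0x1e0 | 0x6

The table-driven check therefore preserves the old behaviour while picking
up the guest's real MAXPHYADDR; bits 5-6, which the constant treated as
reserved, are dropped later in the thread per the updated SDM.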


* Re: Cleanup to reuse is_long_mode()
  2009-03-30  8:24                   ` Dong, Eddie
@ 2009-03-30 11:46                     ` Avi Kivity
  0 siblings, 0 replies; 25+ messages in thread
From: Avi Kivity @ 2009-03-30 11:46 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
> Avi Kivity wrote:
>   
>> Dong, Eddie wrote:
>>     
>>>  	struct vcpu_svm *svm = to_svm(vcpu);
>>>
>>>  #ifdef CONFIG_X86_64
>>> -	if (vcpu->arch.shadow_efer & EFER_LME) {
>>> +	if (is_long_mode(vcpu)) {
>>>
>>>       
>> is_long_mode() actually tests EFER_LMA, so this is incorrect.
>>     
>
> Is something missing? Here is the definition of is_long_mode; the patch is just an equivalent replacement.
> thx, eddie
>
>
> static inline int is_long_mode(struct kvm_vcpu *vcpu)
> {
> #ifdef CONFIG_X86_64
>         return vcpu->arch.shadow_efer & EFER_LME;
> #else
>         return 0;
> #endif
> }

You're looking at an old version.  Mine has EFER_LMA.  See 9d642b.

-- 
error compiling committee.c: too many arguments to function
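
For reference, a sketch of the LMA-based version Avi describes, reconstructed
from his reply rather than quoted from commit 9d642b.  The distinction matters
because a guest sets EFER.LME before enabling paging, while the CPU sets
EFER.LMA only once long mode is actually active:

static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	/* LMA: long mode is active, not merely enabled */
	return vcpu->arch.shadow_efer & EFER_LMA;
#else
	return 0;
#endif
}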



* Re: RFC: Add reserved bits check
  2009-03-30  8:21                 ` Dong, Eddie
@ 2009-03-30 12:05                   ` Avi Kivity
  2009-03-31  8:40                   ` Avi Kivity
  1 sibling, 0 replies; 25+ messages in thread
From: Avi Kivity @ 2009-03-30 12:05 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
>> Just noticed that walk_addr() too can be called from tdp context, so
>> we need to make sure rsvd_bits_mask is initialized in init_kvm_tdp_mmu()
>> as well.
>>     
>
> Yes, fixed.
>
>   
Applied, thanks.  I also added unit tests for bit 51 of the pte and pde 
in the mmu tests.

-- 
error compiling committee.c: too many arguments to function
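
A hedged sketch of what such a test can look like; every helper here is a
hypothetical stand-in, not the actual kvm-unit-tests API.  The idea: poison
bit 51 of a PTE and check that the resulting #PF error code carries the
reserved-bit flag.

#include <stdint.h>
#include <assert.h>

extern uint64_t *pte_of(void *va);          /* hypothetical: PTE mapping va */
extern void flush_tlb_page(void *va);       /* hypothetical: invlpg wrapper */
extern unsigned touch_expect_pf(void *va);  /* hypothetical: returns #PF error code */

#define PFERR_PRESENT (1u << 0)
#define PFERR_RSVD    (1u << 3)

static void test_rsvd_bit_51(void *va)
{
	*pte_of(va) |= 1ULL << 51;	/* a reserved physical-address bit */
	flush_tlb_page(va);
	/* the walker must report the fault as present + reserved */
	assert((touch_expect_pf(va) & (PFERR_PRESENT | PFERR_RSVD))
	       == (PFERR_PRESENT | PFERR_RSVD));
}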



* Re: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-30  8:27                 ` Dong, Eddie
@ 2009-03-30 12:13                   ` Avi Kivity
  2009-03-30 13:46                     ` Dong, Eddie
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-30 12:13 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
> @@ -2199,6 +2194,9 @@ void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
>  		context->rsvd_bits_mask[1][0] = 0;
>  		break;
>  	case PT32E_ROOT_LEVEL:
> +		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
> +			rsvd_bits(maxphyaddr, 62) |
> +			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
>  		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
>  			rsvd_bits(maxphyaddr, 62);		/* PDE */
>  		context->rsvd_bits_mask[0][0] = exb_bit_rsvd 

Are you sure that PDPTEs support NX?  They don't support R/W and U/S, so 
it seems likely that NX is reserved as well even when EFER.NXE is enabled.

-- 
error compiling committee.c: too many arguments to function



* RE: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-30 12:13                   ` Avi Kivity
@ 2009-03-30 13:46                     ` Dong, Eddie
  0 siblings, 0 replies; 25+ messages in thread
From: Dong, Eddie @ 2009-03-30 13:46 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie

Avi Kivity wrote:
> Dong, Eddie wrote:
>> @@ -2199,6 +2194,9 @@ void reset_rsvds_bits_mask(struct kvm_vcpu
>>  		*vcpu, int level) context->rsvd_bits_mask[1][0] = 0;
>>  		break;
>>  	case PT32E_ROOT_LEVEL:
>> +		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
>> +			rsvd_bits(maxphyaddr, 62) |
>> +			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
>>  		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
>>  			rsvd_bits(maxphyaddr, 62);		/* PDE */
>>  		context->rsvd_bits_mask[0][0] = exb_bit_rsvd
> 
> Are you sure that PDPTEs support NX?  They don't support R/W and U/S,
> so it seems likely that NX is reserved as well even when EFER.NXE is
> enabled. 

I am referring to Fig 3-20/3-21 of SDM 3A, but I think Fig 3-20/21 has the EXB bit missing, judging from Table 3-5 and section 3.10.3.
I will double-check with an internal architect.
thx, eddie


* Re: RFC: Add reserved bits check
  2009-03-30  8:21                 ` Dong, Eddie
  2009-03-30 12:05                   ` Avi Kivity
@ 2009-03-31  8:40                   ` Avi Kivity
  1 sibling, 0 replies; 25+ messages in thread
From: Avi Kivity @ 2009-03-31  8:40 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
> +	case PT64_ROOT_LEVEL:
> +		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
> +			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
> +			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
> +			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +		context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51)

I added a test for this, and it caught that the PTE masks were missing
the NX bit; I fixed that up.  I also added code to shadow into different
pages when EFER.NXE changes, so that we can handle the transition without
flushing all shadow pages (and can also run vcpus with mismatched EFER.NX).

-- 
error compiling committee.c: too many arguments to function
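
What "shadow into different pages" can mean, as a hedged sketch (the field
name and layout here are hypothetical, not quoted from the applied patch):
fold the guest's EFER.NXE into the shadow page role, so pages built under one
NXE value never match lookups made under the other; a toggle then simply
stops hitting the stale pages instead of forcing a full shadow flush.

/* Hypothetical layout, for illustration only. */
union mmu_page_role_sketch {
	unsigned word;
	struct {
		unsigned level:4;
		unsigned nxe:1;	/* guest EFER.NXE when the page was shadowed */
		unsigned pad:27;
	};
};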



* Re: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-31 15:03       ` Dong, Eddie
@ 2009-04-01  8:28         ` Avi Kivity
  0 siblings, 0 replies; 25+ messages in thread
From: Avi Kivity @ 2009-04-01  8:28 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm

Dong, Eddie wrote:
>> Looks good, but doesn't apply; please check if you are working against
>> the latest version.
>>     
>
> Rebased on top of a317a1e496b22d1520218ecf16a02498b99645e2 + previous rsvd bits violation check patch.
>   

Applied, thanks.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* RE: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-31  8:55     ` Avi Kivity
@ 2009-03-31 15:03       ` Dong, Eddie
  2009-04-01  8:28         ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-31 15:03 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Dong, Eddie


> 
> Looks good, but doesn't apply; please check if you are working against
> the latest version.

Rebased on top of a317a1e496b22d1520218ecf16a02498b99645e2 + previous rsvd bits violation check patch.

thx, eddie



    Use rsvd_bits_mask in load_pdptrs and remove bits 5-6 from rsvd_bits_mask per the latest SDM.
    
    Signed-off-by: Eddie Dong <Eddie.Dong@intel.com>


diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 41a0482..400c056 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -225,11 +225,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
 	return vcpu->arch.shadow_efer & EFER_NX;
 }
 
-static int is_present_pte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
 static int is_shadow_present_pte(u64 pte)
 {
 	return pte != shadow_trap_nonpresent_pte
@@ -2195,6 +2190,9 @@ void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[1][0] = 0;
 		break;
 	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][2] =
+			rsvd_bits(maxphyaddr, 63) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);		/* PDE */
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index eaab214..3494a2f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+static inline int is_present_pte(unsigned long pte)
+{
+	return pte & PT_PRESENT_MASK;
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9702353..3d07c9a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -234,7 +234,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if (is_present_pte(pdpte[i]) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}


* Re: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-31  7:32   ` Dong, Eddie
@ 2009-03-31  8:55     ` Avi Kivity
  2009-03-31 15:03       ` Dong, Eddie
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2009-03-31  8:55 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm, Neiger, Gil

Dong, Eddie wrote:
> Neiger, Gil wrote:
>   
>> PDPTEs are used only if CR0.PG=CR4.PAE=1.
>>
>> In that situation, their format depends on the value of IA32_EFER.LMA.
>>
>> If IA32_EFER.LMA=0, bit 63 is reserved and must be 0 in any PDPTE
>> that is marked present.  The execute-disable setting of a page is
>> determined only by the PDE and PTE.  
>>
>> If IA32_EFER.LMA=1, bit 63 is used for the execute-disable in PML4
>> entries, PDPTEs, PDEs, and PTEs (assuming IA32_EFER.NXE=1). 
>>
>> 					- Gil
>>     
>
> Rebased.
> Thanks, eddie
>
>
>   

Looks good, but doesn't apply; please check if you are working against 
the latest version.

-- 
error compiling committee.c: too many arguments to function



* RE: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-31  4:12 ` Neiger, Gil
@ 2009-03-31  7:32   ` Dong, Eddie
  2009-03-31  8:55     ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Dong, Eddie @ 2009-03-31  7:32 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Neiger, Gil, Dong, Eddie

Neiger, Gil wrote:
> PDPTEs are used only if CR0.PG=CR4.PAE=1.
> 
> In that situation, their format depends on the value of IA32_EFER.LMA.
> 
> If IA32_EFER.LMA=0, bit 63 is reserved and must be 0 in any PDPTE
> that is marked present.  The execute-disable setting of a page is
> determined only by the PDE and PTE.  
> 
> If IA32_EFER.LMA=1, bit 63 is used for the execute-disable in PML4
> entries, PDPTEs, PDEs, and PTEs (assuming IA32_EFER.NXE=1). 
> 
> 					- Gil

Rebased.
Thanks, eddie


commit 032caed3da123950eeb3e192baf444d4eae80c85
Author: root <root@eddie-wb.localdomain>
Date:   Tue Mar 31 16:22:49 2009 +0800

    Use rsvd_bits_mask in load_pdptrs and remove bits 5-6 from rsvd_bits_mask per the latest SDM.
    
    Signed-off-by: Eddie Dong <Eddie.Dong@intel.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2eab758..1bed3aa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -225,11 +225,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
 	return vcpu->arch.shadow_efer & EFER_NX;
 }
 
-static int is_present_pte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
 static int is_shadow_present_pte(u64 pte)
 {
 	return pte != shadow_trap_nonpresent_pte
@@ -2199,6 +2194,9 @@ void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[1][0] = 0;
 		break;
 	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][2] =
+			rsvd_bits(maxphyaddr, 63) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);		/* PDE */
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d5..2a6eb50 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+static inline int is_present_pte(unsigned long pte)
+{
+	return pte & PT_PRESENT_MASK;
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 961bd2b..b449ff0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -233,7 +233,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if (is_present_pte(pdpte[i]) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}


* RE: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit
  2009-03-31  0:51 FW: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit Dong, Eddie
@ 2009-03-31  4:12 ` Neiger, Gil
  2009-03-31  7:32   ` Dong, Eddie
  0 siblings, 1 reply; 25+ messages in thread
From: Neiger, Gil @ 2009-03-31  4:12 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: Avi Kivity, kvm

PDPTEs are used only if CR0.PG=CR4.PAE=1.

In that situation, their format depends on the value of IA32_EFER.LMA.

If IA32_EFER.LMA=0, bit 63 is reserved and must be 0 in any PDPTE that is marked present.  The execute-disable setting of a page is determined only by the PDE and PTE.

If IA32_EFER.LMA=1, bit 63 is used for the execute-disable in PML4 entries, PDPTEs, PDEs, and PTEs (assuming IA32_EFER.NXE=1).

					- Gil

-----Original Message-----
From: Dong, Eddie 
Sent: Monday, March 30, 2009 5:51 PM
To: Neiger, Gil
Cc: Avi Kivity; kvm@vger.kernel.org; Dong, Eddie
Subject: FW: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit

Avi Kivity wrote:
> Dong, Eddie wrote:
>> @@ -2199,6 +2194,9 @@ void reset_rsvds_bits_mask(struct kvm_vcpu
>>  		*vcpu, int level) context->rsvd_bits_mask[1][0] = 0;
>>  		break;
>>  	case PT32E_ROOT_LEVEL:
>> +		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
>> +			rsvd_bits(maxphyaddr, 62) |
>> +			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
>>  		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
>>  			rsvd_bits(maxphyaddr, 62);		/* PDE */
>>  		context->rsvd_bits_mask[0][0] = exb_bit_rsvd
> 
> Are you sure that PDPTEs support NX?  They don't support R/W and U/S,
> so it seems likely that NX is reserved as well even when EFER.NXE is
> enabled. 


Gil:
	Here is the original mail from the KVM mailing list. If you are able to help, that would be great.
thx, eddie
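
Expressed as code, Gil's rule makes the PDPTE reserved mask mode-dependent.
A hypothetical standalone helper (not from any patch in this thread;
rsvd_bits() is restated so the example compiles on its own) might read:

#include <stdbool.h>
#include <stdint.h>

static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

/* Reserved bits of a PDPTE, following the IA32_EFER.LMA distinction above. */
static uint64_t pdpte_rsvd_mask(int maxphyaddr, bool lma, bool nxe)
{
	if (!lma)
		/* legacy PAE: bit 63 is plainly reserved, as are R/W and
		 * U/S (bits 1-2) and bits 7-8 */
		return rsvd_bits(maxphyaddr, 63) |
		       rsvd_bits(7, 8) | rsvd_bits(1, 2);
	/* long mode: bit 63 is execute-disable when EFER.NXE=1 and
	 * reserved otherwise */
	return rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8) |
	       (nxe ? 0 : rsvd_bits(63, 63));
}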


Thread overview: 25+ messages
     [not found] <9832F13BD22FB94A829F798DA4A8280501A21068EF@pdsmsx503.ccr.corp.intel.com>
2009-03-27  4:19 ` RFC: Add reserved bits check Dong, Eddie
2009-03-27  9:34   ` Avi Kivity
2009-03-27 13:46     ` Dong, Eddie
2009-03-27 13:59       ` Dong, Eddie
2009-03-27 14:28       ` Avi Kivity
2009-03-27 14:42         ` Dong, Eddie
2009-03-29 10:23           ` Avi Kivity
2009-03-30  1:53             ` Dong, Eddie
2009-03-30  2:38               ` Cleanup to reuse is_long_mode() Dong, Eddie
2009-03-30  7:43                 ` Avi Kivity
2009-03-30  8:24                   ` Dong, Eddie
2009-03-30 11:46                     ` Avi Kivity
2009-03-30  2:49               ` Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit Dong, Eddie
2009-03-30  8:27                 ` Dong, Eddie
2009-03-30 12:13                   ` Avi Kivity
2009-03-30 13:46                     ` Dong, Eddie
2009-03-30  5:12               ` RFC: Add reserved bits check Avi Kivity
2009-03-30  8:21                 ` Dong, Eddie
2009-03-30 12:05                   ` Avi Kivity
2009-03-31  8:40                   ` Avi Kivity
2009-03-31  0:51 FW: Use rsvd_bits_mask in load_pdptrs for cleanup and considering EXB bit Dong, Eddie
2009-03-31  4:12 ` Neiger, Gil
2009-03-31  7:32   ` Dong, Eddie
2009-03-31  8:55     ` Avi Kivity
2009-03-31 15:03       ` Dong, Eddie
2009-04-01  8:28         ` Avi Kivity
