From: Junaid Shahid
Subject: [PATCH 4/4] kvm: x86: mmu: Lockless access tracking for Intel CPUs without EPT A bits.
Date: Wed, 26 Oct 2016 19:19:45 -0700
To: kvm@vger.kernel.org
Cc: pbonzini@redhat.com, andreslc@google.com, pfeiner@google.com

This change implements lockless access tracking for Intel CPUs without EPT
A bits. This is achieved by marking the PTEs as not-present (but not
completely clearing them) when clear_flush_young() is called after marking
the pages as accessed. When an EPT Violation is generated as a result of
the VM accessing those pages, the PTEs are restored to their original
values.

Signed-off-by: Junaid Shahid
---
 arch/x86/include/asm/vmx.h |  39 ++++++
 arch/x86/kvm/mmu.c         | 314 ++++++++++++++++++++++++++++++++++-----------
 arch/x86/kvm/mmu.h         |   2 +
 arch/x86/kvm/vmx.c         |  20 ++-
 4 files changed, 301 insertions(+), 74 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 60991fb..3d63098 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -434,6 +434,45 @@ enum vmcs_field {
 #define VMX_EPT_IPAT_BIT			(1ull << 6)
 #define VMX_EPT_ACCESS_BIT			(1ull << 8)
 #define VMX_EPT_DIRTY_BIT			(1ull << 9)
+#define VMX_EPT_RWX_MASK			(VMX_EPT_READABLE_MASK |       \
+						 VMX_EPT_WRITABLE_MASK |       \
+						 VMX_EPT_EXECUTABLE_MASK)
+#define VMX_EPT_MT_MASK				(7ull << VMX_EPT_MT_EPTE_SHIFT)
+
+/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
+#define VMX_EPT_MISCONFIG_WX_VALUE		(VMX_EPT_WRITABLE_MASK |       \
+						 VMX_EPT_EXECUTABLE_MASK)
+
+/*
+ * The shift to use for saving the original RWX value when marking the PTE as
+ * not-present for tracking purposes.
+ */
+#define VMX_EPT_RWX_SAVE_SHIFT			52
+
+/*
+ * The shift/mask for determining the type of tracking (if any) being used for
+ * a not-present PTE. Currently, only two bits are used, but more can be added.
+ *
+ * NOTE: Bit 63 is an architecturally ignored bit (and hence can be used for
+ *       our purpose) when the EPT PTE is in a misconfigured state. However, it
+ *       is not necessarily an ignored bit otherwise (even in a not-present
+ *       state). Since the existing MMIO code already uses this bit and since
+ *       KVM doesn't currently use #VEs (where this bit comes into play), we
+ *       can continue to use it for storing the type. But to be on the safe
+ *       side, we should not set it to 1 in those TRACK_TYPEs where the
+ *       tracking is done via EPT Violations instead of EPT Misconfigurations.
+ */
+#define VMX_EPT_TRACK_TYPE_SHIFT		62
+#define VMX_EPT_TRACK_TYPE_MASK			(3ull <<		       \
+						 VMX_EPT_TRACK_TYPE_SHIFT)
+
+/* Sets only bit 62 as the tracking is done by EPT Violations. See note above */
+#define VMX_EPT_TRACK_ACCESS			(1ull <<		       \
+						 VMX_EPT_TRACK_TYPE_SHIFT)
+/* Sets bits 62 and 63. See note above */
+#define VMX_EPT_TRACK_MMIO			(3ull <<		       \
+						 VMX_EPT_TRACK_TYPE_SHIFT)
+
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
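To make the encoding concrete, here is a stand-alone user-space sketch, not
part of the patch: the helper names mark_for_access_track() and
restore_from_access_track() are illustrative only, and the VMX_EPT_* values
are copied from the defines above. It shows how the original RWX permissions
are parked at bit 52 and how bit 62 tags the entry, so the EPT Violation
handler can later rebuild the original PTE. The real patch additionally
clears the writable bit before saving (for dirty tracking); that detail is
omitted here so the round trip stays exact.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_READABLE_MASK	0x1ull
#define VMX_EPT_WRITABLE_MASK	0x2ull
#define VMX_EPT_EXECUTABLE_MASK	0x4ull
#define VMX_EPT_RWX_MASK	(VMX_EPT_READABLE_MASK |	\
				 VMX_EPT_WRITABLE_MASK |	\
				 VMX_EPT_EXECUTABLE_MASK)
#define VMX_EPT_RWX_SAVE_SHIFT	52
#define VMX_EPT_TRACK_TYPE_MASK	(3ull << 62)
#define VMX_EPT_TRACK_ACCESS	(1ull << 62)

/* Illustrative only: save RWX at bit 52, clear RWX (so the next access
 * takes an EPT Violation) and tag the entry via bit 62. */
static uint64_t mark_for_access_track(uint64_t pte)
{
	uint64_t saved = (pte & VMX_EPT_RWX_MASK) << VMX_EPT_RWX_SAVE_SHIFT;

	return (pte & ~VMX_EPT_RWX_MASK) | saved | VMX_EPT_TRACK_ACCESS;
}

/* Illustrative only: what the EPT Violation handler undoes. */
static uint64_t restore_from_access_track(uint64_t pte)
{
	uint64_t saved = (pte >> VMX_EPT_RWX_SAVE_SHIFT) & VMX_EPT_RWX_MASK;

	pte &= ~(VMX_EPT_RWX_MASK << VMX_EPT_RWX_SAVE_SHIFT);
	pte &= ~VMX_EPT_TRACK_TYPE_MASK;
	return pte | saved;
}

int main(void)
{
	uint64_t pte = 0x12345000ull | VMX_EPT_RWX_MASK; /* pfn bits + RWX */
	uint64_t tracked = mark_for_access_track(pte);

	assert((tracked & VMX_EPT_RWX_MASK) == 0);	   /* now not-present */
	assert(restore_from_access_track(tracked) == pte); /* exact round trip */
	printf("%llx -> %llx\n", (unsigned long long)pte,
	       (unsigned long long)tracked);
	return 0;
}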
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a22a8a2..8ea1618 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -177,6 +178,10 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
 static u64 __read_mostly shadow_present_mask;
+static u64 __read_mostly shadow_acc_track_mask;
+static u64 __read_mostly shadow_acc_track_value;
+static u64 __read_mostly shadow_acc_track_saved_bits_mask;
+static u64 __read_mostly shadow_acc_track_saved_bits_shift;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -187,6 +192,26 @@
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
+void kvm_mmu_set_access_track_masks(u64 acc_track_mask, u64 acc_track_value,
+				    u64 saved_bits_mask, u64 saved_bits_shift)
+{
+	shadow_acc_track_mask = acc_track_mask;
+	shadow_acc_track_value = acc_track_value;
+	shadow_acc_track_saved_bits_mask = saved_bits_mask;
+	shadow_acc_track_saved_bits_shift = saved_bits_shift;
+
+	BUG_ON((~acc_track_mask & acc_track_value) != 0);
+	BUG_ON((~acc_track_mask & saved_bits_mask) != 0);
+	BUG_ON(shadow_accessed_mask != 0);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_access_track_masks);
+
+static inline bool is_access_track_spte(u64 spte)
+{
+	return shadow_acc_track_mask != 0 &&
+	       (spte & shadow_acc_track_mask) == shadow_acc_track_value;
+}
+
 /*
  * the low bit of the generation number is always presumed to be zero.
  * This disables mmio caching during memslot updates. The concept is
@@ -292,9 +317,25 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
 	shadow_present_mask = p_mask;
+	BUG_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
+void kvm_mmu_clear_all_pte_masks(void)
+{
+	shadow_user_mask = 0;
+	shadow_accessed_mask = 0;
+	shadow_dirty_mask = 0;
+	shadow_nx_mask = 0;
+	shadow_x_mask = 0;
+	shadow_mmio_mask = 0;
+	shadow_present_mask = 0;
+	shadow_acc_track_mask = 0;
+	shadow_acc_track_value = 0;
+	shadow_acc_track_saved_bits_mask = 0;
+	shadow_acc_track_saved_bits_shift = 0;
+}
+
 static int is_cpuid_PSE36(void)
 {
 	return 1;
@@ -307,7 +348,8 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-	return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte);
+	return ((pte & 0xFFFFFFFFull) && !is_mmio_spte(pte)) ||
+	       is_access_track_spte(pte);
 }
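An aside, not part of the patch: the reason is_shadow_present_pte() needs the
new clause is that a converted SPTE has all of its hardware RWX bits clear,
so to software it would otherwise look like an empty slot. Only the
mask/value pair distinguishes it from both empty and MMIO entries. A small
compilable sketch, using the EPT constants from the vmx.h hunk above and a
hypothetical is_access_track() helper:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RWX_MASK	0x7ull			/* VMX_EPT_RWX_MASK */
#define TRACK_TYPE_MASK	(3ull << 62)
#define TRACK_ACCESS	(1ull << 62)
#define TRACK_MMIO	(3ull << 62)

/* shadow_acc_track_mask/value as ept_set_acc_track_spte_mask() sets them */
static const uint64_t acc_track_mask  = RWX_MASK | TRACK_TYPE_MASK;
static const uint64_t acc_track_value = TRACK_ACCESS;

static bool is_access_track(uint64_t spte)
{
	return (spte & acc_track_mask) == acc_track_value;
}

int main(void)
{
	uint64_t normal  = 0x12345000ull | RWX_MASK;	/* ordinary mapping */
	uint64_t tracked = 0x12345000ull | (0x7ull << 52) | TRACK_ACCESS;
	uint64_t mmio    = 0x6ull | TRACK_MMIO;		/* W+X, bits 62+63 */

	/* Only the converted entry matches: MMIO sptes fail the test
	 * because they keep W+X set and have both type bits set. */
	printf("%d %d %d\n", is_access_track(normal),
	       is_access_track(tracked), is_access_track(mmio)); /* 0 1 0 */
	return 0;
}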
 
 static int is_large_pte(u64 pte)
@@ -490,6 +532,9 @@ static bool spte_has_volatile_bits(u64 spte)
 	if (spte_can_locklessly_be_made_writable(spte))
 		return true;
 
+	if (is_access_track_spte(spte))
+		return true;
+
 	if (!shadow_accessed_mask)
 		return false;
 
@@ -533,17 +578,21 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
  * will find a read-only spte, even though the writable spte
  * might be cached on a CPU's TLB, the return value indicates this
  * case.
+ *
+ * Returns true if the TLB needs to be flushed
  */
 static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 {
 	u64 old_spte = *sptep;
-	bool ret = false;
+	bool flush = false;
+	bool writable_cleared;
+	bool acc_track_enabled;
 
 	WARN_ON(!is_shadow_present_pte(new_spte));
 
 	if (!is_shadow_present_pte(old_spte)) {
 		mmu_spte_set(sptep, new_spte);
-		return ret;
+		return flush;
 	}
 
 	if (!spte_has_volatile_bits(old_spte))
@@ -551,24 +600,16 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 	else
 		old_spte = __update_clear_spte_slow(sptep, new_spte);
 
+	BUG_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
+
 	/*
 	 * For the spte updated out of mmu-lock is safe, since
 	 * we always atomically update it, see the comments in
 	 * spte_has_volatile_bits().
 	 */
 	if (spte_can_locklessly_be_made_writable(old_spte) &&
-	    !is_writable_pte(new_spte))
-		ret = true;
-
-	if (!shadow_accessed_mask) {
-		/*
-		 * We don't set page dirty when dropping non-writable spte.
-		 * So do it now if the new spte is becoming non-writable.
-		 */
-		if (ret)
-			kvm_set_pfn_dirty(spte_to_pfn(old_spte));
-		return ret;
-	}
+	    !is_writable_pte(new_spte))
+		flush = true;
 
 	/*
 	 * Flush TLB when accessed/dirty bits are changed in the page tables,
@@ -576,20 +617,34 @@
 	 */
 	if (spte_is_bit_changed(old_spte, new_spte,
				shadow_accessed_mask | shadow_dirty_mask))
-		ret = true;
+		flush = true;
 
-	if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
+	writable_cleared = is_writable_pte(old_spte) &&
+			   !is_writable_pte(new_spte);
+	acc_track_enabled = !is_access_track_spte(old_spte) &&
+			    is_access_track_spte(new_spte);
+
+	if (writable_cleared || acc_track_enabled)
+		flush = true;
+
+	if (shadow_accessed_mask ?
+	    spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask) :
+	    acc_track_enabled)
 		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
-	if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
+
+	if (shadow_dirty_mask ?
+	    spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask) :
+	    writable_cleared)
 		kvm_set_pfn_dirty(spte_to_pfn(old_spte));
 
-	return ret;
+	return flush;
 }
 
 /*
  * Rules for using mmu_spte_clear_track_bits:
  * It sets the sptep from present to nonpresent, and track the
  * state bits, it is used to clear the last level sptep.
+ * Returns non-zero if the PTE was previously valid.
  */
 static int mmu_spte_clear_track_bits(u64 *sptep)
 {
@@ -604,6 +659,13 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	if (!is_shadow_present_pte(old_spte))
 		return 0;
 
+	/*
+	 * For access tracking SPTEs, the pfn was already marked accessed/dirty
+	 * when the SPTE was marked for access tracking, so nothing to do here.
+	 */
+	if (is_access_track_spte(old_spte))
+		return 1;
+
 	pfn = spte_to_pfn(old_spte);
 
 	/*
@@ -618,6 +680,7 @@
 	if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask :
			PT_WRITABLE_MASK))
 		kvm_set_pfn_dirty(pfn);
+
 	return 1;
 }
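Summarizing the flush policy above as a stand-alone predicate may help
review. This is a sketch for the no-A/D-bits configuration only
(shadow_accessed_mask == shadow_dirty_mask == 0); the bit position used for
the lockless-writable marker is a stand-in, not the kernel's actual value,
and the single-bit access-track test is a simplification of the mask/value
compare:

#include <stdbool.h>
#include <stdint.h>

#define WRITABLE	(1ull << 1)	/* PT_WRITABLE_MASK */
#define ACC_TRACK	(1ull << 62)	/* simplified VMX_EPT_TRACK_ACCESS */
#define MMU_WRITABLE	(1ull << 10)	/* stand-in for SPTE_MMU_WRITEABLE */

/* A stale TLB entry is only dangerous when the update revokes something
 * a CPU may still be using, so those are the cases that force a flush. */
static bool update_needs_flush(uint64_t old_spte, uint64_t new_spte)
{
	/* read-only, but could have been made writable behind our back */
	if ((old_spte & MMU_WRITABLE) && !(new_spte & WRITABLE))
		return true;

	/* write permission actually removed */
	if ((old_spte & WRITABLE) && !(new_spte & WRITABLE))
		return true;

	/* entry converted to a (not-present) access-track SPTE */
	if (!(old_spte & ACC_TRACK) && (new_spte & ACC_TRACK))
		return true;

	return false;
}

When the last two conditions fire, mmu_spte_update() also propagates the
information to the struct page, via kvm_set_pfn_accessed() on conversion to
an access-track SPTE and kvm_set_pfn_dirty() when write permission is
removed, since there are no hardware A/D bits to consult later.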
@@ -636,6 +699,52 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
 	return __get_spte_lockless(sptep);
 }
 
+static u64 mark_spte_for_access_track(u64 spte)
+{
+	if (shadow_acc_track_mask == 0)
+		return spte;
+
+	/*
+	 * Verify that the write-protection that we do below will be fixable
+	 * via the fast page fault path. Currently, that is always the case, at
+	 * least when using EPT (which is when access tracking would be used).
+	 */
+	WARN_ONCE((spte & PT_WRITABLE_MASK) &&
+		  !spte_can_locklessly_be_made_writable(spte),
+		  "Writable SPTE is not locklessly dirty-trackable\n");
+
+	/*
+	 * Any PTE marked for access tracking should also be marked for dirty
+	 * tracking (by being non-writable).
+	 */
+	spte &= ~PT_WRITABLE_MASK;
+
+	spte &= ~(shadow_acc_track_saved_bits_mask <<
+		  shadow_acc_track_saved_bits_shift);
+	spte |= (spte & shadow_acc_track_saved_bits_mask) <<
+		shadow_acc_track_saved_bits_shift;
+	spte &= ~shadow_acc_track_mask;
+	spte |= shadow_acc_track_value;
+
+	return spte;
+}
+
+/* Returns true if the TLB needs to be flushed */
+static bool mmu_spte_enable_access_track(u64 *sptep)
+{
+	u64 spte = mmu_spte_get_lockless(sptep);
+
+	if (is_access_track_spte(spte))
+		return false;
+
+	/* Access tracking should not be enabled if CPU supports A/D bits */
+	BUG_ON(shadow_accessed_mask != 0);
+
+	spte = mark_spte_for_access_track(spte);
+
+	return mmu_spte_update(sptep, spte);
+}
+
 static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -1403,6 +1512,25 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 	return kvm_zap_rmapp(kvm, rmap_head);
 }
 
+static int kvm_acc_track_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+			       struct kvm_memory_slot *slot, gfn_t gfn,
+			       int level, unsigned long data)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	int need_tlb_flush = 0;
+
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
+
+		rmap_printk("kvm_acc_track_rmapp: spte %p %llx gfn %llx (%d)\n",
+			    sptep, *sptep, gfn, level);
+
+		need_tlb_flush |= mmu_spte_enable_access_track(sptep);
+	}
+
+	return need_tlb_flush;
+}
+
 static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			     struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			     unsigned long data)
@@ -1419,8 +1547,9 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 
 restart:
 	for_each_rmap_spte(rmap_head, &iter, sptep) {
+
 		rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
-			    sptep, *sptep, gfn, level);
+			    sptep, *sptep, gfn, level);
 
 		need_flush = 1;
 
@@ -1435,6 +1564,8 @@ restart:
 			new_spte &= ~SPTE_HOST_WRITEABLE;
 			new_spte &= ~shadow_accessed_mask;
 
+			new_spte = mark_spte_for_access_track(new_spte);
+
 			mmu_spte_clear_track_bits(sptep);
 			mmu_spte_set(sptep, new_spte);
 		}
@@ -1615,24 +1746,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
-	int young = 0;
-
-	/*
-	 * If there's no access bit in the secondary pte set by the
-	 * hardware it's up to gup-fast/gup to set the access bit in
-	 * the primary pte or in the page structure.
-	 */
-	if (!shadow_accessed_mask)
-		goto out;
 
 	for_each_rmap_spte(rmap_head, &iter, sptep) {
-		if (*sptep & shadow_accessed_mask) {
-			young = 1;
-			break;
-		}
+		if ((*sptep & shadow_accessed_mask) ||
+		    (!shadow_accessed_mask && !is_access_track_spte(*sptep)))
+			return 1;
	}
-out:
-	return young;
+
+	return 0;
 }
 
 #define RMAP_RECYCLE_THRESHOLD 1000
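The per-SPTE aging rule that kvm_test_age_rmapp() now implements can be
stated compactly (a sketch, not patch code; the access-track check is a
simplified stand-in for the patch's helper):

#include <stdbool.h>
#include <stdint.h>

static uint64_t shadow_accessed_mask;	/* 0 on CPUs without EPT A bits */

/* Simplified stand-in for the patch's is_access_track_spte() */
static bool is_access_track_spte(uint64_t spte)
{
	return spte & (1ull << 62);	/* VMX_EPT_TRACK_ACCESS */
}

/*
 * With a hardware Accessed bit, trust it. Without one, every SPTE that
 * has not yet been converted to an access-track entry must be considered
 * young: it has been usable since the last aging pass, so an access may
 * have happened without leaving any other trace.
 */
static bool spte_is_young(uint64_t spte)
{
	if (shadow_accessed_mask)
		return spte & shadow_accessed_mask;

	return !is_access_track_spte(spte);
}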
@@ -1669,7 +1790,9 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 		 */
 		kvm->mmu_notifier_seq++;
 		return kvm_handle_hva_range(kvm, start, end, 0,
-					    kvm_unmap_rmapp);
+					    shadow_acc_track_mask != 0
+					    ? kvm_acc_track_rmapp
+					    : kvm_unmap_rmapp);
 	}
 
 	return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
@@ -2591,6 +2714,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= shadow_dirty_mask;
 	}
 
+	if (speculative)
+		spte = mark_spte_for_access_track(spte);
+
 set_pte:
 	if (mmu_spte_update(sptep, spte))
 		kvm_flush_remote_tlbs(vcpu->kvm);
@@ -2644,7 +2770,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
 		 is_large_pte(*sptep)? "2MB" : "4kB",
-		 *sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
+		 *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
 		 *sptep, sptep);
 	if (!was_rmapped && is_large_pte(*sptep))
 		++vcpu->kvm->stat.lpages;
@@ -2877,16 +3003,27 @@ static bool page_fault_can_be_fast(u32 error_code)
 	if (unlikely(error_code & PFERR_RSVD_MASK))
 		return false;
 
-	/*
-	 * #PF can be fast only if the shadow page table is present and it
-	 * is caused by write-protect, that means we just need change the
-	 * W bit of the spte which can be done out of mmu-lock.
-	 */
-	if (!(error_code & PFERR_PRESENT_MASK) ||
-	    !(error_code & PFERR_WRITE_MASK))
+	/* See if the page fault is due to an NX violation */
+	if (unlikely(((error_code & (PFERR_FETCH_MASK | PFERR_PRESENT_MASK))
+		      == (PFERR_FETCH_MASK | PFERR_PRESENT_MASK))))
 		return false;
 
-	return true;
+	/*
+	 * #PF can be fast if:
+	 * 1. The shadow page table entry is not present, which could mean that
+	 *    the fault is potentially caused by access tracking (if enabled).
+	 * 2. The shadow page table entry is present and the fault is caused
+	 *    by write-protect, in which case we just need to change the W bit
+	 *    of the spte, which can be done out of mmu-lock.
+	 *
+	 * However, if access tracking is disabled, the first case cannot be
+	 * handled by the fast path, so we return true only if the second
+	 * condition is met.
+	 */
+
+	return shadow_acc_track_mask != 0 ||
+	       ((error_code & (PFERR_WRITE_MASK | PFERR_PRESENT_MASK))
+		== (PFERR_WRITE_MASK | PFERR_PRESENT_MASK));
 }
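Stated as plain logic, for reference only (a sketch using the standard x86
PFERR bit positions, not patch code): the fast path now accepts a fault
either when access tracking may be responsible for a not-present entry, or
when it is an ordinary write-protect fault on a present entry.

#include <stdbool.h>
#include <stdint.h>

#define PFERR_PRESENT	(1u << 0)
#define PFERR_WRITE	(1u << 1)
#define PFERR_RSVD	(1u << 3)
#define PFERR_FETCH	(1u << 4)

/* acc_track_enabled mirrors shadow_acc_track_mask != 0 */
static bool fault_can_be_fast(uint32_t ec, bool acc_track_enabled)
{
	if (ec & PFERR_RSVD)		/* MMIO misconfig: never fast */
		return false;

	/* an NX violation on a present entry can never be fixed here */
	if ((ec & (PFERR_FETCH | PFERR_PRESENT)) ==
	    (PFERR_FETCH | PFERR_PRESENT))
		return false;

	/* not-present faults may be access-track conversions... */
	if (acc_track_enabled)
		return true;

	/* ...otherwise only present write-protect faults qualify */
	return (ec & (PFERR_WRITE | PFERR_PRESENT)) ==
	       (PFERR_WRITE | PFERR_PRESENT);
}

Note that this predicate is only a cheap filter on the error code; the walk
in fast_page_fault() below still checks the actual SPTE before anything is
fixed up.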
 
 /*
@@ -2895,17 +3032,24 @@ static bool page_fault_can_be_fast(u32 error_code)
  */
 static bool
 fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			u64 *sptep, u64 spte)
+			u64 *sptep, u64 old_spte,
+			bool remove_write_prot, bool remove_acc_track)
 {
 	gfn_t gfn;
+	u64 new_spte = old_spte;
 
 	WARN_ON(!sp->role.direct);
 
-	/*
-	 * The gfn of direct spte is stable since it is calculated
-	 * by sp->gfn.
-	 */
-	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+	if (remove_acc_track) {
+		u64 saved_bits = old_spte & (shadow_acc_track_saved_bits_mask <<
+					     shadow_acc_track_saved_bits_shift);
+
+		new_spte &= ~shadow_acc_track_mask;
+		new_spte |= saved_bits >> shadow_acc_track_saved_bits_shift;
+	}
+
+	if (remove_write_prot)
+		new_spte |= PT_WRITABLE_MASK;
 
 	/*
 	 * Theoretically we could also set dirty bit (and flush TLB) here in
@@ -2919,10 +3063,17 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 *
 	 * Compare with set_spte where instead shadow_dirty_mask is set.
 	 */
-	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) != spte)
+	if (cmpxchg64(sptep, old_spte, new_spte) != old_spte)
 		return false;
 
-	kvm_vcpu_mark_page_dirty(vcpu, gfn);
+	if (remove_write_prot) {
+		/*
+		 * The gfn of direct spte is stable since it is
+		 * calculated by sp->gfn.
+		 */
+		gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+	}
 
 	return true;
 }
@@ -2937,7 +3088,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
-	bool ret = false;
+	bool fault_handled = false;
 	u64 spte = 0ull;
 	uint retry_count = 0;
 
@@ -2953,36 +3104,43 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		break;
 
 	do {
-		/*
-		 * If the mapping has been changed, let the vcpu fault on the
-		 * same address again.
-		 */
-		if (!is_shadow_present_pte(spte)) {
-			ret = true;
-			break;
-		}
+		bool remove_write_prot = (error_code & PFERR_WRITE_MASK) &&
+					 !(spte & PT_WRITABLE_MASK);
+		bool remove_acc_track;
+		bool valid_exec_access = (error_code & PFERR_FETCH_MASK) &&
+					 (spte & shadow_x_mask);
 
 		sp = page_header(__pa(iterator.sptep));
 		if (!is_last_spte(spte, sp->role.level))
 			break;
 
 		/*
-		 * Check if it is a spurious fault caused by TLB lazily flushed.
+		 * Check whether the memory access that caused the fault would
+		 * still cause it if it were to be performed right now. If not,
+		 * then this is a spurious fault caused by a lazily flushed
+		 * TLB, or some other CPU has already fixed the PTE after the
+		 * current CPU took the fault.
+		 *
+		 * If Write-Only mappings ever become supported, then the
+		 * condition below would need to be changed appropriately.
 		 *
 		 * Need not check the access of upper level table entries since
 		 * they are always ACC_ALL.
 		 */
-		if (is_writable_pte(spte)) {
-			ret = true;
+		if (((spte & PT_PRESENT_MASK) && !remove_write_prot) ||
+		    valid_exec_access) {
+			fault_handled = true;
 			break;
 		}
 
+		remove_acc_track = is_access_track_spte(spte);
+
 		/*
-		 * Currently, to simplify the code, only the spte
-		 * write-protected by dirty-log can be fast fixed.
+		 * Currently, to simplify the code, write-protection can be
+		 * removed in the fast path only if the SPTE was
+		 * write-protected for dirty-logging.
 		 */
-		if (!spte_can_locklessly_be_made_writable(spte))
-			break;
+		remove_write_prot &= spte_can_locklessly_be_made_writable(spte);
 
 		/*
 		 * Do not fix write-permission on the large spte since we only
@@ -2998,13 +3156,20 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		if (sp->role.level > PT_PAGE_TABLE_LEVEL)
 			break;
 
+		/* Verify that the fault can be handled in the fast path */
+		if (!remove_acc_track && !remove_write_prot)
+			break;
+
 		/*
 		 * Currently, fast page fault only works for direct mapping
 		 * since the gfn is not stable for indirect shadow page. See
 		 * Documentation/virtual/kvm/locking.txt to get more detail.
 		 */
-		ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
-		if (ret)
+		fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
+							iterator.sptep, spte,
+							remove_write_prot,
+							remove_acc_track);
+		if (fault_handled)
 			break;
 
 		if (++retry_count > 4) {
@@ -3018,10 +3183,10 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	} while (true);
 
 	trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
-			      spte, ret);
+			      spte, fault_handled);
 	walk_shadow_page_lockless_end(vcpu);
 
-	return ret;
+	return fault_handled;
 }
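The heart of the lockless fixup is the cmpxchg64() in
fast_pf_fix_direct_spte() combined with the bounded retry loop above.
Distilled to its pattern (a sketch in C11 atomics, not patch code;
compute_fixed() stands in for the restore/unprotect logic):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Fix an SPTE without holding the MMU lock. Any concurrent writer
 * (another vCPU's fault, the aging pass) makes the compare-exchange
 * fail, in which case we re-read and try again a bounded number of
 * times before giving up and taking the slow path. */
static bool fix_spte_lockless(_Atomic uint64_t *sptep,
			      uint64_t (*compute_fixed)(uint64_t))
{
	int retries = 0;

	do {
		uint64_t old = atomic_load(sptep);
		uint64_t fixed = compute_fixed(old);

		if (fixed == old)	/* someone else fixed it already */
			return true;

		/* succeeds only if nobody changed the SPTE under us */
		if (atomic_compare_exchange_strong(sptep, &old, fixed))
			return true;
	} while (++retries <= 4);	/* mirrors the patch's retry bound */

	return false;			/* fall back to the slow path */
}

The bound matters: without it, a vCPU could in principle spin indefinitely
against a stream of concurrent SPTE changes, so the patch caps the retries
and lets the regular, lock-taking fault path resolve the contention.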
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
@@ -4300,6 +4465,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
 }
 
+/* This is only supposed to be used for non-EPT mappings */
 static bool need_remote_flush(u64 old, u64 new)
 {
 	if (!is_shadow_present_pte(old))
@@ -5067,6 +5233,8 @@ static void mmu_destroy_caches(void)
 
 int kvm_mmu_module_init(void)
 {
+	kvm_mmu_clear_all_pte_masks();
+
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
					    sizeof(struct pte_list_desc),
					    0, 0, NULL);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e9..dfd3056 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -52,6 +52,8 @@ static inline u64 rsvd_bits(int s, int e)
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
+void kvm_mmu_set_access_track_masks(u64 acc_track_mask, u64 acc_track_value,
+				    u64 saved_bits_mask, u64 saved_bits_shift);
 
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 859da8e..9cbfc56 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5019,7 +5019,22 @@ static void ept_set_mmio_spte_mask(void)
 	 * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
 	 * spte.
 	 */
-	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
+	kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE |
+				   VMX_EPT_TRACK_MMIO);
+}
+
+static void ept_set_acc_track_spte_mask(void)
+{
+	/*
+	 * For access track PTEs we use a non-present PTE to trigger an EPT
+	 * Violation. The original RWX value is saved in some unused bits in
+	 * the PTE and restored when the violation is fixed.
+	 */
+	kvm_mmu_set_access_track_masks(VMX_EPT_RWX_MASK |
+				       VMX_EPT_TRACK_TYPE_MASK,
+				       VMX_EPT_TRACK_ACCESS,
+				       VMX_EPT_RWX_MASK,
+				       VMX_EPT_RWX_SAVE_SHIFT);
 }
 
 #define VMX_XSS_EXIT_BITMAP 0
@@ -6549,6 +6564,9 @@ static __init int hardware_setup(void)
 			0ull : VMX_EPT_READABLE_MASK);
 		ept_set_mmio_spte_mask();
 		kvm_enable_tdp();
+
+		if (!enable_ept_ad_bits)
+			ept_set_acc_track_spte_mask();
 	} else
 		kvm_disable_tdp();
 
-- 
2.8.0.rc3.226.g39d4020