From 90e4b3dcb33a52da267f1099289ec7b71ad6a975 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 20 Aug 2019 12:00:13 -0700
Subject: [PATCH] KVM: x86/MMU: Rewrite zapping pages in memslot to fix major flaws

TODO: write a proper changelog if this actually works

Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/mmu.c | 58 ++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 24843cf49579..c91c3472821b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5653,37 +5653,47 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 			struct kvm_memory_slot *slot,
 			struct kvm_page_track_notifier_node *node)
 {
-	struct kvm_mmu_page *sp;
+	struct kvm_mmu_page *sp, *tmp;
 	LIST_HEAD(invalid_list);
-	unsigned long i;
-	bool flush;
-	gfn_t gfn;
+	int max_level, ign;
+	gfn_t gfn_mask;
+
+	/*
+	 * Zapping upper level shadow pages isn't required, worst case scenario
+	 * we'll have unused shadow pages with no children that aren't zapped
+	 * until they're recycled due to age or when the VM is destroyed.  Skip
+	 * shadow pages that can't point directly at the memslot.
+	 */
+	max_level = kvm_largepages_enabled() ? kvm_x86_ops->get_lpage_level() :
+					       PT_PAGE_TABLE_LEVEL;
+	while (slot->npages < KVM_PAGES_PER_HPAGE(max_level))
+		--max_level;
 
 	spin_lock(&kvm->mmu_lock);
+restart:
+	list_for_each_entry_safe(sp, tmp, &kvm->arch.active_mmu_pages, link) {
+		if (sp->role.level > max_level)
+			continue;
+		if (sp->role.invalid && sp->root_count)
+			continue;
 
-	if (list_empty(&kvm->arch.active_mmu_pages))
-		goto out_unlock;
+		/*
+		 * Note, the mask is calculated using level+1.  We're looking
+		 * for shadow pages with sptes that point at the to-be-removed
+		 * memslot, not just for shadow pages residing in the memslot.
+		 */
+		gfn_mask = ~(KVM_PAGES_PER_HPAGE(sp->role.level + 1) - 1);
+		if (sp->gfn < (slot->base_gfn & gfn_mask) ||
+		    sp->gfn > ((slot->base_gfn + slot->npages - 1) & gfn_mask))
+			continue;
 
-	flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false);
+		if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+			goto restart;
 
-	for (i = 0; i < slot->npages; i++) {
-		gfn = slot->base_gfn + i;
-
-		for_each_valid_sp(kvm, sp, gfn) {
-			if (sp->gfn != gfn)
-				continue;
-
-			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-		}
-		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
-			flush = false;
-			cond_resched_lock(&kvm->mmu_lock);
-		}
+		if (cond_resched_lock(&kvm->mmu_lock))
+			goto restart;
 	}
-	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
-
-out_unlock:
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
 	spin_unlock(&kvm->mmu_lock);
 }
 
-- 
2.22.0
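
As a sanity check on the level+1 reasoning behind the new gfn_mask comparison, here is a minimal userspace sketch (not part of the patch, and not KVM code). It assumes x86-64's 512-entries-per-level paging and stands in for KVM_PAGES_PER_HPAGE() with a plain shift; pages_per_hpage(), sp_may_point_at_memslot(), and the memslot numbers are purely illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

/* Pages spanned by a huge page at 'level': 1, 512, 262144 for levels 1-3. */
static gfn_t pages_per_hpage(int level)
{
	return 1ULL << ((level - 1) * 9);
}

/*
 * Mirror of the patch's check: a shadow page at 'sp_level' spans the same
 * gfn range as a huge page one level up, so the mask is built from
 * sp_level + 1.  Returns true if the page may contain an spte pointing
 * into the memslot [base_gfn, base_gfn + npages).
 */
static bool sp_may_point_at_memslot(gfn_t sp_gfn, int sp_level,
				    gfn_t base_gfn, gfn_t npages)
{
	gfn_t gfn_mask = ~(pages_per_hpage(sp_level + 1) - 1);

	return sp_gfn >= (base_gfn & gfn_mask) &&
	       sp_gfn <= ((base_gfn + npages - 1) & gfn_mask);
}

int main(void)
{
	/* Made-up memslot: 4096 pages starting at gfn 0x100000. */
	gfn_t base = 0x100000, npages = 4096;

	/* Level-1 page table covering the 2M just below the slot: skipped (0). */
	printf("%d\n", sp_may_point_at_memslot(0x0ff000, 1, base, npages));
	/* Level-1 page table inside the slot: zapped (1). */
	printf("%d\n", sp_may_point_at_memslot(0x100200, 1, base, npages));
	/* Level-2 table whose 1G-aligned gfn matches the slot's region: zapped (1). */
	printf("%d\n", sp_may_point_at_memslot(0x100000, 2, base, npages));
	return 0;
}

The three checks print 0, 1, 1: a level-1 page below the slot is skipped, one inside the slot matches, and a level-2 page matches when its 1G-aligned gfn coincides with the slot's region, which is why the mask must be derived from level+1 rather than level.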