From: Ben Gardon <bgardon@google.com>
To: kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
Peter Feiner <pfeiner@google.com>,
Peter Shier <pshier@google.com>,
Junaid Shahid <junaids@google.com>,
Jim Mattson <jmattson@google.com>,
Ben Gardon <bgardon@google.com>
Subject: [RFC PATCH 10/28] kvm: mmu: Flush TLBs before freeing direct MMU page table memory
Date: Thu, 26 Sep 2019 16:18:06 -0700 [thread overview]
Message-ID: <20190926231824.149014-11-bgardon@google.com> (raw)
In-Reply-To: <20190926231824.149014-1-bgardon@google.com>
If page table memory is freed before a TLB flush, it can result in
improper guest access to memory through paging structure caches.
Specifically, until a TLB flush, memory that was part of the paging
structure could be used by the hardware for address translation if a
partial walk leading to it is stored in the paging structure cache. Ensure
that there is a TLB flush before page table memory is freed by
transferring disconnected pages to a disconnected list, and on a flush
transferring a snapshot of the disconnected list to a free list. The free
list is processed asynchronously to avoid slowing TLB flushes.
Signed-off-by: Ben Gardon <bgardon@google.com>
---
arch/x86/include/asm/kvm_host.h | 5 ++
arch/x86/kvm/Kconfig | 1 +
arch/x86/kvm/mmu.c | 127 ++++++++++++++++++++++++++++++--
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 9 ++-
5 files changed, 136 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1f8164c577d50..9bf149dce146d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -974,6 +974,11 @@ struct kvm_arch {
*/
bool pure_direct_mmu;
hpa_t direct_root_hpa[KVM_ADDRESS_SPACE_NUM];
+ spinlock_t direct_mmu_disconnected_pts_lock;
+ struct list_head direct_mmu_disconnected_pts;
+ spinlock_t direct_mmu_pt_free_list_lock;
+ struct list_head direct_mmu_pt_free_list;
+ struct work_struct direct_mmu_free_work;
};
struct kvm_vm_stat {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 840e12583b85b..7c615f3cebf8f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -45,6 +45,7 @@ config KVM
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
+ select HAVE_KVM_ARCH_TLB_FLUSH_ALL
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9fe57ef7baa29..317e9238f17b2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1700,6 +1700,100 @@ static void free_pt_rcu_callback(struct rcu_head *rp)
free_page((unsigned long)disconnected_pt);
}
+/*
+ * Takes a snapshot of, and clears, the direct MMU disconnected pt list. Once
+ * TLBs have been flushed, this snapshot can be transferred to the direct MMU
+ * PT free list to be freed.
+ */
+static void direct_mmu_cut_disconnected_pt_list(struct kvm *kvm,
+ struct list_head *snapshot)
+{
+ spin_lock(&kvm->arch.direct_mmu_disconnected_pts_lock);
+ list_splice_tail_init(&kvm->arch.direct_mmu_disconnected_pts, snapshot);
+ spin_unlock(&kvm->arch.direct_mmu_disconnected_pts_lock);
+}
+
+/*
+ * Takes a snapshot of, and clears, the direct MMU PT free list and then sets
+ * each page in the snapshot to be freed after an RCU grace period.
+ */
+static void direct_mmu_process_pt_free_list(struct kvm *kvm)
+{
+ LIST_HEAD(free_list);
+ struct page *page;
+ struct page *next;
+
+ spin_lock(&kvm->arch.direct_mmu_pt_free_list_lock);
+ list_splice_tail_init(&kvm->arch.direct_mmu_pt_free_list, &free_list);
+ spin_unlock(&kvm->arch.direct_mmu_pt_free_list_lock);
+
+ list_for_each_entry_safe(page, next, &free_list, lru) {
+ list_del(&page->lru);
+ /*
+ * Free the pt page in an RCU callback, once it's safe to do
+ * so.
+ */
+ call_rcu(&page->rcu_head, free_pt_rcu_callback);
+ }
+}
+
+/*
+ * Work handler for direct_mmu_free_work: recovers the owning struct kvm from
+ * the embedded work item and processes the direct MMU PT free list.
+ */
+static void direct_mmu_free_work_fn(struct work_struct *work)
+{
+ struct kvm *kvm = container_of(work, struct kvm,
+ arch.direct_mmu_free_work);
+
+ direct_mmu_process_pt_free_list(kvm);
+}
+
+/*
+ * Propagate a snapshot of the direct MMU disconnected pt list to the direct MMU
+ * PT free list, after TLBs have been flushed. Schedule work to free the pages
+ * in the direct MMU PT free list.
+ */
+static void direct_mmu_process_free_list_async(struct kvm *kvm,
+ struct list_head *snapshot)
+{
+ spin_lock(&kvm->arch.direct_mmu_pt_free_list_lock);
+ list_splice_tail_init(snapshot, &kvm->arch.direct_mmu_pt_free_list);
+ spin_unlock(&kvm->arch.direct_mmu_pt_free_list_lock);
+
+ schedule_work(&kvm->arch.direct_mmu_free_work);
+}
+
+/*
+ * To be used during teardown once all VCPUs are paused. Empties both the
+ * direct MMU disconnected PT list and the PT free list, handing every page on
+ * them to call_rcu() so that it is freed by free_pt_rcu_callback. No TLB flush
+ * is performed on this path.
+ */
+static void direct_mmu_process_pt_free_list_sync(struct kvm *kvm)
+{
+ LIST_HEAD(snapshot);
+
+ /*
+  * Cancel the asynchronous free work (waiting for a running instance to
+  * finish) before the lists are drained directly below.
+  */
+ cancel_work_sync(&kvm->arch.direct_mmu_free_work);
+ direct_mmu_cut_disconnected_pt_list(kvm, &snapshot);
+
+ /* Append the formerly disconnected pages to the PT free list. */
+ spin_lock(&kvm->arch.direct_mmu_pt_free_list_lock);
+ list_splice_tail_init(&snapshot, &kvm->arch.direct_mmu_pt_free_list);
+ spin_unlock(&kvm->arch.direct_mmu_pt_free_list_lock);
+
+ direct_mmu_process_pt_free_list(kvm);
+}
+
+/*
+ * Add a page of memory that has been disconnected from the paging structure to
+ * a queue to be freed. This is a two step process: after a page has been
+ * disconnected, the TLBs must be flushed, and an RCU grace period must elapse
+ * before the memory can be freed.
+ */
+static void direct_mmu_disconnected_pt_list_add(struct kvm *kvm,
+ struct page *page)
+{
+ spin_lock(&kvm->arch.direct_mmu_disconnected_pts_lock);
+ list_add_tail(&page->lru, &kvm->arch.direct_mmu_disconnected_pts);
+ spin_unlock(&kvm->arch.direct_mmu_disconnected_pts_lock);
+}
+
+
static void handle_changed_pte(struct kvm *kvm, int as_id, gfn_t gfn,
u64 old_pte, u64 new_pte, int level);
@@ -1760,12 +1854,8 @@ static void handle_disconnected_pt(struct kvm *kvm, int as_id,
gfn += KVM_PAGES_PER_HPAGE(level);
}
- /*
- * Free the pt page in an RCU callback, once it's safe to do
- * so.
- */
page = pfn_to_page(pfn);
- call_rcu(&page->rcu_head, free_pt_rcu_callback);
+ direct_mmu_disconnected_pt_list_add(kvm, page);
}
/**
@@ -5813,6 +5903,12 @@ static int kvm_mmu_init_direct_mmu(struct kvm *kvm)
kvm->arch.direct_mmu_enabled = true;
kvm->arch.pure_direct_mmu = true;
+ spin_lock_init(&kvm->arch.direct_mmu_disconnected_pts_lock);
+ INIT_LIST_HEAD(&kvm->arch.direct_mmu_disconnected_pts);
+ spin_lock_init(&kvm->arch.direct_mmu_pt_free_list_lock);
+ INIT_LIST_HEAD(&kvm->arch.direct_mmu_pt_free_list);
+ INIT_WORK(&kvm->arch.direct_mmu_free_work, direct_mmu_free_work_fn);
+
return 0;
err:
for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
@@ -5831,6 +5927,8 @@ static void kvm_mmu_uninit_direct_mmu(struct kvm *kvm)
if (!kvm->arch.direct_mmu_enabled)
return;
+ direct_mmu_process_pt_free_list_sync(kvm);
+
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
handle_disconnected_pt(kvm, i, 0,
(kvm_pfn_t)(kvm->arch.direct_root_hpa[i] >> PAGE_SHIFT),
@@ -6516,3 +6614,22 @@ void kvm_mmu_module_exit(void)
unregister_shrinker(&mmu_shrinker);
mmu_audit_disable();
}
+
+/*
+ * Flush remote TLBs and, when the direct MMU is enabled, move page table
+ * pages that were disconnected before the flush onto the PT free list.
+ *
+ * The disconnected list is snapshotted before the flush is issued, so only
+ * pages that were already on the list at that point are queued for freeing;
+ * pages added afterwards remain on the disconnected list.
+ */
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+ LIST_HEAD(disconnected_snapshot);
+
+ if (kvm->arch.direct_mmu_enabled)
+ direct_mmu_cut_disconnected_pt_list(kvm,
+ &disconnected_snapshot);
+
+ /*
+ * Synchronously flush the TLBs before processing the direct MMU free
+ * list.
+ */
+ __kvm_flush_remote_tlbs(kvm);
+
+ if (kvm->arch.direct_mmu_enabled)
+ direct_mmu_process_free_list_async(kvm, &disconnected_snapshot);
+}
+EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index baed80f8a7f00..350a3b79cc8d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -786,6 +786,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
+void __kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9ce067b6882b7..c8559a86625ce 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -255,8 +255,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
return called;
}
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+void __kvm_flush_remote_tlbs(struct kvm *kvm)
{
/*
* Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
@@ -280,6 +279,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
+
+#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+ __kvm_flush_remote_tlbs(kvm);
+}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif
--
2.23.0.444.g18eeb5a265-goog
next prev parent reply other threads:[~2019-09-26 23:18 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-26 23:17 [RFC PATCH 00/28] kvm: mmu: Rework the x86 TDP direct mapped case Ben Gardon
2019-09-26 23:17 ` [RFC PATCH 01/28] kvm: mmu: Separate generating and setting mmio ptes Ben Gardon
2019-11-27 18:15 ` Sean Christopherson
2019-09-26 23:17 ` [RFC PATCH 02/28] kvm: mmu: Separate pte generation from set_spte Ben Gardon
2019-11-27 18:25 ` Sean Christopherson
2019-09-26 23:17 ` [RFC PATCH 03/28] kvm: mmu: Zero page cache memory at allocation time Ben Gardon
2019-11-27 18:32 ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 04/28] kvm: mmu: Update the lpages stat atomically Ben Gardon
2019-11-27 18:39 ` Sean Christopherson
2019-12-06 20:10 ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 05/28] sched: Add cond_resched_rwlock Ben Gardon
2019-11-27 18:42 ` Sean Christopherson
2019-12-06 20:12 ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 06/28] kvm: mmu: Replace mmu_lock with a read/write lock Ben Gardon
2019-11-27 18:47 ` Sean Christopherson
2019-12-02 22:45 ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 07/28] kvm: mmu: Add functions for handling changed PTEs Ben Gardon
2019-11-27 19:04 ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 08/28] kvm: mmu: Init / Uninit the direct MMU Ben Gardon
2019-12-02 23:40 ` Sean Christopherson
2019-12-06 20:25 ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 09/28] kvm: mmu: Free direct MMU page table memory in an RCU callback Ben Gardon
2019-09-26 23:18 ` Ben Gardon [this message]
2019-12-02 23:46 ` [RFC PATCH 10/28] kvm: mmu: Flush TLBs before freeing direct MMU page table memory Sean Christopherson
2019-12-06 20:31 ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 11/28] kvm: mmu: Optimize for freeing direct MMU PTs on teardown Ben Gardon
2019-12-02 23:54 ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 12/28] kvm: mmu: Set tlbs_dirty atomically Ben Gardon
2019-12-03 0:13 ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 13/28] kvm: mmu: Add an iterator for concurrent paging structure walks Ben Gardon
2019-12-03 2:15 ` Sean Christopherson
2019-12-18 18:25 ` Ben Gardon
2019-12-18 19:14 ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 14/28] kvm: mmu: Batch updates to the direct mmu disconnected list Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 15/28] kvm: mmu: Support invalidate_zap_all_pages Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 16/28] kvm: mmu: Add direct MMU page fault handler Ben Gardon
2020-01-08 17:20 ` Peter Xu
2020-01-08 18:15 ` Ben Gardon
2020-01-08 19:00 ` Peter Xu
2019-09-26 23:18 ` [RFC PATCH 17/28] kvm: mmu: Add direct MMU fast " Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 18/28] kvm: mmu: Add an hva range iterator for memslot GFNs Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 19/28] kvm: mmu: Make address space ID a property of memslots Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 20/28] kvm: mmu: Implement the invalidation MMU notifiers for the direct MMU Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 21/28] kvm: mmu: Integrate the direct mmu with the changed pte notifier Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 22/28] kvm: mmu: Implement access tracking for the direct MMU Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 23/28] kvm: mmu: Make mark_page_dirty_in_slot usable from outside kvm_main Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 24/28] kvm: mmu: Support dirty logging in the direct MMU Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 25/28] kvm: mmu: Support kvm_zap_gfn_range " Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 26/28] kvm: mmu: Integrate direct MMU with nesting Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 27/28] kvm: mmu: Lazily allocate rmap when direct MMU is enabled Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 28/28] kvm: mmu: Support MMIO in the direct MMU Ben Gardon
2019-10-17 18:50 ` [RFC PATCH 00/28] kvm: mmu: Rework the x86 TDP direct mapped case Sean Christopherson
2019-10-18 13:42 ` Paolo Bonzini
2019-11-27 19:09 ` Sean Christopherson
2019-12-06 19:55 ` Ben Gardon
2019-12-06 19:57 ` Sean Christopherson
2019-12-06 20:42 ` Ben Gardon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190926231824.149014-11-bgardon@google.com \
--to=bgardon@google.com \
--cc=jmattson@google.com \
--cc=junaids@google.com \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=pfeiner@google.com \
--cc=pshier@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).