All of lore.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>,
	Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>,
	Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>,
	kvm@vger.kernel.org
Subject: [PATCH v2 1/5] KVM: Add APIs for unlocked TLB flush
Date: Thu, 17 May 2012 13:24:40 +0300	[thread overview]
Message-ID: <1337250284-18607-1-git-send-email-avi@redhat.com> (raw)

Currently we flush the TLB while holding mmu_lock.  This
increases the lock hold time by the IPI round-trip time, increasing
contention, and makes dropping the lock (for latency reasons) harder.

This patch changes TLB management to be usable locklessly, introducing
the following APIs:

  kvm_mark_tlb_dirty() - mark the TLB as containing stale entries
  kvm_cond_flush_remote_tlbs() - flush the TLB if it was marked as
                                  dirty

These APIs can be used without holding mmu_lock (though if the TLB
became stale due to shadow page table modifications, they will typically
need to be called with the lock held to prevent other threads from
seeing the modified page tables with the TLB unmarked and unflushed).

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/locking.txt |   14 ++++++++++++++
 arch/x86/kvm/paging_tmpl.h            |    4 ++--
 include/linux/kvm_host.h              |   22 +++++++++++++++++++++-
 virt/kvm/kvm_main.c                   |   29 ++++++++++++++++++++---------
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 3b4cd3b..f6c90479 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -23,3 +23,17 @@ Arch:		x86
 Protects:	- kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
 		- tsc offset in vmcb
 Comment:	'raw' because updating the tsc offsets must not be preempted.
+
+3. TLB control
+--------------
+
+The following APIs should be used for TLB control:
+
+ - kvm_mark_tlb_dirty() - indicates that the TLB is out of sync wrt
+      either guest or host page tables.
+ - kvm_flush_remote_tlbs() - unconditionally flush the tlbs
+ - kvm_cond_flush_remote_tlbs() - flush the TLBs if previously marked
+
+These may be used without mmu_lock, though kvm_mark_tlb_dirty() needs to be
+used while holding mmu_lock if it is called due to host page table changes
+(in contrast to guest page table changes).
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 34f9709..97e2a81 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -793,7 +793,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 			return -EINVAL;
 
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
-			vcpu->kvm->tlbs_dirty++;
+			kvm_mark_tlb_dirty(vcpu->kvm);
 			continue;
 		}
 
@@ -806,7 +806,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		if (gfn != sp->gfns[i]) {
 			drop_spte(vcpu->kvm, &sp->spt[i]);
-			vcpu->kvm->tlbs_dirty++;
+			kvm_mark_tlb_dirty(vcpu->kvm);
 			continue;
 		}
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cae342d..7ed1c4f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -310,7 +310,14 @@ struct kvm {
 	unsigned long mmu_notifier_seq;
 	long mmu_notifier_count;
 #endif
-	long tlbs_dirty;
+	struct {
+		/*
+		 * When these two are different, a TLB somewhere holds a
+		 * stale TLB entry.  Clean with kvm_[cond_]flush_remote_tlbs().
+		 */
+		atomic_long_t dirtied_count;
+		atomic_long_t flushed_count;
+	} tlb_state;
 };
 
 /* The guest did something we don't support. */
@@ -468,6 +475,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_cond_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
@@ -889,5 +897,17 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * Mark the TLB as dirty, for kvm_cond_flush_remote_tlbs().
+ */
+static inline void kvm_mark_tlb_dirty(struct kvm *kvm)
+{
+	/*
+	 * Make any changes to the page tables visible to remote flushers.
+	 */
+	smp_mb__before_atomic_inc();
+	atomic_long_inc(&kvm->tlb_state.dirtied_count);
+}
+
 #endif
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7e14068..4113a36 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -203,12 +203,21 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	long dirty_count = kvm->tlbs_dirty;
+	long dirty_count = atomic_long_read(&kvm->tlb_state.dirtied_count);
+	long flushed_count = atomic_long_read(&kvm->tlb_state.flushed_count);
 
 	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
-	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+	atomic_long_cmpxchg(&kvm->tlb_state.flushed_count,
+			    flushed_count, dirty_count);
+}
+
+void kvm_cond_flush_remote_tlbs(struct kvm *kvm)
+{
+	if (atomic_long_read(&kvm->tlb_state.dirtied_count)
+	    != atomic_long_read(&kvm->tlb_state.flushed_count))
+		kvm_flush_remote_tlbs(kvm);
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
@@ -267,7 +276,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush, idx;
+	int idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -291,10 +300,10 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	spin_lock(&kvm->mmu_lock);
 
 	kvm->mmu_notifier_seq++;
-	need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
+	if (kvm_unmap_hva(kvm, address))
+		kvm_mark_tlb_dirty(kvm);
 	/* we've to flush the tlb before the pages can be freed */
-	if (need_tlb_flush)
-		kvm_flush_remote_tlbs(kvm);
+	kvm_cond_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -334,10 +343,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	kvm->mmu_notifier_count++;
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
-	need_tlb_flush |= kvm->tlbs_dirty;
-	/* we've to flush the tlb before the pages can be freed */
+
 	if (need_tlb_flush)
-		kvm_flush_remote_tlbs(kvm);
+		kvm_mark_tlb_dirty(kvm);
+
+	/* we've to flush the tlb before the pages can be freed */
+	kvm_cond_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
-- 
1.7.10.1


             reply	other threads:[~2012-05-17 10:25 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-17 10:24 Avi Kivity [this message]
2012-05-17 10:24 ` [PATCH v2 2/5] KVM: MMU: Convert remote flushes to kvm_mark_tlb_dirty() and a conditional flush Avi Kivity
2012-05-21 20:13   ` Marcelo Tosatti
2012-05-22 14:46   ` Takuya Yoshikawa
2012-05-17 10:24 ` [PATCH v2 3/5] KVM: Flush TLB in mmu notifier without holding mmu_lock Avi Kivity
2012-05-21 20:58   ` Marcelo Tosatti
2012-05-21 21:07     ` Marcelo Tosatti
2012-07-02 12:05     ` Avi Kivity
2012-07-02 12:41       ` Avi Kivity
2012-07-02 14:09         ` Takuya Yoshikawa
2012-07-02 14:18           ` Avi Kivity
2012-05-17 10:24 ` [PATCH v2 4/5] KVM: Flush TLB in FNAME(invlpg) " Avi Kivity
2012-05-17 10:24 ` [PATCH v2 5/5] KVM: Flush TLB in change_pte mmu notifier " Avi Kivity
2012-05-17 11:51 ` [PATCH v2 1/5] KVM: Add APIs for unlocked TLB flush Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1337250284-18607-1-git-send-email-avi@redhat.com \
    --to=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    --cc=yoshikawa.takuya@oss.ntt.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.