From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
virtualization@lists.linux-foundation.org,
linux-s390@vger.kernel.org,
xen-devel-request@lists.xenproject.org, kvm@vger.kernel.org
Cc: benh@kernel.crashing.org, paulus@samba.org, mpe@ellerman.id.au,
mingo@redhat.com, peterz@infradead.org,
paulmck@linux.vnet.ibm.com, will.deacon@arm.com,
kernellwp@gmail.com, jgross@suse.com, pbonzini@redhat.com,
bsingharora@gmail.com, boqun.feng@gmail.com,
borntraeger@de.ibm.com,
Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Subject: [PATCH v4 5/5] x86, kvm: support vcpu preempted check
Date: Wed, 19 Oct 2016 06:20:16 -0400 [thread overview]
Message-ID: <1476872416-42752-6-git-send-email-xinhui.pan@linux.vnet.ibm.com> (raw)
In-Reply-To: <1476872416-42752-1-git-send-email-xinhui.pan@linux.vnet.ibm.com>
This is to fix some lock holder preemption issues. Some other lock
implementations do a spin loop before acquiring the lock itself.
Currently the kernel has an interface, bool vcpu_is_preempted(int cpu). It
takes the cpu as a parameter and returns true if the cpu is preempted. The
kernel can then break out of the spin loops based on the return value of
vcpu_is_preempted. As the kernel already uses this interface, let's support it.
We use one field of struct kvm_steal_time to indicate whether a vcpu
is running or not.
unix benchmark result:
host: kernel 4.8.1, i5-4570, 4 cpus
guest: kernel 4.8.1, 8 vcpus
test-case after-patch before-patch
Execl Throughput | 18307.9 lps | 11701.6 lps
File Copy 1024 bufsize 2000 maxblocks | 1352407.3 KBps | 790418.9 KBps
File Copy 256 bufsize 500 maxblocks | 367555.6 KBps | 222867.7 KBps
File Copy 4096 bufsize 8000 maxblocks | 3675649.7 KBps | 1780614.4 KBps
Pipe Throughput | 11872208.7 lps | 11855628.9 lps
Pipe-based Context Switching | 1495126.5 lps | 1490533.9 lps
Process Creation | 29881.2 lps | 28572.8 lps
Shell Scripts (1 concurrent) | 23224.3 lpm | 22607.4 lpm
Shell Scripts (8 concurrent) | 3531.4 lpm | 3211.9 lpm
System Call Overhead | 10385653.0 lps | 10419979.0 lps
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/x86/include/asm/paravirt_types.h | 6 ++++++
arch/x86/include/asm/spinlock.h | 8 ++++++++
arch/x86/include/uapi/asm/kvm_para.h | 3 ++-
arch/x86/kernel/kvm.c | 11 +++++++++++
arch/x86/kernel/paravirt.c | 11 +++++++++++
arch/x86/kvm/x86.c | 12 ++++++++++++
6 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0f400c0..b1c7937 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -98,6 +98,10 @@ struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu);
};
+struct pv_vcpu_ops {
+ bool (*vcpu_is_preempted)(int cpu);
+};
+
struct pv_cpu_ops {
/* hooks for various privileged instructions */
unsigned long (*get_debugreg)(int regno);
@@ -318,6 +322,7 @@ struct pv_lock_ops {
struct paravirt_patch_template {
struct pv_init_ops pv_init_ops;
struct pv_time_ops pv_time_ops;
+ struct pv_vcpu_ops pv_vcpu_ops;
struct pv_cpu_ops pv_cpu_ops;
struct pv_irq_ops pv_irq_ops;
struct pv_mmu_ops pv_mmu_ops;
@@ -327,6 +332,7 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
+extern struct pv_vcpu_ops pv_vcpu_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_mmu_ops pv_mmu_ops;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 921bea7..52fd942 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -26,6 +26,14 @@
extern struct static_key paravirt_ticketlocks_enabled;
static __always_inline bool static_key_false(struct static_key *key);
+#ifdef CONFIG_PARAVIRT
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return pv_vcpu_ops.vcpu_is_preempted(cpu);
+}
+#endif
+
#include <asm/qspinlock.h>
/*
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 94dc8ca..e9c12a1 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -45,7 +45,8 @@ struct kvm_steal_time {
__u64 steal;
__u32 version;
__u32 flags;
- __u32 pad[12];
+ __u32 preempted;
+ __u32 pad[11];
};
#define KVM_STEAL_ALIGNMENT_BITS 5
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index edbbfc8..0011bef 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -415,6 +415,15 @@ void kvm_disable_steal_time(void)
wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}
+static bool kvm_vcpu_is_preempted(int cpu)
+{
+ struct kvm_steal_time *src;
+
+ src = &per_cpu(steal_time, cpu);
+
+ return !!src->preempted;
+}
+
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
@@ -488,6 +497,8 @@ void __init kvm_guest_init(void)
kvm_guest_cpu_init();
#endif
+ pv_vcpu_ops.vcpu_is_preempted = kvm_vcpu_is_preempted;
+
/*
* Hard lockup detection is enabled by default. Disable it, as guests
* can get false positives too easily, for example if the host is
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bbf3d59..7adb7e9 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -122,6 +122,7 @@ static void *get_call_destination(u8 type)
struct paravirt_patch_template tmpl = {
.pv_init_ops = pv_init_ops,
.pv_time_ops = pv_time_ops,
+ .pv_vcpu_ops = pv_vcpu_ops,
.pv_cpu_ops = pv_cpu_ops,
.pv_irq_ops = pv_irq_ops,
.pv_mmu_ops = pv_mmu_ops,
@@ -203,6 +204,11 @@ static u64 native_steal_clock(int cpu)
return 0;
}
+static bool native_vcpu_is_preempted(int cpu)
+{
+ return 0;
+}
+
/* These are in entry.S */
extern void native_iret(void);
extern void native_usergs_sysret64(void);
@@ -312,6 +318,10 @@ struct pv_time_ops pv_time_ops = {
.steal_clock = native_steal_clock,
};
+struct pv_vcpu_ops pv_vcpu_ops = {
+ .vcpu_is_preempted = native_vcpu_is_preempted,
+};
+
__visible struct pv_irq_ops pv_irq_ops = {
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
@@ -458,6 +468,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
};
EXPORT_SYMBOL_GPL(pv_time_ops);
+EXPORT_SYMBOL (pv_vcpu_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_info);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c633de..0ffc5aa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2057,6 +2057,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
+ vcpu->arch.st.steal.preempted = 0;
+
if (vcpu->arch.st.steal.version & 1)
vcpu->arch.st.steal.version += 1; /* first time write, random junk */
@@ -2812,6 +2814,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
+ if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal,
+ sizeof(struct kvm_steal_time)) == 0) {
+ vcpu->arch.st.steal.preempted = 1;
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal,
+ sizeof(struct kvm_steal_time));
+ }
+
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
--
2.4.11
next prev parent reply other threads:[~2016-10-19 6:23 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-19 10:20 [PATCH v4 0/5] implement vcpu preempted check Pan Xinhui
2016-10-19 6:47 ` Christian Borntraeger
2016-10-19 16:57 ` Pan Xinhui
2016-10-19 10:20 ` [PATCH v4 1/5] kernel/sched: introduce vcpu preempted check interface Pan Xinhui
2016-10-19 10:20 ` [PATCH v4 2/5] locking/osq: Drop the overload of osq_lock() Pan Xinhui
2016-10-19 10:20 ` [PATCH v4 3/5] kernel/locking: Drop the overload of {mutex, rwsem}_spin_on_owner Pan Xinhui
2016-10-19 10:20 ` [PATCH v4 4/5] powerpc/spinlock: support vcpu preempted check Pan Xinhui
2016-10-19 10:20 ` Pan Xinhui [this message]
2016-10-19 17:24 ` [PATCH v4 5/5] x86, kvm: " Radim Krčmář
2016-10-19 18:45 ` Pan Xinhui
2016-10-24 14:39 ` Paolo Bonzini
2016-10-24 15:14 ` Radim Krčmář
2016-10-24 15:18 ` Paolo Bonzini
2016-10-25 1:25 ` Pan Xinhui
2016-10-19 15:58 ` [PATCH v4 0/5] implement " Juergen Gross
2016-10-19 17:08 ` Pan Xinhui
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1476872416-42752-6-git-send-email-xinhui.pan@linux.vnet.ibm.com \
--to=xinhui.pan@linux.vnet.ibm.com \
--cc=benh@kernel.crashing.org \
--cc=boqun.feng@gmail.com \
--cc=borntraeger@de.ibm.com \
--cc=bsingharora@gmail.com \
--cc=jgross@suse.com \
--cc=kernellwp@gmail.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mingo@redhat.com \
--cc=mpe@ellerman.id.au \
--cc=paulmck@linux.vnet.ibm.com \
--cc=paulus@samba.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=virtualization@lists.linux-foundation.org \
--cc=will.deacon@arm.com \
--cc=xen-devel-request@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).