linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT
@ 2018-03-01  9:49 Wanpeng Li
  2018-03-01  9:49 ` [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT Wanpeng Li
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Wanpeng Li @ 2018-03-01  9:49 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář

From: Wanpeng Li <wanpengli@tencent.com>

Allowing a guest to execute MWAIT without interception enables a guest
to put a (physical) CPU into a power saving state, where it takes
longer to return from than what may be desired by the host.

Don't give a guest that power over a host by default. (Especially,
since nothing prevents a guest from using MWAIT even when it is not
advertised via CPUID.)

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 Documentation/virtual/kvm/api.txt | 26 ++++++++++++++++----------
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/svm.c                |  2 +-
 arch/x86/kvm/vmx.c                |  9 +++++----
 arch/x86/kvm/x86.c                | 24 ++++++++++++++++++++----
 arch/x86/kvm/x86.h                | 10 +++++-----
 include/uapi/linux/kvm.h          |  2 +-
 tools/include/uapi/linux/kvm.h    |  2 +-
 8 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index da3b958..4df35c0 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1041,7 +1041,8 @@ On systems that do not support this ioctl, it always fails. On systems that
 do support it, it only works for extensions that are supported for enablement.
 
 To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
-be used.
+be used. Blindly passing the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP is
+a valid thing to do when vCPUs are associated to dedicated physical CPUs.
 
 struct kvm_enable_cap {
        /* in */
@@ -4358,6 +4359,20 @@ enables QEMU to build error log and branch to guest kernel registered
 machine check handling routine. Without this capability KVM will
 branch to guests' 0x200 interrupt vector.
 
+7.13 KVM_CAP_X86_DISABLE_EXITS
+
+Architectures: x86
+Parameters: args[0] defines which exits are disabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+
+Valid exits in args[0] are
+
+#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
+
+Enabling this capability on a VM provides userspace with a way to no
+longer intercept some instructions for improved latency in some
+workloads. Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
+
 8. Other capabilities.
 ----------------------
 
@@ -4470,15 +4485,6 @@ reserved.
     Both registers and addresses are 64-bits wide.
     It will be possible to run 64-bit or 32-bit guest code.
 
-8.8 KVM_CAP_X86_GUEST_MWAIT
-
-Architectures: x86
-
-This capability indicates that guest using memory monotoring instructions
-(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such time
-spent while virtual CPU is halted in this way will then be accounted for as
-guest running time on the host (as opposed to e.g. HLT).
-
 8.9 KVM_CAP_ARM_USER_IRQ
 
 Architectures: arm, arm64
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index df6720f..6bd754f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -807,6 +807,8 @@ struct kvm_arch {
 
 	gpa_t wall_clock;
 
+	bool mwait_in_guest;
+
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 312f33f..dff3a5d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1389,7 +1389,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_intercept(svm, INTERCEPT_XSETBV);
 	set_intercept(svm, INTERCEPT_RSM);
 
-	if (!kvm_mwait_in_guest()) {
+	if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
 		set_intercept(svm, INTERCEPT_MONITOR);
 		set_intercept(svm, INTERCEPT_MWAIT);
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2cdbea7..b551067 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3713,13 +3713,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_UNCOND_IO_EXITING |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING |
+	      CPU_BASED_MWAIT_EXITING |
+	      CPU_BASED_MONITOR_EXITING |
 	      CPU_BASED_INVLPG_EXITING |
 	      CPU_BASED_RDPMC_EXITING;
 
-	if (!kvm_mwait_in_guest())
-		min |= CPU_BASED_MWAIT_EXITING |
-			CPU_BASED_MONITOR_EXITING;
-
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_USE_MSR_BITMAPS |
 	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -5503,6 +5501,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 		exec_control |= CPU_BASED_CR3_STORE_EXITING |
 				CPU_BASED_CR3_LOAD_EXITING  |
 				CPU_BASED_INVLPG_EXITING;
+	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+				CPU_BASED_MONITOR_EXITING);
 	return exec_control;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5c93cbc..c1d9bbb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2780,9 +2780,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 	return r;
 }
 
+static inline bool kvm_mwait_can_in_guest(void)
+{
+	return boot_cpu_has(X86_FEATURE_MWAIT) &&
+		!boot_cpu_has_bug(X86_BUG_MONITOR);
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
-	int r;
+	int r = 0;
 
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
@@ -2838,8 +2844,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ADJUST_CLOCK:
 		r = KVM_CLOCK_TSC_STABLE;
 		break;
-	case KVM_CAP_X86_GUEST_MWAIT:
-		r = kvm_mwait_in_guest();
+	case KVM_CAP_X86_DISABLE_EXITS:
+		if(kvm_mwait_can_in_guest())
+			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 		break;
 	case KVM_CAP_X86_SMM:
 		/* SMBASE is usually relocated above 1M on modern chipsets,
@@ -2880,7 +2887,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_X2APIC_API_VALID_FLAGS;
 		break;
 	default:
-		r = 0;
 		break;
 	}
 	return r;
@@ -4185,6 +4191,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 
 		r = 0;
 		break;
+	case KVM_CAP_X86_DISABLE_EXITS:
+		r = -EINVAL;
+		if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+			break;
+
+		if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+			kvm_mwait_can_in_guest())
+			kvm->arch.mwait_in_guest = true;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b91215d..cd1215e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,8 +2,6 @@
 #ifndef ARCH_X86_KVM_X86_H
 #define ARCH_X86_KVM_X86_H
 
-#include <asm/processor.h>
-#include <asm/mwait.h>
 #include <linux/kvm_host.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
@@ -264,10 +262,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 	    __rem;						\
 	 })
 
-static inline bool kvm_mwait_in_guest(void)
+#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
+#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT)
+
+static inline bool kvm_mwait_in_guest(struct kvm *kvm)
 {
-	return boot_cpu_has(X86_FEATURE_MWAIT) &&
-		!boot_cpu_has_bug(X86_BUG_MONITOR);
+	return kvm->arch.mwait_in_guest;
 }
 
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 088c2c9..1065006 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8fb90a0..a81df22 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT
  2018-03-01  9:49 [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Wanpeng Li
@ 2018-03-01  9:49 ` Wanpeng Li
  2018-03-08 20:40   ` Radim Krčmář
  2018-03-01  9:49 ` [PATCH 3/3] KVM: X86: Provides userspace with a capability to not intercept PAUSE Wanpeng Li
  2018-03-08 20:31 ` [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Radim Krčmář
  2 siblings, 1 reply; 9+ messages in thread
From: Wanpeng Li @ 2018-03-01  9:49 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář

From: Wanpeng Li <wanpengli@tencent.com>

If host CPUs are dedicated to a VM, we can avoid VM exits on HLT.
This patch adds the per-VM non-HLT-exiting capability.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 Documentation/virtual/kvm/api.txt |  1 +
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/svm.c                |  3 +++
 arch/x86/kvm/vmx.c                | 23 +++++++++++++++++++++++
 arch/x86/kvm/x86.c                |  3 +++
 arch/x86/kvm/x86.h                |  9 ++++++++-
 6 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4df35c0..c07e775 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4368,6 +4368,7 @@ Returns: 0 on success, -EINVAL when args[0] contains invalid exits
 Valid exits in args[0] are
 
 #define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
 
 Enabling this capability on a VM provides userspace with a way to no
 longer intercepts some instructions for improved latency in some
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6bd754f..ee739ad 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -808,6 +808,7 @@ struct kvm_arch {
 	gpa_t wall_clock;
 
 	bool mwait_in_guest;
+	bool hlt_in_guest;
 
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dff3a5d..fcf8339 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1394,6 +1394,9 @@ static void init_vmcb(struct vcpu_svm *svm)
 		set_intercept(svm, INTERCEPT_MWAIT);
 	}
 
+	if (!kvm_hlt_in_guest(svm->vcpu.kvm))
+		set_intercept(svm, INTERCEPT_HLT);
+
 	control->iopm_base_pa = __sme_set(iopm_base);
 	control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
 	control->int_ctl = V_INTR_MASKING_MASK;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b551067..f9f887a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2525,6 +2525,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
 	return 0;
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Ensure that we clear the HLT state in the VMCS.  We don't need to
+	 * explicitly skip the instruction because if the HLT state is set,
+	 * then the instruction is already executing and RIP has already been
+	 * advanced.
+	 */
+	if (kvm_hlt_in_guest(vcpu->kvm) &&
+			vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2555,6 +2568,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+	vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -5504,6 +5519,8 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
 		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
 				CPU_BASED_MONITOR_EXITING);
+	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+		exec_control &= ~CPU_BASED_HLT_EXITING;
 	return exec_control;
 }
 
@@ -5865,6 +5882,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	update_exception_bitmap(vcpu);
 
 	vpid_sync_context(vmx->vpid);
+	if (init_event)
+		vmx_clear_hlt(vcpu);
 }
 
 /*
@@ -5936,6 +5955,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 	} else
 		intr |= INTR_TYPE_EXT_INTR;
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+
+	vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -5966,6 +5987,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+
+	vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c1d9bbb..13f01d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2845,6 +2845,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_CLOCK_TSC_STABLE;
 		break;
 	case KVM_CAP_X86_DISABLE_EXITS:
+		r |=  KVM_X86_DISABLE_EXITS_HTL;
 		if(kvm_mwait_can_in_guest())
 			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 		break;
@@ -4199,6 +4200,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
 			kvm_mwait_can_in_guest())
 			kvm->arch.mwait_in_guest = true;
+		if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+			kvm->arch.hlt_in_guest = true;
 		r = 0;
 		break;
 	default:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index cd1215e..d4ddb00 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -263,11 +263,18 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 	 })
 
 #define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
-#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT)
+#define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
+#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
+                                              KVM_X86_DISABLE_EXITS_HTL)
 
 static inline bool kvm_mwait_in_guest(struct kvm *kvm)
 {
 	return kvm->arch.mwait_in_guest;
 }
 
+static inline bool kvm_hlt_in_guest(struct kvm *kvm)
+{
+	return kvm->arch.hlt_in_guest;
+}
+
 #endif
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] KVM: X86: Provides userspace with a capability to not intercept PAUSE
  2018-03-01  9:49 [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Wanpeng Li
  2018-03-01  9:49 ` [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT Wanpeng Li
@ 2018-03-01  9:49 ` Wanpeng Li
  2018-03-08 20:52   ` Radim Krčmář
  2018-03-08 20:31 ` [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Radim Krčmář
  2 siblings, 1 reply; 9+ messages in thread
From: Wanpeng Li @ 2018-03-01  9:49 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář

From: Wanpeng Li <wanpengli@tencent.com>

Allow disabling pause loop exit/pause filtering on a per-VM basis.

If some VMs have dedicated host CPUs, they won't be negatively affected
due to needlessly intercepted PAUSE instructions.

Thanks to Jan H. Schönherr's initial patch.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              |  3 ++-
 arch/x86/kvm/vmx.c              | 17 +++++++++++++----
 arch/x86/kvm/x86.c              |  4 +++-
 arch/x86/kvm/x86.h              |  9 ++++++++-
 5 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ee739ad..555f6d4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -809,6 +809,7 @@ struct kvm_arch {
 
 	bool mwait_in_guest;
 	bool hlt_in_guest;
+	bool pause_in_guest;
 
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fcf8339..53cb3ff 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1452,7 +1452,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm->nested.vmcb = 0;
 	svm->vcpu.arch.hflags = 0;
 
-	if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+	if (boot_cpu_has(X86_FEATURE_PAUSEFILTER) &&
+	    !kvm_pause_in_guest(svm->vcpu.kvm)) {
 		control->pause_filter_count = 3000;
 		set_intercept(svm, INTERCEPT_PAUSE);
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f9f887a..b2cffcd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5554,7 +5554,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	}
 	if (!enable_unrestricted_guest)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
-	if (!ple_gap)
+	if (kvm_pause_in_guest(vmx->vcpu.kvm))
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 	if (!kvm_vcpu_apicv_active(vcpu))
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -5717,7 +5717,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 	}
 
-	if (ple_gap) {
+	if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
 		vmcs_write32(PLE_GAP, ple_gap);
 		vmx->ple_window = ple_window;
 		vmx->ple_window_dirty = true;
@@ -7149,7 +7149,7 @@ static __exit void hardware_unsetup(void)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
-	if (ple_gap)
+	if (!kvm_pause_in_guest(vcpu->kvm))
 		grow_ple_window(vcpu);
 
 	/*
@@ -9844,6 +9844,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return ERR_PTR(err);
 }
 
+static int vmx_vm_init(struct kvm *kvm)
+{
+	if (!ple_gap)
+		kvm->arch.pause_in_guest = true;
+	return 0;
+}
+
 static void __init vmx_check_processor_compat(void *rtn)
 {
 	struct vmcs_config vmcs_conf;
@@ -11965,7 +11972,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
 
 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
-	if (ple_gap)
+	if (!kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);
 }
 
@@ -12324,6 +12331,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_accelerated_tpr = report_flexpriority,
 	.cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
 
+	.vm_init = vmx_vm_init,
+
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 	.vcpu_reset = vmx_vcpu_reset,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 13f01d7..eafa0cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2845,7 +2845,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_CLOCK_TSC_STABLE;
 		break;
 	case KVM_CAP_X86_DISABLE_EXITS:
-		r |=  KVM_X86_DISABLE_EXITS_HTL;
+		r |=  KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
 		if(kvm_mwait_can_in_guest())
 			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 		break;
@@ -4202,6 +4202,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			kvm->arch.mwait_in_guest = true;
 		if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
 			kvm->arch.hlt_in_guest = true;
+		if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
+			kvm->arch.pause_in_guest = true;
 		r = 0;
 		break;
 	default:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d4ddb00..658ea9a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -264,8 +264,10 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 
 #define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
 #define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
-                                              KVM_X86_DISABLE_EXITS_HTL)
+                                              KVM_X86_DISABLE_EXITS_HTL | \
+                                              KVM_X86_DISABLE_EXITS_PAUSE)
 
 static inline bool kvm_mwait_in_guest(struct kvm *kvm)
 {
@@ -277,4 +279,9 @@ static inline bool kvm_hlt_in_guest(struct kvm *kvm)
 	return kvm->arch.hlt_in_guest;
 }
 
+static inline bool kvm_pause_in_guest(struct kvm *kvm)
+{
+	return kvm->arch.pause_in_guest;
+}
+
 #endif
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT
  2018-03-01  9:49 [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Wanpeng Li
  2018-03-01  9:49 ` [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT Wanpeng Li
  2018-03-01  9:49 ` [PATCH 3/3] KVM: X86: Provides userspace with a capability to not intercept PAUSE Wanpeng Li
@ 2018-03-08 20:31 ` Radim Krčmář
  2018-03-08 20:55   ` Radim Krčmář
  2018-03-09  2:34   ` Wanpeng Li
  2 siblings, 2 replies; 9+ messages in thread
From: Radim Krčmář @ 2018-03-08 20:31 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: linux-kernel, kvm, Paolo Bonzini

2018-03-01 17:49+0800, Wanpeng Li:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Allowing a guest to execute MWAIT without interception enables a guest
> to put a (physical) CPU into a power saving state, where it takes
> longer to return from than what may be desired by the host.
> 
> Don't give a guest that power over a host by default. (Especially,
> since nothing prevents a guest from using MWAIT even when it is not
> advertised via CPUID.)
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> @@ -1041,7 +1041,8 @@ On systems that do not support this ioctl, it always fails. On systems that
>  do support it, it only works for extensions that are supported for enablement.
>  
>  To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
> -be used.
> +be used. Blindly passing the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP is
> +a valid thing to do when vCPUs are associated to dedicated physical CPUs.

This is not true even for x86 KVM_CAP_SPLIT_IRQCHIP and neither is there a
need to limit ourselves.  Just leave it be.

> +7.13 KVM_CAP_X86_DISABLE_EXITS
> +
> +Architectures: x86
> +Parameters: args[0] defines which exits are disabled
> +Returns: 0 on success, -EINVAL when args[0] contains invalid exits
> +
> +Valid exits in args[0] are
> +
> +#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
> +
> +Enabling this capability on a VM provides userspace with a way to no
> +longer intercepts some instructions for improved latency in some
> +workloads. Not enable KVM_FEATURE_PV_UNHALT if you block HLT.

The last sentence belong to the patch that enables HLT.
KVM could in theory handle the case (although it makes no sense), so if
it doesn't currently work, please add a check to kvm_update_cpuid() that
forbids KVM_FEATURE_PV_UNHALT when halt exits are disabled.

Also, it would be nicer to write that as
"Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits."

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> @@ -2780,9 +2780,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
>  	return r;
>  }
>  
> +static inline bool kvm_mwait_can_in_guest(void)

I think kvm_can_mwait_in_guest would be a better name.

> +{
> +	return boot_cpu_has(X86_FEATURE_MWAIT) &&
> +		!boot_cpu_has_bug(X86_BUG_MONITOR);
> +}
> +
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index b91215d..cd1215e 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -2,8 +2,6 @@
>  #ifndef ARCH_X86_KVM_X86_H
>  #define ARCH_X86_KVM_X86_H
>  
> -#include <asm/processor.h>
> -#include <asm/mwait.h>
>  #include <linux/kvm_host.h>
>  #include <asm/pvclock.h>
>  #include "kvm_cache_regs.h"
> @@ -264,10 +262,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
>  	    __rem;						\
>  	 })
>  
> -static inline bool kvm_mwait_in_guest(void)
> +#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
> +#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT)
> +
> +static inline bool kvm_mwait_in_guest(struct kvm *kvm)
>  {
> -	return boot_cpu_has(X86_FEATURE_MWAIT) &&
> -		!boot_cpu_has_bug(X86_BUG_MONITOR);
> +	return kvm->arch.mwait_in_guest;

With this nice variable, the wrapper actually makes the code harder to
read.  Please use kvm->arch.mwait_in_guest directly (and the same for
the other two future exits),

thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT
  2018-03-01  9:49 ` [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT Wanpeng Li
@ 2018-03-08 20:40   ` Radim Krčmář
  2018-03-09  0:51     ` Wanpeng Li
  0 siblings, 1 reply; 9+ messages in thread
From: Radim Krčmář @ 2018-03-08 20:40 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: linux-kernel, kvm, Paolo Bonzini

2018-03-01 17:49+0800, Wanpeng Li:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> If host CPUs are dedicated to a VM, we can avoid VM exits on HLT.
> This patch adds the per-VM non-HLT-exiting capability.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index dff3a5d..fcf8339 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1394,6 +1394,9 @@ static void init_vmcb(struct vcpu_svm *svm)
>  		set_intercept(svm, INTERCEPT_MWAIT);
>  	}
>  
> +	if (!kvm_hlt_in_guest(svm->vcpu.kvm))
> +		set_intercept(svm, INTERCEPT_HLT);

We unconditionally set INTERCEPT_HLT just above, so that line has to be
removed.

> +
>  	control->iopm_base_pa = __sme_set(iopm_base);
>  	control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
>  	control->int_ctl = V_INTR_MASKING_MASK;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -2525,6 +2525,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
>  	return 0;
>  }
>  
> +static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * Ensure that we clear the HLT state in the VMCS.  We don't need to
> +	 * explicitly skip the instruction because if the HLT state is set,
> +	 * then the instruction is already executing and RIP has already been
> +	 * advanced.
> +	 */
> +	if (kvm_hlt_in_guest(vcpu->kvm) &&
> +			vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
> +		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
> +}

The clearing seems to be still missing around SMM -- I think you need to
call vmx_clear_hlt() from pre_enter_smm().

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] KVM: X86: Provides userspace with a capability to not intercept PAUSE
  2018-03-01  9:49 ` [PATCH 3/3] KVM: X86: Provides userspace with a capability to not intercept PAUSE Wanpeng Li
@ 2018-03-08 20:52   ` Radim Krčmář
  0 siblings, 0 replies; 9+ messages in thread
From: Radim Krčmář @ 2018-03-08 20:52 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: linux-kernel, kvm, Paolo Bonzini

2018-03-01 17:49+0800, Wanpeng Li:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Allow to disable pause loop exit/pause filtering on a per VM basis.
> 
> If some VMs have dedicated host CPUs, they won't be negatively affected
> due to needlessly intercepted PAUSE instructions.
> 
> Thanks to Jan H. Schönherr's initial patch.

Please Cc him on v2.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT
  2018-03-08 20:31 ` [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Radim Krčmář
@ 2018-03-08 20:55   ` Radim Krčmář
  2018-03-09  2:34   ` Wanpeng Li
  1 sibling, 0 replies; 9+ messages in thread
From: Radim Krčmář @ 2018-03-08 20:55 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: linux-kernel, kvm, Paolo Bonzini

2018-03-08 21:31+0100, Radim Krčmář:
> 2018-03-01 17:49+0800, Wanpeng Li:
> > @@ -264,10 +262,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
> >  	    __rem;						\
> >  	 })
> >  
> > -static inline bool kvm_mwait_in_guest(void)
> > +#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
> > +#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT)
> > +
> > +static inline bool kvm_mwait_in_guest(struct kvm *kvm)
> >  {
> > -	return boot_cpu_has(X86_FEATURE_MWAIT) &&
> > -		!boot_cpu_has_bug(X86_BUG_MONITOR);
> > +	return kvm->arch.mwait_in_guest;
> 
> With this nice variable, the wrapper actually makes the code harder to
> read.  Please use kvm->arch.mwait_in_guest directly (and the same for
> the other two future exits),

I take that back.  kvm->arch.mwait_in_guest looks ok, but it would be
vmx->vcpu.kvm->arch.mwait_in_guest elsewhere and that is just too bad.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT
  2018-03-08 20:40   ` Radim Krčmář
@ 2018-03-09  0:51     ` Wanpeng Li
  0 siblings, 0 replies; 9+ messages in thread
From: Wanpeng Li @ 2018-03-09  0:51 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: LKML, kvm, Paolo Bonzini

2018-03-09 4:40 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2018-03-01 17:49+0800, Wanpeng Li:
>> From: Wanpeng Li <wanpengli@tencent.com>
>>
>> If host CPUs are dedicated to a VM, we can avoid VM exits on HLT.
>> This patch adds the per-VM non-HLT-exiting capability.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
>> ---
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index dff3a5d..fcf8339 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1394,6 +1394,9 @@ static void init_vmcb(struct vcpu_svm *svm)
>>               set_intercept(svm, INTERCEPT_MWAIT);
>>       }
>>
>> +     if (!kvm_hlt_in_guest(svm->vcpu.kvm))
>> +             set_intercept(svm, INTERCEPT_HLT);
>
> We unconditionally set INTERCEPT_HLT just above, so that line has to be
> removed.

Agreed.

>
>> +
>>       control->iopm_base_pa = __sme_set(iopm_base);
>>       control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
>>       control->int_ctl = V_INTR_MASKING_MASK;
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> @@ -2525,6 +2525,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
>>       return 0;
>>  }
>>
>> +static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
>> +{
>> +     /*
>> +      * Ensure that we clear the HLT state in the VMCS.  We don't need to
>> +      * explicitly skip the instruction because if the HLT state is set,
>> +      * then the instruction is already executing and RIP has already been
>> +      * advanced.
>> +      */
>> +     if (kvm_hlt_in_guest(vcpu->kvm) &&
>> +                     vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
>> +             vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
>> +}
>
> The clearing seems to be still missing around SMM -- I think you need to
> call vmx_clear_hlt() from pre_enter_smm().

Will do in v2.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT
  2018-03-08 20:31 ` [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Radim Krčmář
  2018-03-08 20:55   ` Radim Krčmář
@ 2018-03-09  2:34   ` Wanpeng Li
  1 sibling, 0 replies; 9+ messages in thread
From: Wanpeng Li @ 2018-03-09  2:34 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: LKML, kvm, Paolo Bonzini

2018-03-09 4:31 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>  To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
>> -be used.
>> +be used. Blindly passing the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP is
>> +a valid thing to do when vCPUs are associated to dedicated physical CPUs.
>
> This is not true even for x86 KVM_CAP_SPLIT_IRQCHIP and neither is is a
> need to limit ourselves.  Just leave it be.

https://www.spinics.net/lists/kvm/msg159524.html

>  So I think we should put in the
> documentation that blindly passing the KVM_CHECK_EXTENSION result to
> KVM_ENABLE_CAP is a valid thing to do when vCPUs are associated to
> dedicated physical CPUs.

Paolo asked about this before. Paolo, what's your opinion?

>> +7.13 KVM_CAP_X86_DISABLE_EXITS
>> +
>> +Architectures: x86
>> +Parameters: args[0] defines which exits are disabled
>> +Returns: 0 on success, -EINVAL when args[0] contains invalid exits
>> +
>> +Valid exits in args[0] are
>> +
>> +#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
>> +
>> +Enabling this capability on a VM provides userspace with a way to no
>> +longer intercepts some instructions for improved latency in some
>> +workloads. Not enable KVM_FEATURE_PV_UNHALT if you block HLT.
>
> The last sentence belong to the patch that enables HLT.
> KVM could in theory handle the case (although it makes no sense), so if

> it doesn't currently work, please add a check to kvm_update_cpuid() that
> forbids KVM_FEATURE_PV_UNHALT when halt exits are disabled.

Agreed.

>
> Also, it would be nicer to write that as
> "Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits."
>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> @@ -2780,9 +2780,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
>>       return r;
>>  }
>>
>> +static inline bool kvm_mwait_can_in_guest(void)
>
> I think kvm_can_mwait_in_guest would be a better name.

Agreed.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-03-09  2:34 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-01  9:49 [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Wanpeng Li
2018-03-01  9:49 ` [PATCH 2/3] KVM: X86: Provides userspace with a capability to not intercept HLT Wanpeng Li
2018-03-08 20:40   ` Radim Krčmář
2018-03-09  0:51     ` Wanpeng Li
2018-03-01  9:49 ` [PATCH 3/3] KVM: X86: Provides userspace with a capability to not intercept PAUSE Wanpeng Li
2018-03-08 20:52   ` Radim Krčmář
2018-03-08 20:31 ` [PATCH 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT Radim Krčmář
2018-03-08 20:55   ` Radim Krčmář
2018-03-09  2:34   ` Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).