* [PATCH v4 0/6] nSVM/SVM features
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

This is a set of patches for optional nested SVM features.

V4: rebased on top of kvm/queue + my patch series 'SVM fixes + refactoring'
and incorporated all review feedback.

This was tested with kvm-unit-tests, running on L0, L1, and L2,
and no new failures were seen.

This time I also tested with all of the new features disabled in L1
and in L2, to avoid repeating an issue I previously had with nested
TSC scaling.

Best regards,
	Maxim Levitsky

Maxim Levitsky (6):
  KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running
  KVM: x86: nSVM: implement nested LBR virtualization
  KVM: x86: nSVM: support PAUSE filtering when L0 doesn't intercept
    PAUSE
  KVM: x86: nSVM: implement nested vGIF
  KVM: x86: allow per cpu apicv inhibit reasons
  KVM: x86: SVM: allow AVIC to co-exist with a nested guest running

 arch/x86/include/asm/kvm-x86-ops.h |   1 +
 arch/x86/include/asm/kvm_host.h    |   6 ++
 arch/x86/kvm/svm/avic.c            |   7 ++
 arch/x86/kvm/svm/nested.c          |  83 +++++++++++++--
 arch/x86/kvm/svm/svm.c             | 162 +++++++++++++++++++++++------
 arch/x86/kvm/svm/svm.h             |  41 ++++++--
 arch/x86/kvm/x86.c                 |  14 ++-
 7 files changed, 264 insertions(+), 50 deletions(-)

-- 
2.26.3




* [PATCH v4 1/6] KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

When L2 is running without LBR virtualization, we should ensure
that L1's LBR MSRs continue to update as usual.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 12 +++++
 arch/x86/kvm/svm/svm.c    | 98 +++++++++++++++++++++++++++++++--------
 arch/x86/kvm/svm/svm.h    |  2 +
 3 files changed, 93 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1c381c6a7b51..98647f5dec93 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -536,6 +536,7 @@ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
 static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
 {
 	bool new_vmcb12 = false;
+	struct vmcb *vmcb01 = svm->vmcb01.ptr;
 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
 
 	nested_vmcb02_compute_g_pat(svm);
@@ -586,6 +587,9 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 		svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
 		vmcb_mark_dirty(vmcb02, VMCB_DR);
 	}
+
+	if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK))
+		svm_copy_lbrs(vmcb02, vmcb01);
 }
 
 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
@@ -645,6 +649,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
 	vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
 
+	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
+					      LBR_CTL_ENABLE_MASK;
+
 	if (!nested_vmcb_needs_vls_intercept(svm))
 		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 
@@ -912,6 +919,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
+	if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
+		svm_copy_lbrs(vmcb01, vmcb02);
+		svm_update_lbrv(vcpu);
+	}
+
 	/*
 	 * On vmexit the  GIF is set to false and
 	 * no event can be injected in L1.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 70fc5897f5f2..b3ba3bf2d95e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -793,6 +793,17 @@ static void init_msrpm_offsets(void)
 	}
 }
 
+void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+{
+	to_vmcb->save.dbgctl		= from_vmcb->save.dbgctl;
+	to_vmcb->save.br_from		= from_vmcb->save.br_from;
+	to_vmcb->save.br_to		= from_vmcb->save.br_to;
+	to_vmcb->save.last_excp_from	= from_vmcb->save.last_excp_from;
+	to_vmcb->save.last_excp_to	= from_vmcb->save.last_excp_to;
+
+	vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+}
+
 static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -802,6 +813,10 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+
+	/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+	if (is_guest_mode(vcpu))
+		svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
 }
 
 static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
@@ -813,6 +828,63 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+
+	/*
+	 * Move the LBR msrs back to the vmcb01 to avoid copying them
+	 * on nested guest entries.
+	 */
+	if (is_guest_mode(vcpu))
+		svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
+}
+
+static u64 svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
+{
+	/*
+	 * If the LBR virtualization is disabled, the LBR msrs are always
+	 * kept in the vmcb01 to avoid copying them on nested guest entries.
+	 *
+	 * If nested, and the LBR virtualization is enabled/disabled, the msrs
+	 * are moved between the vmcb01 and vmcb02 as needed.
+	 */
+	struct vmcb *vmcb =
+		(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
+			svm->vmcb : svm->vmcb01.ptr;
+
+	switch (index) {
+	case MSR_IA32_DEBUGCTLMSR:
+		return vmcb->save.dbgctl;
+	case MSR_IA32_LASTBRANCHFROMIP:
+		return vmcb->save.br_from;
+	case MSR_IA32_LASTBRANCHTOIP:
+		return vmcb->save.br_to;
+	case MSR_IA32_LASTINTFROMIP:
+		return vmcb->save.last_excp_from;
+	case MSR_IA32_LASTINTTOIP:
+		return vmcb->save.last_excp_to;
+	default:
+		KVM_BUG(false, svm->vcpu.kvm,
+			"%s: Unknown MSR 0x%x", __func__, index);
+		return 0;
+	}
+}
+
+void svm_update_lbrv(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
+					   DEBUGCTLMSR_LBR;
+
+	bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
+				      LBR_CTL_ENABLE_MASK);
+
+	if (enable_lbrv == current_enable_lbrv)
+		return;
+
+	if (enable_lbrv)
+		svm_enable_lbrv(vcpu);
+	else
+		svm_disable_lbrv(vcpu);
 }
 
 void disable_nmi_singlestep(struct vcpu_svm *svm)
@@ -2581,25 +2653,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_TSC_AUX:
 		msr_info->data = svm->tsc_aux;
 		break;
-	/*
-	 * Nobody will change the following 5 values in the VMCB so we can
-	 * safely return them on rdmsr. They will always be 0 until LBRV is
-	 * implemented.
-	 */
 	case MSR_IA32_DEBUGCTLMSR:
-		msr_info->data = svm->vmcb->save.dbgctl;
-		break;
 	case MSR_IA32_LASTBRANCHFROMIP:
-		msr_info->data = svm->vmcb->save.br_from;
-		break;
 	case MSR_IA32_LASTBRANCHTOIP:
-		msr_info->data = svm->vmcb->save.br_to;
-		break;
 	case MSR_IA32_LASTINTFROMIP:
-		msr_info->data = svm->vmcb->save.last_excp_from;
-		break;
 	case MSR_IA32_LASTINTTOIP:
-		msr_info->data = svm->vmcb->save.last_excp_to;
+		msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
 		break;
 	case MSR_VM_HSAVE_PA:
 		msr_info->data = svm->nested.hsave_msr;
@@ -2845,12 +2904,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		if (data & DEBUGCTL_RESERVED_BITS)
 			return 1;
 
-		svm->vmcb->save.dbgctl = data;
-		vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
-		if (data & (1ULL<<0))
-			svm_enable_lbrv(vcpu);
+		if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
+			svm->vmcb->save.dbgctl = data;
 		else
-			svm_disable_lbrv(vcpu);
+			svm->vmcb01.ptr->save.dbgctl = data;
+
+		svm_update_lbrv(vcpu);
+
 		break;
 	case MSR_VM_HSAVE_PA:
 		/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 6a10cb4817e8..75373cb24a39 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -492,6 +492,8 @@ u32 svm_msrpm_offset(u32 msr);
 u32 *svm_vcpu_alloc_msrpm(void);
 void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
 void svm_vcpu_free_msrpm(u32 *msrpm);
+void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_update_lbrv(struct kvm_vcpu *vcpu);
 
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-- 
2.26.3



* [PATCH v4 2/6] KVM: x86: nSVM: implement nested LBR virtualization
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

This was tested with a kvm-unit-test that was developed
for this purpose.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 19 +++++++++++++++++--
 arch/x86/kvm/svm/svm.c    |  7 +++++++
 arch/x86/kvm/svm/svm.h    |  1 +
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 98647f5dec93..c1baa3a68ce6 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -588,8 +588,17 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 		vmcb_mark_dirty(vmcb02, VMCB_DR);
 	}
 
-	if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK))
+	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+		/* Copy LBR related registers from vmcb12,
+		 * but make sure that we only pick LBR enable bit from the guest.
+		 */
+		svm_copy_lbrs(vmcb02, vmcb12);
+		vmcb02->save.dbgctl &= LBR_CTL_ENABLE_MASK;
+		svm_update_lbrv(&svm->vcpu);
+
+	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
 		svm_copy_lbrs(vmcb02, vmcb01);
+	}
 }
 
 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
@@ -651,6 +660,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 
 	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
 					      LBR_CTL_ENABLE_MASK;
+	if (svm->lbrv_enabled)
+		vmcb02->control.virt_ext  |=
+			(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
 
 	if (!nested_vmcb_needs_vls_intercept(svm))
 		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
@@ -919,7 +931,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
-	if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
+	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+		svm_copy_lbrs(vmcb12, vmcb02);
+		svm_update_lbrv(vcpu);
+	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
 		svm_copy_lbrs(vmcb01, vmcb02);
 		svm_update_lbrv(vcpu);
 	}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b3ba3bf2d95e..ec9a1dabdcc3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -878,6 +878,10 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
 	bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
 				      LBR_CTL_ENABLE_MASK);
 
+	if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
+		if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
+			enable_lbrv = true;
+
 	if (enable_lbrv == current_enable_lbrv)
 		return;
 
@@ -4012,6 +4016,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 			     guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
 
 	svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
+	svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
 
 	svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
 
@@ -4765,6 +4770,8 @@ static __init void svm_set_cpu_caps(void)
 
 		if (vls)
 			kvm_cpu_cap_set(X86_FEATURE_V_VMSAVE_VMLOAD);
+		if (lbrv)
+			kvm_cpu_cap_set(X86_FEATURE_LBRV);
 
 		/* Nested VM can receive #VMEXIT instead of triggering #GP */
 		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 75373cb24a39..aaf46b1fbf76 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -236,6 +236,7 @@ struct vcpu_svm {
 	bool nrips_enabled                : 1;
 	bool tsc_scaling_enabled          : 1;
 	bool v_vmload_vmsave_enabled      : 1;
+	bool lbrv_enabled                 : 1;
 
 	u32 ldr_reg;
 	u32 dfr_reg;
-- 
2.26.3



* [PATCH v4 3/6] KVM: x86: nSVM: support PAUSE filtering when L0 doesn't intercept PAUSE
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

Allow L1 to use PAUSE filtering if L0 doesn't use it.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 26 ++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c    | 22 +++++++++++++++++++---
 arch/x86/kvm/svm/svm.h    |  2 ++
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c1baa3a68ce6..0a0b4b26c91e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -667,6 +667,29 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	if (!nested_vmcb_needs_vls_intercept(svm))
 		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 
+	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
+		/* use guest values since host doesn't use them */
+		vmcb02->control.pause_filter_count =
+				svm->pause_filter_enabled ?
+				svm->nested.ctl.pause_filter_count : 0;
+
+		vmcb02->control.pause_filter_thresh =
+				svm->pause_threshold_enabled ?
+				svm->nested.ctl.pause_filter_thresh : 0;
+
+	} else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+		/* use host values when guest doesn't use them */
+		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
+		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
+	} else {
+		/*
+		 * Intercept every PAUSE otherwise and
+		 * ignore both host and guest values
+		 */
+		vmcb02->control.pause_filter_count = 0;
+		vmcb02->control.pause_filter_thresh = 0;
+	}
+
 	nested_svm_transition_tlb_flush(vcpu);
 
 	/* Enter Guest-Mode */
@@ -927,6 +950,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
 	vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
 
+	if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
+		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ec9a1dabdcc3..4c23cb1895ab 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -910,6 +910,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	int old = control->pause_filter_count;
 
+	if (kvm_pause_in_guest(vcpu->kvm) || !old)
+		return;
+
 	control->pause_filter_count = __grow_ple_window(old,
 							pause_filter_count,
 							pause_filter_count_grow,
@@ -928,6 +931,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	int old = control->pause_filter_count;
 
+	if (kvm_pause_in_guest(vcpu->kvm) || !old)
+		return;
+
 	control->pause_filter_count =
 				__shrink_ple_window(old,
 						    pause_filter_count,
@@ -2984,7 +2990,6 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
 static int pause_interception(struct kvm_vcpu *vcpu)
 {
 	bool in_kernel;
-
 	/*
 	 * CPL is not made available for an SEV-ES guest, therefore
 	 * vcpu->arch.preempted_in_kernel can never be true.  Just
@@ -2992,8 +2997,7 @@ static int pause_interception(struct kvm_vcpu *vcpu)
 	 */
 	in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
 
-	if (!kvm_pause_in_guest(vcpu->kvm))
-		grow_ple_window(vcpu);
+	grow_ple_window(vcpu);
 
 	kvm_vcpu_on_spin(vcpu, in_kernel);
 	return kvm_skip_emulated_instruction(vcpu);
@@ -4020,6 +4024,12 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
 
+	svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
+			guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+
+	svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
+			guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+
 	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
@@ -4773,6 +4783,12 @@ static __init void svm_set_cpu_caps(void)
 		if (lbrv)
 			kvm_cpu_cap_set(X86_FEATURE_LBRV);
 
+		if (boot_cpu_has(X86_FEATURE_PAUSEFILTER))
+			kvm_cpu_cap_set(X86_FEATURE_PAUSEFILTER);
+
+		if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))
+			kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);
+
 		/* Nested VM can receive #VMEXIT instead of triggering #GP */
 		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
 	}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index aaf46b1fbf76..9895fd6a7310 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -237,6 +237,8 @@ struct vcpu_svm {
 	bool tsc_scaling_enabled          : 1;
 	bool v_vmload_vmsave_enabled      : 1;
 	bool lbrv_enabled                 : 1;
+	bool pause_filter_enabled         : 1;
+	bool pause_threshold_enabled      : 1;
 
 	u32 ldr_reg;
 	u32 dfr_reg;
-- 
2.26.3



* [PATCH v4 4/6] KVM: x86: nSVM: implement nested vGIF
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

When L1 enables vGIF for L2, L2 cannot affect L1's GIF, regardless
of STGI/CLGI intercepts, and since VM entry enables GIF, this means
that L1's GIF is always 1 while L2 is running.

Thus, in this case, keep L1's vGIF in vmcb01, while letting L2
control the vGIF in vmcb02, thus implementing nested vGIF.

Also, allow KVM to keep toggling L1's GIF during nested entry/exit
by always using vmcb01.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 17 +++++++++++++----
 arch/x86/kvm/svm/svm.c    |  5 +++++
 arch/x86/kvm/svm/svm.h    | 35 +++++++++++++++++++++++++++++------
 3 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 0a0b4b26c91e..47a5e8d8b578 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -439,6 +439,10 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
 		 */
 		mask &= ~V_IRQ_MASK;
 	}
+
+	if (nested_vgif_enabled(svm))
+		mask |= V_GIF_MASK;
+
 	svm->nested.ctl.int_ctl        &= ~mask;
 	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
 }
@@ -603,10 +607,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 
 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 {
-	const u32 int_ctl_vmcb01_bits =
-		V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
-
-	const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
+	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
 
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
@@ -623,6 +625,13 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	 */
 	WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
 
+
+
+	if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
+	else
+		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
+
 	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
 	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4c23cb1895ab..7fb4cf3bce4f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4030,6 +4030,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
 			guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
 
+	svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+
 	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
@@ -4789,6 +4791,9 @@ static __init void svm_set_cpu_caps(void)
 		if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))
 			kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);
 
+		if (vgif)
+			kvm_cpu_cap_set(X86_FEATURE_VGIF);
+
 		/* Nested VM can receive #VMEXIT instead of triggering #GP */
 		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
 	}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9895fd6a7310..ba0c90bc5c55 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -239,6 +239,7 @@ struct vcpu_svm {
 	bool lbrv_enabled                 : 1;
 	bool pause_filter_enabled         : 1;
 	bool pause_threshold_enabled      : 1;
+	bool vgif_enabled                 : 1;
 
 	u32 ldr_reg;
 	u32 dfr_reg;
@@ -457,26 +458,48 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
 	return vmcb_is_intercept(&svm->vmcb->control, bit);
 }
 
+static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
+{
+	return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+}
+
+static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
+{
+	if (!vgif)
+		return NULL;
+
+	if (is_guest_mode(&svm->vcpu) && !nested_vgif_enabled(svm))
+		return svm->nested.vmcb02.ptr;
+	else
+		return svm->vmcb01.ptr;
+}
+
 static inline void enable_gif(struct vcpu_svm *svm)
 {
-	if (vgif)
-		svm->vmcb->control.int_ctl |= V_GIF_MASK;
+	struct vmcb *vmcb = get_vgif_vmcb(svm);
+
+	if (vmcb)
+		vmcb->control.int_ctl |= V_GIF_MASK;
 	else
 		svm->vcpu.arch.hflags |= HF_GIF_MASK;
 }
 
 static inline void disable_gif(struct vcpu_svm *svm)
 {
-	if (vgif)
-		svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
+	struct vmcb *vmcb = get_vgif_vmcb(svm);
+
+	if (vmcb)
+		vmcb->control.int_ctl &= ~V_GIF_MASK;
 	else
 		svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
 }
 
 static inline bool gif_set(struct vcpu_svm *svm)
 {
-	if (vgif)
-		return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
+	struct vmcb *vmcb = get_vgif_vmcb(svm);
+
+	if (vmcb)
+		return !!(vmcb->control.int_ctl & V_GIF_MASK);
 	else
 		return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
-- 
2.26.3



* [PATCH v4 5/6] KVM: x86: allow per cpu apicv inhibit reasons
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

Add an optional callback, .vcpu_get_apicv_inhibit_reasons, that returns
extra inhibit reasons which prevent APICv from working on this vCPU.
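
For illustration, the SVM side (added later in this series) implements the
callback roughly as follows; this is a condensed sketch of the code from
patch 6, not something added by this patch:

	unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
	{
		/* AVIC is inhibited on this vCPU while it runs a nested guest. */
		if (is_guest_mode(vcpu))
			return APICV_INHIBIT_REASON_NESTED;

		return 0;
	}

	/* ...and it is wired into kvm_x86_ops in svm.c: */
	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,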

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  6 ++++++
 arch/x86/kvm/x86.c                 | 14 ++++++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 3c368b639c04..96e4e9842dfc 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -126,6 +126,7 @@ KVM_X86_OP_OPTIONAL(migrate_timers)
 KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
+KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9694dd5e6ccc..1c87a6e9e99f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1507,6 +1507,11 @@ struct kvm_x86_ops {
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+
+	/*
+	 * Returns vCPU specific APICv inhibit reasons
+	 */
+	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
@@ -1807,6 +1812,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
 				struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
+bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
 void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
 				      enum kvm_apicv_inhibit reason, bool set);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c27239f987f..d52af860843d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9114,6 +9114,14 @@ bool kvm_apicv_activated(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_apicv_activated);
 
+bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
+{
+	ulong vm_reasons = READ_ONCE(vcpu->kvm->arch.apicv_inhibit_reasons);
+	ulong vcpu_reasons = static_call(kvm_x86_vcpu_get_apicv_inhibit_reasons)(vcpu);
+
+	return (vm_reasons | vcpu_reasons) == 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
 
 static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
 				       enum kvm_apicv_inhibit reason, bool set)
@@ -9788,7 +9796,8 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 
 	down_read(&vcpu->kvm->arch.apicv_update_lock);
 
-	activate = kvm_apicv_activated(vcpu->kvm);
+	activate = kvm_vcpu_apicv_activated(vcpu);
+
 	if (vcpu->arch.apicv_active == activate)
 		goto out;
 
@@ -10189,7 +10198,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 * per-VM state, and responsing vCPUs must wait for the update
 		 * to complete before servicing KVM_REQ_APICV_UPDATE.
 		 */
-		WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
+		WARN_ON_ONCE(kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu));
+
 
 		exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
 		if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
-- 
2.26.3



* [PATCH v4 6/6] KVM: x86: SVM: allow AVIC to co-exist with a nested guest running
From: Maxim Levitsky @ 2022-03-22 17:40 UTC (permalink / raw)
  To: kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, Paolo Bonzini, H. Peter Anvin, Joerg Roedel,
	linux-kernel, Thomas Gleixner, Jim Mattson, x86, Dave Hansen,
	Wanpeng Li, Maxim Levitsky

Inhibit the AVIC of the vCPU that is running nested for the duration of the
nested run, so that all interrupts arriving from both its vCPU siblings
and from KVM are delivered using normal IPIs and cause that vCPU to vmexit.

Note that unlike normal AVIC inhibition, there is no need to
update the AVIC MMIO memslot, because the nested guest uses its
own set of paging tables.
That also means that AVIC doesn't need to be inhibited VM-wide.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/avic.c   |  7 +++++++
 arch/x86/kvm/svm/nested.c | 17 ++++++++++-------
 arch/x86/kvm/svm/svm.c    | 30 +++++++++++++++++++-----------
 arch/x86/kvm/svm/svm.h    |  1 +
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index b39fe614467a..334fca06a3c8 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -357,6 +357,13 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
+{
+	if (is_guest_mode(vcpu))
+		return APICV_INHIBIT_REASON_NESTED;
+	return 0;
+}
+
 static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 {
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 47a5e8d8b578..26fd48603fab 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -619,13 +619,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
 	 */
 
-	/*
-	 * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
-	 * avic_physical_id.
-	 */
-	WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
-
-
 
 	if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
 		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
@@ -766,6 +759,9 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 
 	svm_set_gif(svm, true);
 
+	if (kvm_vcpu_apicv_active(vcpu))
+		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+
 	return 0;
 }
 
@@ -1043,6 +1039,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
 		kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
 
+	/*
+	 * Un-inhibit the AVIC right away, so that other vCPUs can start
+	 * to benefit from it right away.
+	 */
+	if (kvm_apicv_activated(vcpu->kvm))
+		kvm_vcpu_update_apicv(vcpu);
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7fb4cf3bce4f..28d7a4aebafd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1468,7 +1468,7 @@ static void svm_set_vintr(struct vcpu_svm *svm)
 	/*
 	 * The following fields are ignored when AVIC is enabled
 	 */
-	WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+	WARN_ON(kvm_vcpu_apicv_activated(&svm->vcpu));
 
 	svm_set_intercept(svm, INTERCEPT_VINTR);
 
@@ -2977,9 +2977,16 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
 	svm_clear_vintr(to_svm(vcpu));
 
 	/*
-	 * For AVIC, the only reason to end up here is ExtINTs.
+	 * If not running nested, for AVIC, the only reason to end up here is ExtINTs.
 	 * In this case AVIC was temporarily disabled for
 	 * requesting the IRQ window and we have to re-enable it.
+	 *
+	 * If running nested, still remove the VM wide AVIC inhibit to
+	 * support the case in which the interrupt window was requested
+	 * when the vCPU was not running nested.
+	 *
+	 * All vCPUs which are still running nested will keep their AVIC
+	 * inhibited due to the per-vCPU AVIC inhibition.
 	 */
 	kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
 
@@ -3574,10 +3581,16 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
 		/*
 		 * IRQ window is not needed when AVIC is enabled,
 		 * unless we have pending ExtINT since it cannot be injected
-		 * via AVIC. In such case, we need to temporarily disable AVIC,
+		 * via AVIC. In such case, KVM needs to temporarily disable AVIC,
 		 * and fallback to injecting IRQ via V_IRQ.
+		 *
+		 * If running nested, AVIC is already locally inhibited
+		 * on this vCPU, therefore there is no need to request
+		 * the VM wide AVIC inhibition.
 		 */
-		kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+		if (!is_guest_mode(vcpu))
+			kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+
 		svm_set_vintr(svm);
 	}
 }
@@ -4048,13 +4061,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		 */
 		if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
 			kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_X2APIC);
-
-		/*
-		 * Currently, AVIC does not work with nested virtualization.
-		 * So, we disable AVIC when cpuid for SVM is set in the L1 guest.
-		 */
-		if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
-			kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_NESTED);
 	}
 	init_vmcb_after_set_cpuid(vcpu);
 }
@@ -4717,6 +4723,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
 };
 
 /*
@@ -4918,6 +4925,7 @@ static __init int svm_hardware_setup(void)
 	} else {
 		svm_x86_ops.vcpu_blocking = NULL;
 		svm_x86_ops.vcpu_unblocking = NULL;
+		svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
 	}
 
 	if (vls) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index ba0c90bc5c55..c6db1fe17cbc 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -622,6 +622,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
 void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void avic_ring_doorbell(struct kvm_vcpu *vcpu);
+unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
-- 
2.26.3



* Re: [PATCH v4 2/6] KVM: x86: nSVM: implement nested LBR virtualization
From: Paolo Bonzini @ 2022-03-24 18:21 UTC (permalink / raw)
  To: Maxim Levitsky, kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, H. Peter Anvin, Joerg Roedel, linux-kernel,
	Thomas Gleixner, Jim Mattson, x86, Dave Hansen, Wanpeng Li

On 3/22/22 18:40, Maxim Levitsky wrote:
> +		/* Copy LBR related registers from vmcb12,
> +		 * but make sure that we only pick LBR enable bit from the guest.
> +		 */
> +		svm_copy_lbrs(vmcb02, vmcb12);
> +		vmcb02->save.dbgctl &= LBR_CTL_ENABLE_MASK;

I still do not understand why it is not copying all bits outside
DEBUGCTL_RESERVED_BITS.  That is:

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c1baa3a68ce6..f1332d802ec8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -589,11 +589,12 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
  	}
  
  	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
-		/* Copy LBR related registers from vmcb12,
-		 * but make sure that we only pick LBR enable bit from the guest.
+		/*
+		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
+		 * svm_set_msr's definition of reserved bits.
  		 */
  		svm_copy_lbrs(vmcb02, vmcb12);
-		vmcb02->save.dbgctl &= LBR_CTL_ENABLE_MASK;
+		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
  		svm_update_lbrv(&svm->vcpu);
  
  	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 54fa048069b2..a6282be4e419 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -62,8 +62,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
  #define SEG_TYPE_LDT 2
  #define SEG_TYPE_BUSY_TSS16 3
  
-#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-
  static bool erratum_383_found __read_mostly;
  
  u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index cade032520cb..b687393e86ad 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -487,6 +487,8 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
  /* svm.c */
  #define MSR_INVALID				0xffffffffU
  
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+
  extern bool dump_invalid_vmcb;
  
  u32 svm_msrpm_offset(u32 msr);


> +		svm_update_lbrv(&svm->vcpu);
> +
> +	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
>   		svm_copy_lbrs(vmcb02, vmcb01);



* Re: [PATCH v4 3/6] KVM: x86: nSVM: support PAUSE filtering when L0 doesn't intercept PAUSE
From: Paolo Bonzini @ 2022-03-24 18:24 UTC (permalink / raw)
  To: Maxim Levitsky, kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, H. Peter Anvin, Joerg Roedel, linux-kernel,
	Thomas Gleixner, Jim Mattson, x86, Dave Hansen, Wanpeng Li

On 3/22/22 18:40, Maxim Levitsky wrote:
> Allow L1 to use PAUSE filtering if L0 doesn't use it.
> 
> Signed-off-by: Maxim Levitsky<mlevitsk@redhat.com>

Can you enlarge the commit message to explain the logic in 
nested_vmcb02_prepare_control?

Thanks,

Paolo



* Re: [PATCH v4 2/6] KVM: x86: nSVM: implement nested LBR virtualization
From: Maxim Levitsky @ 2022-03-27 15:12 UTC (permalink / raw)
  To: Paolo Bonzini, kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, H. Peter Anvin, Joerg Roedel, linux-kernel,
	Thomas Gleixner, Jim Mattson, x86, Dave Hansen, Wanpeng Li

On Thu, 2022-03-24 at 19:21 +0100, Paolo Bonzini wrote:
> On 3/22/22 18:40, Maxim Levitsky wrote:
> > +		/* Copy LBR related registers from vmcb12,
> > +		 * but make sure that we only pick LBR enable bit from the guest.
> > +		 */
> > +		svm_copy_lbrs(vmcb02, vmcb12);
> > +		vmcb02->save.dbgctl &= LBR_CTL_ENABLE_MASK;
> 
> I still do not understand why it is not copying all bits outside
> DEBUGCTL_RESERVED_BITS.  That is:

Honestly, you are right, I'll do this.
 
Note, however, a few issues that we have around MSR_IA32_DEBUGCTLMSR
which need to be fixed eventually (and if I get to them first, I'll do this):
 

On SVM:
 
- without LBR virtualization supported (!lbrv),
any attempt to set that MSR is ignored and logged with pr_err_ratelimited.
 
Note that on AMD, MSR_IA32_DEBUGCTLMSR consists of:
 
bit 0 - 
     AMD's LBR bit
 
bit 1 -
     BTF - when set, the EFLAGS.TF flag causes a debug exception
     only on control flow instructions, allowing more efficient
     debugger-controlled execution of the code under debug.
 
bits 2-5:
    expose perf counters on external CPU pins. Very likely a NOP
    on anything remotely modern.
 
- with LBR virtualization supported, the guest can set this msr to any value
as long as it doesn't set reserved bits and then read back the written value, 
but it is not used by the CPU, unless LBR bit is set in MSR_IA32_DEBUGCTLMSR, 
because only then LBR virtualization is enabled, which makes the CPU 
load the guest value on VM entry.
 
This means that MSR_IA32_DEBUGCTLMSR.BTF will magically start working when
MSR_IA32_DEBUGCTLMSR.LBR is set as well, and will not work otherwise.
 
On VMX, we also have something a bit related (but I didn't do any homework
on this):
 
If both LBR and BTF are set, they are both cleared and
we also get a rate-limited vcpu_unimpl message.

Otherwise, the value is written to GUEST_IA32_DEBUGCTL, which I
think isn't tied to LBR virtualization like it is on AMD

(also, Intel's LBR implementation is much more useful,
since it has multiple records).


So, since the only bit in question is BTF, I was thinking:
let's just pick the LBR bit.

But I have absolutely no issue with being bug-consistent with the
non-nested treatment of MSR_IA32_DEBUGCTLMSR and passing through all
non-reserved bits.

Or I might at least document this in the errata document you added
recently to KVM (which is a great idea).

Best regards,
	Maxim Levitsky

> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index c1baa3a68ce6..f1332d802ec8 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -589,11 +589,12 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
>   	}
>   
>   	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
> -		/* Copy LBR related registers from vmcb12,
> -		 * but make sure that we only pick LBR enable bit from the guest.
> +		/*
> +		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
> +		 * svm_set_msr's definition of reserved bits.
>   		 */
>   		svm_copy_lbrs(vmcb02, vmcb12);
> -		vmcb02->save.dbgctl &= LBR_CTL_ENABLE_MASK;
> +		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
>   		svm_update_lbrv(&svm->vcpu);
>   
>   	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 54fa048069b2..a6282be4e419 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -62,8 +62,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
>   #define SEG_TYPE_LDT 2
>   #define SEG_TYPE_BUSY_TSS16 3
>   
> -#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
> -
>   static bool erratum_383_found __read_mostly;
>   
>   u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index cade032520cb..b687393e86ad 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -487,6 +487,8 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
>   /* svm.c */
>   #define MSR_INVALID				0xffffffffU
>   
> +#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
> +
>   extern bool dump_invalid_vmcb;
>   
>   u32 svm_msrpm_offset(u32 msr);
> 
> 
> > +		svm_update_lbrv(&svm->vcpu);
> > +
> > +	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
> >   		svm_copy_lbrs(vmcb02, vmcb01);




* Re: [PATCH v4 3/6] KVM: x86: nSVM: support PAUSE filtering when L0 doesn't intercept PAUSE
From: Maxim Levitsky @ 2022-03-27 15:14 UTC (permalink / raw)
  To: Paolo Bonzini, kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, H. Peter Anvin, Joerg Roedel, linux-kernel,
	Thomas Gleixner, Jim Mattson, x86, Dave Hansen, Wanpeng Li

On Thu, 2022-03-24 at 19:24 +0100, Paolo Bonzini wrote:
> On 3/22/22 18:40, Maxim Levitsky wrote:
> > Allow L1 to use PAUSE filtering if L0 doesn't use it.
> > 
> > Signed-off-by: Maxim Levitsky<mlevitsk@redhat.com>
> 
> Can you enlarge the commit message to explain the logic in 
> nested_vmcb02_prepare_control?

No problem, I will do that in the next version.

How about this:

KVM: x86: nSVM: support nested PAUSE filtering when possible

Expose PAUSE filtering and the PAUSE filter threshold in the guest CPUID
and support PAUSE filtering when possible:

- If L0 doesn't intercept PAUSE
  (cpu_pm=on, or kvm_amd's pause_filter_count parameter is 0),
  then allow L1 to have full control over PAUSE filtering.

- Otherwise, if L1 doesn't intercept PAUSE,
  use KVM's PAUSE thresholds, and update them even
  when running nested.

- Otherwise, ignore both the host and the guest PAUSE thresholds,
  because it is not really possible to merge them correctly.

  It is expected that in this case the userspace hypervisor (e.g. QEMU)
  will not enable this feature in the guest CPUID, to avoid
  having the guest update both thresholds pointlessly.
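
For reference, the above maps onto the following logic in
nested_vmcb02_prepare_control() (condensed from the patch itself):

	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
		/* L0 doesn't intercept PAUSE: use the guest's values. */
		vmcb02->control.pause_filter_count =
			svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
		vmcb02->control.pause_filter_thresh =
			svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
	} else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
		/* L1 doesn't intercept PAUSE: keep using L0's values. */
		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
	} else {
		/* Both intercept PAUSE: intercept every PAUSE and ignore both sets of values. */
		vmcb02->control.pause_filter_count = 0;
		vmcb02->control.pause_filter_thresh = 0;
	}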


Best regards,
	Maxim Levitsky

> 
> Thanks,
> 
> Paolo
> 




* Re: [PATCH v4 2/6] KVM: x86: nSVM: implement nested LBR virtualization
From: Paolo Bonzini @ 2022-03-28 17:12 UTC (permalink / raw)
  To: Maxim Levitsky, kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, H. Peter Anvin, Joerg Roedel, linux-kernel,
	Thomas Gleixner, Jim Mattson, x86, Dave Hansen, Wanpeng Li

On 3/27/22 17:12, Maxim Levitsky wrote:
> - with LBR virtualization supported, the guest can set this msr to any value
> as long as it doesn't set reserved bits and then read back the written value,
> but it is not used by the CPU, unless LBR bit is set in MSR_IA32_DEBUGCTLMSR,
> because only then LBR virtualization is enabled, which makes the CPU
> load the guest value on VM entry.
>   
> This means that MSR_IA32_DEBUGCTLMSR.BTF will magically start working when
> MSR_IA32_DEBUGCTLMSR.LBR is set as well, and will not work otherwise.

That can be fixed by context-switching DEBUGCTLMSR by hand when LBR=0 && 
BTF=1.  Would you like to give it a shot?
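
Something along these lines, perhaps (an untested sketch, not part of this
series; the host_dbgctl field and the call sites around VMRUN are made up
for illustration):

	/*
	 * Manually swap MSR_IA32_DEBUGCTLMSR around VMRUN when the guest
	 * wants BTF but not LBR: in that case LBR virtualization is off,
	 * so the CPU will not load the guest value by itself.
	 */
	static void svm_switch_debugctl(struct vcpu_svm *svm, bool entering_guest)
	{
		u64 guest_dbgctl = svm->vmcb->save.dbgctl;

		if ((guest_dbgctl & DEBUGCTLMSR_LBR) || !(guest_dbgctl & DEBUGCTLMSR_BTF))
			return;

		if (entering_guest) {
			rdmsrl(MSR_IA32_DEBUGCTLMSR, svm->host_dbgctl);
			wrmsrl(MSR_IA32_DEBUGCTLMSR, guest_dbgctl);
		} else {
			wrmsrl(MSR_IA32_DEBUGCTLMSR, svm->host_dbgctl);
		}
	}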

Paolo



* Re: [PATCH v4 2/6] KVM: x86: nSVM: implement nested LBR virtualization
From: Maxim Levitsky @ 2022-03-29  8:46 UTC (permalink / raw)
  To: Paolo Bonzini, kvm
  Cc: Ingo Molnar, Borislav Petkov, Sean Christopherson,
	Vitaly Kuznetsov, H. Peter Anvin, Joerg Roedel, linux-kernel,
	Thomas Gleixner, Jim Mattson, x86, Dave Hansen, Wanpeng Li

On Mon, 2022-03-28 at 19:12 +0200, Paolo Bonzini wrote:
> On 3/27/22 17:12, Maxim Levitsky wrote:
> > - with LBR virtualization supported, the guest can set this msr to any value
> > as long as it doesn't set reserved bits and then read back the written value,
> > but it is not used by the CPU, unless LBR bit is set in MSR_IA32_DEBUGCTLMSR,
> > because only then LBR virtualization is enabled, which makes the CPU
> > load the guest value on VM entry.
> >   
> > This means that MSR_IA32_DEBUGCTLMSR.BTF will magically start working when
> > MSR_IA32_DEBUGCTLMSR.LBR is set as well, and will not work otherwise.
> 
> That can be fixed by context-switching DEBUGCTLMSR by hand when LBR=0 && 
> BTF=1.  Would you like to give it a shot?
> 
> Paolo
> 
Yep, exactly. I didn't do that yet only because my patches didn't make it
worse, so I wanted to do this in a separate patch (+ unit test), and it
kind of went to my backlog. I'll do that soon.

Best regards,
	Maxim Levitsky


