* [patch 1/4] KVM: x86: add start_assignment hook to kvm_x86_ops
2021-05-10 17:26 [patch 0/4] VMX: configure posted interrupt descriptor when assigning device (v3) Marcelo Tosatti
@ 2021-05-10 17:26 ` Marcelo Tosatti
2021-05-11 16:26 ` Peter Xu
2021-05-10 17:26 ` [patch 2/4] KVM: add arch specific vcpu_check_block callback Marcelo Tosatti
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-10 17:26 UTC (permalink / raw)
To: kvm
Cc: Paolo Bonzini, Alex Williamson, Sean Christopherson, Peter Xu,
Marcelo Tosatti
Add a start_assignment hook to kvm_x86_ops, which is called when
kvm_arch_start_assignment is done.
The hook is required to update the wakeup vector of a sleeping vCPU
when a device is assigned to the guest.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -1322,6 +1322,7 @@ struct kvm_x86_ops {
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
+ void (*start_assignment)(struct kvm *kvm);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
Index: kvm/arch/x86/kvm/svm/svm.c
===================================================================
--- kvm.orig/arch/x86/kvm/svm/svm.c
+++ kvm/arch/x86/kvm/svm/svm.c
@@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __
.deliver_posted_interrupt = svm_deliver_avic_intr,
.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
+ .start_assignment = NULL,
.setup_mce = svm_setup_mce,
.smi_allowed = svm_smi_allowed,
Index: kvm/arch/x86/kvm/vmx/vmx.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx/vmx.c
+++ kvm/arch/x86/kvm/vmx/vmx.c
@@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __
.nested_ops = &vmx_nested_ops,
.update_pi_irte = pi_update_irte,
+ .start_assignment = NULL,
#ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer,
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -11295,7 +11295,11 @@ bool kvm_arch_can_dequeue_async_page_pre
void kvm_arch_start_assignment(struct kvm *kvm)
{
- atomic_inc(&kvm->arch.assigned_device_count);
+ int ret;
+
+ ret = atomic_inc_return(&kvm->arch.assigned_device_count);
+ if (ret == 1)
+ static_call_cond(kvm_x86_start_assignment)(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
Index: kvm/arch/x86/include/asm/kvm-x86-ops.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h
+++ kvm/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
KVM_X86_OP_NULL(vcpu_unblocking)
KVM_X86_OP_NULL(update_pi_irte)
+KVM_X86_OP_NULL(start_assignment)
KVM_X86_OP_NULL(apicv_post_state_restore)
KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt)
KVM_X86_OP_NULL(set_hv_timer)
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch 1/4] KVM: x86: add start_assignment hook to kvm_x86_ops
2021-05-10 17:26 ` [patch 1/4] KVM: x86: add start_assignment hook to kvm_x86_ops Marcelo Tosatti
@ 2021-05-11 16:26 ` Peter Xu
2021-05-11 17:29 ` Marcelo Tosatti
0 siblings, 1 reply; 12+ messages in thread
From: Peter Xu @ 2021-05-11 16:26 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: kvm, Paolo Bonzini, Alex Williamson, Sean Christopherson
On Mon, May 10, 2021 at 02:26:47PM -0300, Marcelo Tosatti wrote:
> Add a start_assignment hook to kvm_x86_ops, which is called when
> kvm_arch_start_assignment is done.
>
> The hook is required to update the wakeup vector of a sleeping vCPU
> when a device is assigned to the guest.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/arch/x86/include/asm/kvm_host.h
> ===================================================================
> --- kvm.orig/arch/x86/include/asm/kvm_host.h
> +++ kvm/arch/x86/include/asm/kvm_host.h
> @@ -1322,6 +1322,7 @@ struct kvm_x86_ops {
>
> int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> uint32_t guest_irq, bool set);
> + void (*start_assignment)(struct kvm *kvm);
> void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
>
> Index: kvm/arch/x86/kvm/svm/svm.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/svm/svm.c
> +++ kvm/arch/x86/kvm/svm/svm.c
> @@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __
> .deliver_posted_interrupt = svm_deliver_avic_intr,
> .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
> .update_pi_irte = svm_update_pi_irte,
> + .start_assignment = NULL,
Can this be dropped (as default NULL)?
> .setup_mce = svm_setup_mce,
>
> .smi_allowed = svm_smi_allowed,
> Index: kvm/arch/x86/kvm/vmx/vmx.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/vmx/vmx.c
> +++ kvm/arch/x86/kvm/vmx/vmx.c
> @@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __
> .nested_ops = &vmx_nested_ops,
>
> .update_pi_irte = pi_update_irte,
> + .start_assignment = NULL,
Same here?
>
> #ifdef CONFIG_X86_64
> .set_hv_timer = vmx_set_hv_timer,
> Index: kvm/arch/x86/kvm/x86.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/x86.c
> +++ kvm/arch/x86/kvm/x86.c
> @@ -11295,7 +11295,11 @@ bool kvm_arch_can_dequeue_async_page_pre
>
> void kvm_arch_start_assignment(struct kvm *kvm)
> {
> - atomic_inc(&kvm->arch.assigned_device_count);
> + int ret;
> +
> + ret = atomic_inc_return(&kvm->arch.assigned_device_count);
> + if (ret == 1)
> + static_call_cond(kvm_x86_start_assignment)(kvm);
Maybe "ret" can be dropped too?
void kvm_arch_start_assignment(struct kvm *kvm)
{
if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
static_call_cond(kvm_x86_start_assignment)(kvm);
}
Otherwise looks good to me. Thanks,
--
Peter Xu
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch 1/4] KVM: x86: add start_assignment hook to kvm_x86_ops
2021-05-11 16:26 ` Peter Xu
@ 2021-05-11 17:29 ` Marcelo Tosatti
0 siblings, 0 replies; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-11 17:29 UTC (permalink / raw)
To: Peter Xu; +Cc: kvm, Paolo Bonzini, Alex Williamson, Sean Christopherson
On Tue, May 11, 2021 at 12:26:08PM -0400, Peter Xu wrote:
> On Mon, May 10, 2021 at 02:26:47PM -0300, Marcelo Tosatti wrote:
> > Add a start_assignment hook to kvm_x86_ops, which is called when
> > kvm_arch_start_assignment is done.
> >
> > The hook is required to update the wakeup vector of a sleeping vCPU
> > when a device is assigned to the guest.
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> >
> > Index: kvm/arch/x86/include/asm/kvm_host.h
> > ===================================================================
> > --- kvm.orig/arch/x86/include/asm/kvm_host.h
> > +++ kvm/arch/x86/include/asm/kvm_host.h
> > @@ -1322,6 +1322,7 @@ struct kvm_x86_ops {
> >
> > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> > uint32_t guest_irq, bool set);
> > + void (*start_assignment)(struct kvm *kvm);
> > void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> > bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
> >
> > Index: kvm/arch/x86/kvm/svm/svm.c
> > ===================================================================
> > --- kvm.orig/arch/x86/kvm/svm/svm.c
> > +++ kvm/arch/x86/kvm/svm/svm.c
> > @@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __
> > .deliver_posted_interrupt = svm_deliver_avic_intr,
> > .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
> > .update_pi_irte = svm_update_pi_irte,
> > + .start_assignment = NULL,
>
> Can this be dropped (as default NULL)?
Done.
>
> > .setup_mce = svm_setup_mce,
> >
> > .smi_allowed = svm_smi_allowed,
> > Index: kvm/arch/x86/kvm/vmx/vmx.c
> > ===================================================================
> > --- kvm.orig/arch/x86/kvm/vmx/vmx.c
> > +++ kvm/arch/x86/kvm/vmx/vmx.c
> > @@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __
> > .nested_ops = &vmx_nested_ops,
> >
> > .update_pi_irte = pi_update_irte,
> > + .start_assignment = NULL,
>
> Same here?
Done.
> >
> > #ifdef CONFIG_X86_64
> > .set_hv_timer = vmx_set_hv_timer,
> > Index: kvm/arch/x86/kvm/x86.c
> > ===================================================================
> > --- kvm.orig/arch/x86/kvm/x86.c
> > +++ kvm/arch/x86/kvm/x86.c
> > @@ -11295,7 +11295,11 @@ bool kvm_arch_can_dequeue_async_page_pre
> >
> > void kvm_arch_start_assignment(struct kvm *kvm)
> > {
> > - atomic_inc(&kvm->arch.assigned_device_count);
> > + int ret;
> > +
> > + ret = atomic_inc_return(&kvm->arch.assigned_device_count);
> > + if (ret == 1)
> > + static_call_cond(kvm_x86_start_assignment)(kvm);
>
> Maybe "ret" can be dropped too?
>
> void kvm_arch_start_assignment(struct kvm *kvm)
> {
> if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
> static_call_cond(kvm_x86_start_assignment)(kvm);
> }
>
> Otherwise looks good to me. Thanks,
Done.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [patch 2/4] KVM: add arch specific vcpu_check_block callback
2021-05-10 17:26 [patch 0/4] VMX: configure posted interrupt descriptor when assigning device (v3) Marcelo Tosatti
2021-05-10 17:26 ` [patch 1/4] KVM: x86: add start_assignment hook to kvm_x86_ops Marcelo Tosatti
@ 2021-05-10 17:26 ` Marcelo Tosatti
2021-05-10 17:26 ` [patch 3/4] KVM: x86: implement kvm_arch_vcpu_check_block callback Marcelo Tosatti
2021-05-10 17:26 ` [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device Marcelo Tosatti
3 siblings, 0 replies; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-10 17:26 UTC (permalink / raw)
To: kvm
Cc: Paolo Bonzini, Alex Williamson, Sean Christopherson, Peter Xu,
Marcelo Tosatti
Add callback in kvm_vcpu_check_block, so that architectures
can direct a vcpu to exit the vcpu block loop without requiring
events that would unhalt it.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -971,6 +971,13 @@ static inline int kvm_arch_flush_remote_
}
#endif
+#ifndef __KVM_HAVE_ARCH_VCPU_CHECK_BLOCK
+static inline int kvm_arch_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+#endif
+
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -2794,6 +2794,8 @@ static int kvm_vcpu_check_block(struct k
goto out;
if (signal_pending(current))
goto out;
+ if (kvm_arch_vcpu_check_block(vcpu))
+ goto out;
ret = 0;
out:
^ permalink raw reply [flat|nested] 12+ messages in thread
* [patch 3/4] KVM: x86: implement kvm_arch_vcpu_check_block callback
2021-05-10 17:26 [patch 0/4] VMX: configure posted interrupt descriptor when assigning device (v3) Marcelo Tosatti
2021-05-10 17:26 ` [patch 1/4] KVM: x86: add start_assignment hook to kvm_x86_ops Marcelo Tosatti
2021-05-10 17:26 ` [patch 2/4] KVM: add arch specific vcpu_check_block callback Marcelo Tosatti
@ 2021-05-10 17:26 ` Marcelo Tosatti
2021-05-10 17:26 ` [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device Marcelo Tosatti
3 siblings, 0 replies; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-10 17:26 UTC (permalink / raw)
To: kvm
Cc: Paolo Bonzini, Alex Williamson, Sean Christopherson, Peter Xu,
Marcelo Tosatti
Implement kvm_arch_vcpu_check_block for x86. Next patch will add
implementation of kvm_x86_ops.vcpu_check_block for VMX.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -1320,6 +1320,8 @@ struct kvm_x86_ops {
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
+ int (*vcpu_check_block)(struct kvm_vcpu *vcpu);
+
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void (*start_assignment)(struct kvm *kvm);
@@ -1801,6 +1803,15 @@ static inline bool kvm_irq_is_postable(s
irq->delivery_mode == APIC_DM_LOWEST);
}
+#define __KVM_HAVE_ARCH_VCPU_CHECK_BLOCK
+static inline int kvm_arch_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops.vcpu_check_block)
+ return static_call(kvm_x86_vcpu_check_block)(vcpu);
+
+ return 0;
+}
+
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
Index: kvm/arch/x86/kvm/vmx/vmx.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx/vmx.c
+++ kvm/arch/x86/kvm/vmx/vmx.c
@@ -7727,6 +7727,7 @@ static struct kvm_x86_ops vmx_x86_ops __
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
+ .vcpu_check_block = NULL,
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
Index: kvm/arch/x86/include/asm/kvm-x86-ops.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h
+++ kvm/arch/x86/include/asm/kvm-x86-ops.h
@@ -98,6 +98,7 @@ KVM_X86_OP_NULL(pre_block)
KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
KVM_X86_OP_NULL(vcpu_unblocking)
+KVM_X86_OP_NULL(vcpu_check_block)
KVM_X86_OP_NULL(update_pi_irte)
KVM_X86_OP_NULL(start_assignment)
KVM_X86_OP_NULL(apicv_post_state_restore)
Index: kvm/arch/x86/kvm/svm/svm.c
===================================================================
--- kvm.orig/arch/x86/kvm/svm/svm.c
+++ kvm/arch/x86/kvm/svm/svm.c
@@ -4517,6 +4517,7 @@ static struct kvm_x86_ops svm_x86_ops __
.vcpu_put = svm_vcpu_put,
.vcpu_blocking = svm_vcpu_blocking,
.vcpu_unblocking = svm_vcpu_unblocking,
+ .vcpu_check_block = NULL,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
^ permalink raw reply [flat|nested] 12+ messages in thread
* [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
2021-05-10 17:26 [patch 0/4] VMX: configure posted interrupt descriptor when assigning device (v3) Marcelo Tosatti
` (2 preceding siblings ...)
2021-05-10 17:26 ` [patch 3/4] KVM: x86: implement kvm_arch_vcpu_check_block callback Marcelo Tosatti
@ 2021-05-10 17:26 ` Marcelo Tosatti
2021-05-24 15:55 ` Paolo Bonzini
3 siblings, 1 reply; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-10 17:26 UTC (permalink / raw)
To: kvm
Cc: Paolo Bonzini, Alex Williamson, Sean Christopherson, Peter Xu,
Pei Zhang, Marcelo Tosatti
For VMX, when a vcpu enters HLT emulation, pi_post_block will:
1) Add vcpu to per-cpu list of blocked vcpus.
2) Program the posted-interrupt descriptor "notification vector"
to POSTED_INTR_WAKEUP_VECTOR
With interrupt remapping, an interrupt will set the PIR bit for the
vector programmed for the device on the CPU, test-and-set the
ON bit on the posted interrupt descriptor, and if the ON bit is clear
generate an interrupt for the notification vector.
This way, the target CPU wakes upon a device interrupt and wakes up
the target vcpu.
Problem is that pi_post_block only programs the notification vector
if kvm_arch_has_assigned_device() is true. It's possible for the
following to happen:
1) vcpu V HLTs on pcpu P, kvm_arch_has_assigned_device is false,
notification vector is not programmed
2) device is assigned to VM
3) device interrupts vcpu V, sets ON bit
(notification vector not programmed, so pcpu P remains in idle)
4) vcpu 0 IPIs vcpu V (in guest), but since pi descriptor ON bit is set,
kvm_vcpu_kick is skipped
5) vcpu 0 busy spins on vcpu V's response for several seconds, until
RCU watchdog NMIs all vCPUs.
To fix this, use the start_assignment kvm_x86_ops callback to kick
vcpus out of the halt loop, so the notification vector is
properly reprogrammed to the wakeup vector.
Reported-by: Pei Zhang <pezhang@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/vmx/posted_intr.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx/posted_intr.c
+++ kvm/arch/x86/kvm/vmx/posted_intr.c
@@ -204,6 +204,32 @@ void pi_post_block(struct kvm_vcpu *vcpu
}
/*
+ * Bail out of the block loop if the VM has an assigned
+ * device, but the blocking vCPU didn't reconfigure the
+ * PI.NV to the wakeup vector, i.e. the assigned device
+ * came along after the initial check in vcpu_block().
+ */
+
+int vmx_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (!irq_remapping_cap(IRQ_POSTING_CAP))
+ return 0;
+
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return 0;
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm))
+ return 0;
+
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR)
+ return 0;
+
+ return 1;
+}
+
+/*
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
*/
void pi_wakeup_handler(void)
@@ -236,6 +262,25 @@ bool pi_has_pending_interrupt(struct kvm
(pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
}
+void vmx_pi_start_assignment(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ if (!irq_remapping_cap(IRQ_POSTING_CAP))
+ return;
+
+ /*
+ * Wakeup will cause the vCPU to bail out of kvm_vcpu_block() and
+ * go back through vcpu_block().
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_vcpu_apicv_active(vcpu))
+ continue;
+
+ kvm_vcpu_wake_up(vcpu);
+ }
+}
/*
* pi_update_irte - set IRTE for Posted-Interrupts
Index: kvm/arch/x86/kvm/vmx/posted_intr.h
===================================================================
--- kvm.orig/arch/x86/kvm/vmx/posted_intr.h
+++ kvm/arch/x86/kvm/vmx/posted_intr.h
@@ -95,5 +95,7 @@ void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
bool set);
+void vmx_pi_start_assignment(struct kvm *kvm);
+int vmx_vcpu_check_block(struct kvm_vcpu *vcpu);
#endif /* __KVM_X86_VMX_POSTED_INTR_H */
Index: kvm/arch/x86/kvm/vmx/vmx.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx/vmx.c
+++ kvm/arch/x86/kvm/vmx/vmx.c
@@ -7727,13 +7727,13 @@ static struct kvm_x86_ops vmx_x86_ops __
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
- .vcpu_check_block = NULL,
+ .vcpu_check_block = vmx_vcpu_check_block,
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
.update_pi_irte = pi_update_irte,
- .start_assignment = NULL,
+ .start_assignment = vmx_pi_start_assignment,
#ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer,
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
2021-05-10 17:26 ` [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device Marcelo Tosatti
@ 2021-05-24 15:55 ` Paolo Bonzini
2021-05-24 17:53 ` Marcelo Tosatti
0 siblings, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2021-05-24 15:55 UTC (permalink / raw)
To: Marcelo Tosatti, kvm
Cc: Alex Williamson, Sean Christopherson, Peter Xu, Pei Zhang
On 10/05/21 19:26, Marcelo Tosatti wrote:
> +void vmx_pi_start_assignment(struct kvm *kvm)
> +{
> + struct kvm_vcpu *vcpu;
> + int i;
> +
> + if (!irq_remapping_cap(IRQ_POSTING_CAP))
> + return;
> +
> + /*
> + * Wakeup will cause the vCPU to bail out of kvm_vcpu_block() and
> + * go back through vcpu_block().
> + */
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!kvm_vcpu_apicv_active(vcpu))
> + continue;
> +
> + kvm_vcpu_wake_up(vcpu);
Would you still need the check_block callback, if you also added a
kvm_make_request(KVM_REQ_EVENT)?
In fact, since this is entirely not a hot path, can you just do
kvm_make_all_cpus_request(kvm, KVM_REQ_EVENT) instead of this loop?
Thanks,
Paolo
> + }
> +}
>
> /*
> * pi_update_irte - set IRTE for Posted-Interrupts
> Index: kvm/arch/x86/kvm/vmx/posted_intr.h
> ===================================================================
> --- kvm.orig/arch/x86/kvm/vmx/posted_intr.h
> +++ kvm/arch/x86/kvm/vmx/posted_intr.h
> @@ -95,5 +95,7 @@ void __init pi_init_cpu(int cpu);
> bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
> int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
> bool set);
> +void vmx_pi_start_assignment(struct kvm *kvm);
> +int vmx_vcpu_check_block(struct kvm_vcpu *vcpu);
>
> #endif /* __KVM_X86_VMX_POSTED_INTR_H */
> Index: kvm/arch/x86/kvm/vmx/vmx.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/vmx/vmx.c
> +++ kvm/arch/x86/kvm/vmx/vmx.c
> @@ -7727,13 +7727,13 @@ static struct kvm_x86_ops vmx_x86_ops __
>
> .pre_block = vmx_pre_block,
> .post_block = vmx_post_block,
> - .vcpu_check_block = NULL,
> + .vcpu_check_block = vmx_vcpu_check_block,
>
> .pmu_ops = &intel_pmu_ops,
> .nested_ops = &vmx_nested_ops,
>
> .update_pi_irte = pi_update_irte,
> - .start_assignment = NULL,
> + .start_assignment = vmx_pi_start_assignment,
>
> #ifdef CONFIG_X86_64
> .set_hv_timer = vmx_set_hv_timer,
>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
2021-05-24 15:55 ` Paolo Bonzini
@ 2021-05-24 17:53 ` Marcelo Tosatti
2021-05-25 11:58 ` Paolo Bonzini
0 siblings, 1 reply; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-24 17:53 UTC (permalink / raw)
To: Paolo Bonzini
Cc: kvm, Alex Williamson, Sean Christopherson, Peter Xu, Pei Zhang
On Mon, May 24, 2021 at 05:55:18PM +0200, Paolo Bonzini wrote:
> On 10/05/21 19:26, Marcelo Tosatti wrote:
> > +void vmx_pi_start_assignment(struct kvm *kvm)
> > +{
> > + struct kvm_vcpu *vcpu;
> > + int i;
> > +
> > + if (!irq_remapping_cap(IRQ_POSTING_CAP))
> > + return;
> > +
> > + /*
> > + * Wakeup will cause the vCPU to bail out of kvm_vcpu_block() and
> > + * go back through vcpu_block().
> > + */
> > + kvm_for_each_vcpu(i, vcpu, kvm) {
> > + if (!kvm_vcpu_apicv_active(vcpu))
> > + continue;
> > +
> > + kvm_vcpu_wake_up(vcpu);
>
> Would you still need the check_block callback, if you also added a
> kvm_make_request(KVM_REQ_EVENT)?
>
> In fact, since this is entirely not a hot path, can you just do
> kvm_make_all_cpus_request(kvm, KVM_REQ_EVENT) instead of this loop?
>
> Thanks,
>
> Paolo
Hi Paolo,
Don't think so:
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
int ret = -EINTR;
int idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvm_arch_vcpu_runnable(vcpu)) {
kvm_make_request(KVM_REQ_UNHALT, vcpu); <---- don't want KVM_REQ_UNHALT
goto out;
}
if (kvm_cpu_has_pending_timer(vcpu))
goto out;
if (signal_pending(current))
goto out;
ret = 0;
out:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
}
See previous discussion:
Date: Wed, 12 May 2021 14:41:56 +0000
From: Sean Christopherson <seanjc@google.com>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Peter Xu <peterx@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, kvm@vger.kernel.org, Alex Williamson
<alex.williamson@redhat.com>, Pei Zhang <pezhang@redhat.com>
Subject: Re: [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
On Tue, May 11, 2021, Marcelo Tosatti wrote:
> > The KVM_REQ_UNBLOCK patch will resume execution even any such event
>
> even without any such event
>
> > occurring. So the behaviour would be different from baremetal.
I agree with Marcelo, we don't want to spuriously unhalt the vCPU. It's legal,
albeit risky, to do something like
hlt
/* #UD to triple fault if this CPU is awakened. */
ud2
when offlining a CPU, in which case the spurious wake event will crash the guest.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch 4/4] KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
2021-05-24 17:53 ` Marcelo Tosatti
@ 2021-05-25 11:58 ` Paolo Bonzini
0 siblings, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2021-05-25 11:58 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: kvm, Alex Williamson, Sean Christopherson, Peter Xu, Pei Zhang
On 24/05/21 19:53, Marcelo Tosatti wrote:
> On Mon, May 24, 2021 at 05:55:18PM +0200, Paolo Bonzini wrote:
>> On 10/05/21 19:26, Marcelo Tosatti wrote:
>>> +void vmx_pi_start_assignment(struct kvm *kvm)
>>> +{
>>> + struct kvm_vcpu *vcpu;
>>> + int i;
>>> +
>>> + if (!irq_remapping_cap(IRQ_POSTING_CAP))
>>> + return;
>>> +
>>> + /*
>>> + * Wakeup will cause the vCPU to bail out of kvm_vcpu_block() and
>>> + * go back through vcpu_block().
>>> + */
>>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>>> + if (!kvm_vcpu_apicv_active(vcpu))
>>> + continue;
>>> +
>>> + kvm_vcpu_wake_up(vcpu);
>>
>> Would you still need the check_block callback, if you also added a
>> kvm_make_request(KVM_REQ_EVENT)?
>>
>> In fact, since this is entirely not a hot path, can you just do
>> kvm_make_all_cpus_request(kvm, KVM_REQ_EVENT) instead of this loop?
>>
>> Thanks,
>>
>> Paolo
>
> Hi Paolo,
>
> Don't think so:
>
> static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
> {
> int ret = -EINTR;
> int idx = srcu_read_lock(&vcpu->kvm->srcu);
>
> if (kvm_arch_vcpu_runnable(vcpu)) {
> kvm_make_request(KVM_REQ_UNHALT, vcpu); <---- don't want KVM_REQ_UNHALT
UNHALT is incorrect indeed, but requests don't have to unhalt the vCPU.
This case is somewhat similar to signal_pending(), where the next
KVM_RUN ioctl resumes the halt. It's also similar to
KVM_REQ_PENDING_TIMER. So you can:
- rename KVM_REQ_PENDING_TIMER to KVM_REQ_UNBLOCK except in
arch/powerpc, where instead you add KVM_REQ_PENDING_TIMER to
arch/powerpc/include/asm/kvm_host.h
- here, you add
if (kvm_check_request(KVM_REQ_UNBLOCK, vcpu))
goto out;
- then vmx_pi_start_assignment only needs to
if (!irq_remapping_cap(IRQ_POSTING_CAP))
return;
kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
kvm_arch_vcpu_runnable() would still return false, so the mp_state would
not change.
Paolo
^ permalink raw reply [flat|nested] 12+ messages in thread
* [patch 3/4] KVM: x86: implement kvm_arch_vcpu_check_block callback
2021-05-07 13:06 [patch 0/4] VMX: configure posted interrupt descriptor when assigning device Marcelo Tosatti
@ 2021-05-07 13:06 ` Marcelo Tosatti
0 siblings, 0 replies; 12+ messages in thread
From: Marcelo Tosatti @ 2021-05-07 13:06 UTC (permalink / raw)
To: kvm; +Cc: Paolo Bonzini, Alex Williamson, Sean Christopherson, Marcelo Tosatti
Implement kvm_arch_vcpu_check_block for x86. Next patch will add
implementation of kvm_x86_ops.vcpu_check_block for VMX.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -1320,6 +1320,8 @@ struct kvm_x86_ops {
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
+ int (*vcpu_check_block)(struct kvm_vcpu *vcpu);
+
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void (*start_assignment)(struct kvm *kvm, int device_count);
@@ -1801,6 +1803,15 @@ static inline bool kvm_irq_is_postable(s
irq->delivery_mode == APIC_DM_LOWEST);
}
+#define __KVM_HAVE_ARCH_VCPU_CHECK_BLOCK
+static inline int kvm_arch_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops.vcpu_check_block)
+ return static_call(kvm_x86_vcpu_check_block)(vcpu);
+
+ return 0;
+}
+
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
Index: kvm/arch/x86/kvm/vmx/vmx.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx/vmx.c
+++ kvm/arch/x86/kvm/vmx/vmx.c
@@ -7727,6 +7727,7 @@ static struct kvm_x86_ops vmx_x86_ops __
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
+ .vcpu_check_block = NULL,
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
Index: kvm/arch/x86/include/asm/kvm-x86-ops.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h
+++ kvm/arch/x86/include/asm/kvm-x86-ops.h
@@ -98,6 +98,7 @@ KVM_X86_OP_NULL(pre_block)
KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
KVM_X86_OP_NULL(vcpu_unblocking)
+KVM_X86_OP_NULL(vcpu_check_block)
KVM_X86_OP_NULL(update_pi_irte)
KVM_X86_OP_NULL(start_assignment)
KVM_X86_OP_NULL(apicv_post_state_restore)
Index: kvm/arch/x86/kvm/svm/svm.c
===================================================================
--- kvm.orig/arch/x86/kvm/svm/svm.c
+++ kvm/arch/x86/kvm/svm/svm.c
@@ -4517,6 +4517,7 @@ static struct kvm_x86_ops svm_x86_ops __
.vcpu_put = svm_vcpu_put,
.vcpu_blocking = svm_vcpu_blocking,
.vcpu_unblocking = svm_vcpu_unblocking,
+ .vcpu_check_block = NULL,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
^ permalink raw reply [flat|nested] 12+ messages in thread