Add a start_assignment hook to kvm_x86_ops, which is called when kvm_arch_start_assignment is done. The hook is required to update the wakeup vector of a sleeping vCPU when a device is assigned to the guest. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: kvm/arch/x86/include/asm/kvm_host.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm_host.h +++ kvm/arch/x86/include/asm/kvm_host.h @@ -1322,6 +1322,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); + void (*start_assignment)(struct kvm *kvm, int device_count); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); Index: kvm/arch/x86/kvm/svm/svm.c =================================================================== --- kvm.orig/arch/x86/kvm/svm/svm.c +++ kvm/arch/x86/kvm/svm/svm.c @@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __ .deliver_posted_interrupt = svm_deliver_avic_intr, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, + .start_assignment = NULL, .setup_mce = svm_setup_mce, .smi_allowed = svm_smi_allowed, Index: kvm/arch/x86/kvm/vmx/vmx.c =================================================================== --- kvm.orig/arch/x86/kvm/vmx/vmx.c +++ kvm/arch/x86/kvm/vmx/vmx.c @@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __ .nested_ops = &vmx_nested_ops, .update_pi_irte = pi_update_irte, + .start_assignment = NULL, #ifdef CONFIG_X86_64 .set_hv_timer = vmx_set_hv_timer, Index: kvm/arch/x86/kvm/x86.c =================================================================== --- kvm.orig/arch/x86/kvm/x86.c +++ kvm/arch/x86/kvm/x86.c @@ -11295,7 +11295,10 @@ bool kvm_arch_can_dequeue_async_page_pre void kvm_arch_start_assignment(struct kvm *kvm) { - atomic_inc(&kvm->arch.assigned_device_count); + int ret; + + ret = 
atomic_inc_return(&kvm->arch.assigned_device_count); + static_call_cond(kvm_x86_start_assignment)(kvm, ret); } EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); Index: kvm/arch/x86/include/asm/kvm-x86-ops.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h +++ kvm/arch/x86/include/asm/kvm-x86-ops.h @@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) +KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore) KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt) KVM_X86_OP_NULL(set_hv_timer)
On Fri, May 07, 2021 at 10:06:10AM -0300, Marcelo Tosatti wrote:
> Add a start_assignment hook to kvm_x86_ops, which is called when
> kvm_arch_start_assignment is done.
>
> The hook is required to update the wakeup vector of a sleeping vCPU
> when a device is assigned to the guest.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/arch/x86/include/asm/kvm_host.h
> ===================================================================
> --- kvm.orig/arch/x86/include/asm/kvm_host.h
> +++ kvm/arch/x86/include/asm/kvm_host.h
> @@ -1322,6 +1322,7 @@ struct kvm_x86_ops {
>
> int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> uint32_t guest_irq, bool set);
> + void (*start_assignment)(struct kvm *kvm, int device_count);
I'm thinking what the hook could do with the device_count besides comparing it
against 1...
If we can't think of any, perhaps we can directly make it an enablement hook
instead (so we avoid calling the hook at all when count>1)?
/* Called when the first assignment registers (count from 0 to 1) */
void (*enable_assignment)(struct kvm *kvm);
--
Peter Xu
Add a start_assignment hook to kvm_x86_ops, which is called when kvm_arch_start_assignment is done. The hook is required to update the wakeup vector of a sleeping vCPU when a device is assigned to the guest. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: kvm/arch/x86/include/asm/kvm_host.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm_host.h +++ kvm/arch/x86/include/asm/kvm_host.h @@ -1322,6 +1322,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); + void (*start_assignment)(struct kvm *kvm); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); Index: kvm/arch/x86/kvm/svm/svm.c =================================================================== --- kvm.orig/arch/x86/kvm/svm/svm.c +++ kvm/arch/x86/kvm/svm/svm.c @@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __ .deliver_posted_interrupt = svm_deliver_avic_intr, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, + .start_assignment = NULL, .setup_mce = svm_setup_mce, .smi_allowed = svm_smi_allowed, Index: kvm/arch/x86/kvm/vmx/vmx.c =================================================================== --- kvm.orig/arch/x86/kvm/vmx/vmx.c +++ kvm/arch/x86/kvm/vmx/vmx.c @@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __ .nested_ops = &vmx_nested_ops, .update_pi_irte = pi_update_irte, + .start_assignment = NULL, #ifdef CONFIG_X86_64 .set_hv_timer = vmx_set_hv_timer, Index: kvm/arch/x86/kvm/x86.c =================================================================== --- kvm.orig/arch/x86/kvm/x86.c +++ kvm/arch/x86/kvm/x86.c @@ -11295,7 +11295,11 @@ bool kvm_arch_can_dequeue_async_page_pre void kvm_arch_start_assignment(struct kvm *kvm) { - atomic_inc(&kvm->arch.assigned_device_count); + int ret; + + ret = 
atomic_inc_return(&kvm->arch.assigned_device_count); + if (ret == 1) + static_call_cond(kvm_x86_start_assignment)(kvm); } EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); Index: kvm/arch/x86/include/asm/kvm-x86-ops.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h +++ kvm/arch/x86/include/asm/kvm-x86-ops.h @@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) +KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore) KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt) KVM_X86_OP_NULL(set_hv_timer)
On Fri, May 07, 2021 at 03:16:00PM -0400, Peter Xu wrote:
> On Fri, May 07, 2021 at 10:06:10AM -0300, Marcelo Tosatti wrote:
> > Add a start_assignment hook to kvm_x86_ops, which is called when
> > kvm_arch_start_assignment is done.
> >
> > The hook is required to update the wakeup vector of a sleeping vCPU
> > when a device is assigned to the guest.
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> >
> > Index: kvm/arch/x86/include/asm/kvm_host.h
> > ===================================================================
> > --- kvm.orig/arch/x86/include/asm/kvm_host.h
> > +++ kvm/arch/x86/include/asm/kvm_host.h
> > @@ -1322,6 +1322,7 @@ struct kvm_x86_ops {
> >
> > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> > uint32_t guest_irq, bool set);
> > + void (*start_assignment)(struct kvm *kvm, int device_count);
>
> I'm thinking what the hook could do with the device_count besides comparing it
> against 1...
>
> If we can't think of any, perhaps we can directly make it an enablement hook
> instead (so we avoid calling the hook at all when count>1)?
>
> /* Called when the first assignment registers (count from 0 to 1) */
> void (*enable_assignment)(struct kvm *kvm);
Sure, sounds good, just kept the original name...
On Mon, May 10, 2021 at 02:26:47PM -0300, Marcelo Tosatti wrote: > Add a start_assignment hook to kvm_x86_ops, which is called when > kvm_arch_start_assignment is done. > > The hook is required to update the wakeup vector of a sleeping vCPU > when a device is assigned to the guest. > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > Index: kvm/arch/x86/include/asm/kvm_host.h > =================================================================== > --- kvm.orig/arch/x86/include/asm/kvm_host.h > +++ kvm/arch/x86/include/asm/kvm_host.h > @@ -1322,6 +1322,7 @@ struct kvm_x86_ops { > > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, > uint32_t guest_irq, bool set); > + void (*start_assignment)(struct kvm *kvm); > void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); > bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); > > Index: kvm/arch/x86/kvm/svm/svm.c > =================================================================== > --- kvm.orig/arch/x86/kvm/svm/svm.c > +++ kvm/arch/x86/kvm/svm/svm.c > @@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __ > .deliver_posted_interrupt = svm_deliver_avic_intr, > .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, > .update_pi_irte = svm_update_pi_irte, > + .start_assignment = NULL, Can this be dropped (as default NULL)? > .setup_mce = svm_setup_mce, > > .smi_allowed = svm_smi_allowed, > Index: kvm/arch/x86/kvm/vmx/vmx.c > =================================================================== > --- kvm.orig/arch/x86/kvm/vmx/vmx.c > +++ kvm/arch/x86/kvm/vmx/vmx.c > @@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __ > .nested_ops = &vmx_nested_ops, > > .update_pi_irte = pi_update_irte, > + .start_assignment = NULL, Same here? 
> > #ifdef CONFIG_X86_64 > .set_hv_timer = vmx_set_hv_timer, > Index: kvm/arch/x86/kvm/x86.c > =================================================================== > --- kvm.orig/arch/x86/kvm/x86.c > +++ kvm/arch/x86/kvm/x86.c > @@ -11295,7 +11295,11 @@ bool kvm_arch_can_dequeue_async_page_pre > > void kvm_arch_start_assignment(struct kvm *kvm) > { > - atomic_inc(&kvm->arch.assigned_device_count); > + int ret; > + > + ret = atomic_inc_return(&kvm->arch.assigned_device_count); > + if (ret == 1) > + static_call_cond(kvm_x86_start_assignment)(kvm); Maybe "ret" can be dropped too? void kvm_arch_start_assignment(struct kvm *kvm) { if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1) static_call_cond(kvm_x86_start_assignment)(kvm); } Otherwise looks good to me. Thanks, -- Peter Xu
On Tue, May 11, 2021 at 12:26:08PM -0400, Peter Xu wrote: > On Mon, May 10, 2021 at 02:26:47PM -0300, Marcelo Tosatti wrote: > > Add a start_assignment hook to kvm_x86_ops, which is called when > > kvm_arch_start_assignment is done. > > > > The hook is required to update the wakeup vector of a sleeping vCPU > > when a device is assigned to the guest. > > > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > > > Index: kvm/arch/x86/include/asm/kvm_host.h > > =================================================================== > > --- kvm.orig/arch/x86/include/asm/kvm_host.h > > +++ kvm/arch/x86/include/asm/kvm_host.h > > @@ -1322,6 +1322,7 @@ struct kvm_x86_ops { > > > > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, > > uint32_t guest_irq, bool set); > > + void (*start_assignment)(struct kvm *kvm); > > void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); > > bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); > > > > Index: kvm/arch/x86/kvm/svm/svm.c > > =================================================================== > > --- kvm.orig/arch/x86/kvm/svm/svm.c > > +++ kvm/arch/x86/kvm/svm/svm.c > > @@ -4601,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __ > > .deliver_posted_interrupt = svm_deliver_avic_intr, > > .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, > > .update_pi_irte = svm_update_pi_irte, > > + .start_assignment = NULL, > > Can this be dropped (as default NULL)? Done. > > > .setup_mce = svm_setup_mce, > > > > .smi_allowed = svm_smi_allowed, > > Index: kvm/arch/x86/kvm/vmx/vmx.c > > =================================================================== > > --- kvm.orig/arch/x86/kvm/vmx/vmx.c > > +++ kvm/arch/x86/kvm/vmx/vmx.c > > @@ -7732,6 +7732,7 @@ static struct kvm_x86_ops vmx_x86_ops __ > > .nested_ops = &vmx_nested_ops, > > > > .update_pi_irte = pi_update_irte, > > + .start_assignment = NULL, > > Same here? Done. 
> > > > #ifdef CONFIG_X86_64 > > .set_hv_timer = vmx_set_hv_timer, > > Index: kvm/arch/x86/kvm/x86.c > > =================================================================== > > --- kvm.orig/arch/x86/kvm/x86.c > > +++ kvm/arch/x86/kvm/x86.c > > @@ -11295,7 +11295,11 @@ bool kvm_arch_can_dequeue_async_page_pre > > > > void kvm_arch_start_assignment(struct kvm *kvm) > > { > > - atomic_inc(&kvm->arch.assigned_device_count); > > + int ret; > > + > > + ret = atomic_inc_return(&kvm->arch.assigned_device_count); > > + if (ret == 1) > > + static_call_cond(kvm_x86_start_assignment)(kvm); > > Maybe "ret" can be dropped too? > > void kvm_arch_start_assignment(struct kvm *kvm) > { > if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1) > static_call_cond(kvm_x86_start_assignment)(kvm); > } > > Otherwise looks good to me. Thanks, Done.
Configuration of the posted interrupt descriptor is incorrect when devices are hotplugged to the guest (and vcpus are halted). See patch 4 for details. --- v4: remove NULL assignments from kvm_x86_ops (Peter Xu); check the return value of atomic_inc_return() directly in kvm_arch_start_assignment, dropping the local "ret" variable (Peter Xu). v3: improved comments (Sean); use kvm_vcpu_wake_up (Sean); drop device_count from start_assignment function (Peter Xu). v2: rather than using a potentially racy IPI (vs vcpu->cpu switches), kick the vcpus when assigning a device and let the blocked per-CPU list manipulation happen locally at ->pre_block and ->post_block (Sean Christopherson).
Add a start_assignment hook to kvm_x86_ops, which is called when kvm_arch_start_assignment is done. The hook is required to update the wakeup vector of a sleeping vCPU when a device is assigned to the guest. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: kvm/arch/x86/include/asm/kvm_host.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm_host.h +++ kvm/arch/x86/include/asm/kvm_host.h @@ -1322,6 +1322,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); + void (*start_assignment)(struct kvm *kvm); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); Index: kvm/arch/x86/kvm/x86.c =================================================================== --- kvm.orig/arch/x86/kvm/x86.c +++ kvm/arch/x86/kvm/x86.c @@ -11295,7 +11295,8 @@ bool kvm_arch_can_dequeue_async_page_pre void kvm_arch_start_assignment(struct kvm *kvm) { - atomic_inc(&kvm->arch.assigned_device_count); + if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1) + static_call_cond(kvm_x86_start_assignment)(kvm); } EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); Index: kvm/arch/x86/include/asm/kvm-x86-ops.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h +++ kvm/arch/x86/include/asm/kvm-x86-ops.h @@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) +KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore) KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt) KVM_X86_OP_NULL(set_hv_timer)
Add callback in kvm_vcpu_check_block, so that architectures can direct a vcpu to exit the vcpu block loop without requiring events that would unhalt it. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: kvm/include/linux/kvm_host.h =================================================================== --- kvm.orig/include/linux/kvm_host.h +++ kvm/include/linux/kvm_host.h @@ -971,6 +971,13 @@ static inline int kvm_arch_flush_remote_ } #endif +#ifndef __KVM_HAVE_ARCH_VCPU_CHECK_BLOCK +static inline int kvm_arch_vcpu_check_block(struct kvm_vcpu *vcpu) +{ + return 0; +} +#endif + #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA void kvm_arch_register_noncoherent_dma(struct kvm *kvm); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); Index: kvm/virt/kvm/kvm_main.c =================================================================== --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -2794,6 +2794,8 @@ static int kvm_vcpu_check_block(struct k goto out; if (signal_pending(current)) goto out; + if (kvm_arch_vcpu_check_block(vcpu)) + goto out; ret = 0; out:
Implement kvm_arch_vcpu_check_block for x86. Next patch will add implementation of kvm_x86_ops.vcpu_check_block for VMX. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: kvm/arch/x86/include/asm/kvm_host.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm_host.h +++ kvm/arch/x86/include/asm/kvm_host.h @@ -1320,6 +1320,8 @@ struct kvm_x86_ops { void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); + int (*vcpu_check_block)(struct kvm_vcpu *vcpu); + int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*start_assignment)(struct kvm *kvm); @@ -1801,6 +1803,15 @@ static inline bool kvm_irq_is_postable(s irq->delivery_mode == APIC_DM_LOWEST); } +#define __KVM_HAVE_ARCH_VCPU_CHECK_BLOCK +static inline int kvm_arch_vcpu_check_block(struct kvm_vcpu *vcpu) +{ + if (kvm_x86_ops.vcpu_check_block) + return static_call(kvm_x86_vcpu_check_block)(vcpu); + + return 0; +} + static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { static_call_cond(kvm_x86_vcpu_blocking)(vcpu); Index: kvm/arch/x86/include/asm/kvm-x86-ops.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm-x86-ops.h +++ kvm/arch/x86/include/asm/kvm-x86-ops.h @@ -98,6 +98,7 @@ KVM_X86_OP_NULL(pre_block) KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) +KVM_X86_OP_NULL(vcpu_check_block) KVM_X86_OP_NULL(update_pi_irte) KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore)
For VMX, when a vcpu enters HLT emulation, pi_post_block will: 1) Add vcpu to per-cpu list of blocked vcpus. 2) Program the posted-interrupt descriptor "notification vector" to POSTED_INTR_WAKEUP_VECTOR. With interrupt remapping, an interrupt will set the PIR bit for the vector programmed for the device on the CPU, test-and-set the ON bit on the posted interrupt descriptor, and if the ON bit is clear generate an interrupt for the notification vector. This way, the target CPU wakes upon a device interrupt and wakes up the target vcpu. The problem is that pi_post_block only programs the notification vector if kvm_arch_has_assigned_device() is true. It's possible for the following to happen: 1) vcpu V HLTs on pcpu P, kvm_arch_has_assigned_device is false, notification vector is not programmed 2) device is assigned to VM 3) device interrupts vcpu V, sets ON bit (notification vector not programmed, so pcpu P remains in idle) 4) vcpu 0 IPIs vcpu V (in guest), but since pi descriptor ON bit is set, kvm_vcpu_kick is skipped 5) vcpu 0 busy spins on vcpu V's response for several seconds, until RCU watchdog NMIs all vCPUs. To fix this, use the start_assignment kvm_x86_ops callback to kick vcpus out of the halt loop, so the notification vector is properly reprogrammed to the wakeup vector. Reported-by: Pei Zhang <pezhang@redhat.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: kvm/arch/x86/kvm/vmx/posted_intr.c =================================================================== --- kvm.orig/arch/x86/kvm/vmx/posted_intr.c +++ kvm/arch/x86/kvm/vmx/posted_intr.c @@ -204,6 +204,32 @@ void pi_post_block(struct kvm_vcpu *vcpu } /* + * Bail out of the block loop if the VM has an assigned + * device, but the blocking vCPU didn't reconfigure the + * PI.NV to the wakeup vector, i.e. the assigned device + * came along after the initial check in vcpu_block(). 
+ */ + +int vmx_vcpu_check_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!irq_remapping_cap(IRQ_POSTING_CAP)) + return 0; + + if (!kvm_vcpu_apicv_active(vcpu)) + return 0; + + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + return 0; + + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) + return 0; + + return 1; +} + +/* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ void pi_wakeup_handler(void) @@ -236,6 +262,25 @@ bool pi_has_pending_interrupt(struct kvm (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); } +void vmx_pi_start_assignment(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i; + + if (!irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + /* + * Wakeup will cause the vCPU to bail out of kvm_vcpu_block() and + * go back through vcpu_block(). + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_vcpu_apicv_active(vcpu)) + continue; + + kvm_vcpu_wake_up(vcpu); + } +} /* * pi_update_irte - set IRTE for Posted-Interrupts Index: kvm/arch/x86/kvm/vmx/posted_intr.h =================================================================== --- kvm.orig/arch/x86/kvm/vmx/posted_intr.h +++ kvm/arch/x86/kvm/vmx/posted_intr.h @@ -95,5 +95,7 @@ void __init pi_init_cpu(int cpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); +void vmx_pi_start_assignment(struct kvm *kvm); +int vmx_vcpu_check_block(struct kvm_vcpu *vcpu); #endif /* __KVM_X86_VMX_POSTED_INTR_H */ Index: kvm/arch/x86/kvm/vmx/vmx.c =================================================================== --- kvm.orig/arch/x86/kvm/vmx/vmx.c +++ kvm/arch/x86/kvm/vmx/vmx.c @@ -7727,11 +7727,13 @@ static struct kvm_x86_ops vmx_x86_ops __ .pre_block = vmx_pre_block, .post_block = vmx_post_block, + .vcpu_check_block = vmx_vcpu_check_block, .pmu_ops = &intel_pmu_ops, .nested_ops = &vmx_nested_ops, .update_pi_irte = pi_update_irte, + .start_assignment = vmx_pi_start_assignment, #ifdef 
CONFIG_X86_64 .set_hv_timer = vmx_set_hv_timer,
On Tue, May 11, 2021 at 08:57:39PM -0300, Marcelo Tosatti wrote:
> Add a start_assignment hook to kvm_x86_ops, which is called when
> kvm_arch_start_assignment is done.
>
> The hook is required to update the wakeup vector of a sleeping vCPU
> when a device is assigned to the guest.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Thanks,
--
Peter Xu