* [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
@ 2009-04-27 21:07 mtosatti
  2009-04-27 21:07 ` [patch 1/4] qemu: external module: smp_send_reschedule compat mtosatti
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: mtosatti @ 2009-04-27 21:07 UTC (permalink / raw)
  To: kvm; +Cc: sheng




* [patch 1/4] qemu: external module: smp_send_reschedule compat
  2009-04-27 21:07 [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix mtosatti
@ 2009-04-27 21:07 ` mtosatti
  2009-05-07 13:28   ` Avi Kivity
  2009-04-27 21:07 ` [patch 2/4] KVM: x86: wake up waitqueue before calling get_cpu() mtosatti
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 19+ messages in thread
From: mtosatti @ 2009-04-27 21:07 UTC (permalink / raw)
  To: kvm; +Cc: sheng, Marcelo Tosatti

[-- Attachment #1: smp-send-resched-compat --]
[-- Type: text/plain, Size: 2965 bytes --]

smp_send_reschedule was exported (via smp_ops) in v2.6.24.
For kernels older than 2.6.24, create a compat function which sends the IPI
via smp_call_function_single(), deferring it to keventd context when
interrupts are disabled (smp_call_function_single() may not be called with
interrupts disabled).
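
In effect, for kernels older than 2.6.24 a kick call site in the module like

	smp_send_reschedule(cpu);

is redirected (via the #define and the hack-module.awk substitution below)
to kvm_smp_send_reschedule(), which sends a dummy function-call IPI directly
when interrupts are enabled and otherwise defers the work to keventd.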

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/kvm/kernel/external-module-compat-comm.h b/kvm/kernel/external-module-compat-comm.h
index c955927..852a8c1 100644
--- a/kvm/kernel/external-module-compat-comm.h
+++ b/kvm/kernel/external-module-compat-comm.h
@@ -116,6 +116,10 @@ int kvm_smp_call_function_single(int cpu, void (*func)(void *info),
 
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+#define nr_cpu_ids NR_CPUS
+#endif
+
 #include <linux/miscdevice.h>
 #ifndef KVM_MINOR
 #define KVM_MINOR 232
@@ -220,6 +224,10 @@ int kvm_smp_call_function_mask(cpumask_t mask, void (*func) (void *info),
 
 #define smp_call_function_mask kvm_smp_call_function_mask
 
+void kvm_smp_send_reschedule(int cpu);
+
+#define smp_send_reschedule kvm_smp_send_reschedule
+
 #endif
 
 /* empty_zero_page isn't exported in all kernels */
diff --git a/kvm/kernel/external-module-compat.c b/kvm/kernel/external-module-compat.c
index 0d858be..ca269cc 100644
--- a/kvm/kernel/external-module-compat.c
+++ b/kvm/kernel/external-module-compat.c
@@ -221,6 +221,58 @@ out:
 	return 0;
 }
 
+#include <linux/workqueue.h>
+
+static void vcpu_kick_intr(void *info)
+{
+}
+
+struct kvm_kick {
+	int cpu;
+	struct work_struct work;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+static void kvm_do_smp_call_function(void *data)
+{
+	int me;
+	struct kvm_kick *kvm_kick = data;
+#else
+static void kvm_do_smp_call_function(struct work_struct *work)
+{
+	int me;
+	struct kvm_kick *kvm_kick = container_of(work, struct kvm_kick, work);
+#endif
+	me = get_cpu();
+
+	if (kvm_kick->cpu != me)
+		smp_call_function_single(kvm_kick->cpu, vcpu_kick_intr,
+					 NULL, 0);
+	kfree(kvm_kick);
+	put_cpu();
+}
+
+void kvm_queue_smp_call_function(int cpu)
+{
+	struct kvm_kick *kvm_kick = kmalloc(sizeof(struct kvm_kick), GFP_ATOMIC);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+	INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function, kvm_kick);
+#else
+	INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function);
+#endif
+
+	schedule_work(&kvm_kick->work);
+}
+
+void kvm_smp_send_reschedule(int cpu)
+{
+	if (irqs_disabled()) {
+		kvm_queue_smp_call_function(cpu);
+		return;
+	}
+	smp_call_function_single(cpu, vcpu_kick_intr, NULL, 0);
+}
 #endif
 
 /* manually export hrtimer_init/start/cancel */
diff --git a/kvm/kernel/x86/hack-module.awk b/kvm/kernel/x86/hack-module.awk
index 260eeef..f4d14da 100644
--- a/kvm/kernel/x86/hack-module.awk
+++ b/kvm/kernel/x86/hack-module.awk
@@ -35,6 +35,8 @@ BEGIN { split("INIT_WORK desc_struct ldttss_desc64 desc_ptr " \
 
 { sub(/match->dev->msi_enabled/, "kvm_pcidev_msi_enabled(match->dev)") }
 
+{ sub(/smp_send_reschedule/, "kvm_smp_send_reschedule") }
+
 /^static void __vmx_load_host_state/ {
     vmx_load_host_state = 1
 }




* [patch 2/4] KVM: x86: wake up waitqueue before calling get_cpu()
  2009-04-27 21:07 [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix mtosatti
  2009-04-27 21:07 ` [patch 1/4] qemu: external module: smp_send_reschedule compat mtosatti
@ 2009-04-27 21:07 ` mtosatti
  2009-04-27 21:07 ` [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick mtosatti
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: mtosatti @ 2009-04-27 21:07 UTC (permalink / raw)
  To: kvm; +Cc: sheng, Jan Blunck, Sven-Thorsten Dietrich, Marcelo Tosatti

[-- Attachment #1: vcpu-kick-rt --]
[-- Type: text/plain, Size: 1085 bytes --]

From: Jan Blunck <jblunck@suse.de>
 
This moves the get_cpu() call down so it happens after we wake up the
waiters. Therefore the waitqueue locks can safely be RT mutexes, which may
sleep and thus must not be taken inside the preemption-disabled region
opened by get_cpu().
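
The reordering, in short (matching the hunk below):

	before:  cpu = get_cpu();              /* preemption disabled      */
	         wake_up_interruptible(...);   /* takes the waitqueue lock */
	         ...
	         put_cpu();

	after:   wake_up_interruptible(...);   /* lock taken with preemption
	                                          still enabled            */
	         cpu = get_cpu();
	         ...
	         put_cpu();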
 
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Sven-Thorsten Dietrich <sven@thebigcorporation.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -4544,7 +4544,7 @@ static void vcpu_kick_intr(void *info)
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
 	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
@@ -4554,6 +4554,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu
 	 * We may be called synchronously with irqs disabled in guest mode,
 	 * So need not to call smp_call_function_single() in that case.
 	 */
+	cpu = get_cpu();
 	if (vcpu->guest_mode && vcpu->cpu != cpu)
 		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
 	put_cpu();




* [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick
  2009-04-27 21:07 [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix mtosatti
  2009-04-27 21:07 ` [patch 1/4] qemu: external module: smp_send_reschedule compat mtosatti
  2009-04-27 21:07 ` [patch 2/4] KVM: x86: wake up waitqueue before calling get_cpu() mtosatti
@ 2009-04-27 21:07 ` mtosatti
  2009-04-27 21:07 ` [patch 4/4] KVM: protect assigned dev workqueue, int handler and irq acker mtosatti
  2009-04-28  7:08 ` [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix Sheng Yang
  4 siblings, 0 replies; 19+ messages in thread
From: mtosatti @ 2009-04-27 21:07 UTC (permalink / raw)
  To: kvm; +Cc: sheng, Marcelo Tosatti

[-- Attachment #1: smp-ipi --]
[-- Type: text/plain, Size: 5406 bytes --]

KVM uses a function call IPI to cause the exit of a guest running on a
physical cpu. For virtual interrupt notification there is no need to
wait for the IPI to be received, or to execute any function.

This is exactly what the reschedule IPI does, without the overhead
of function IPI. So use it instead of smp_call_function_single in
kvm_vcpu_kick.

Also change the "guest_mode" variable to a bit in vcpu->requests, and
use that to collapse multiple IPIs that would be issued between the
first one and zeroing of guest mode.

This allows kvm_vcpu_kick to be called with interrupts disabled.
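
In short, the resulting protocol (a simplified sketch of the hunks below,
not literal code):

	vcpu_enter_guest():
		local_irq_disable();
		clear_bit(KVM_REQ_KICK, &vcpu->requests);
		smp_mb__after_clear_bit();
		if (vcpu->requests || need_resched() || signal_pending())
			bail out and retry;
		... enter guest ...
		set_bit(KVM_REQ_KICK, &vcpu->requests);
		local_irq_enable();

	kvm_vcpu_kick():
		if (cpu != me && cpu_online(cpu))
			if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
				smp_send_reschedule(cpu);

The bit is set again on exit, so kicks arriving while the vcpu is outside
guest mode send no IPI; it is cleared with interrupts disabled on entry, so
the first kick afterwards sends a single reschedule IPI and later kicks are
collapsed until the next exit.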

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/ia64/kernel/irq_ia64.c
===================================================================
--- kvm.orig/arch/ia64/kernel/irq_ia64.c
+++ kvm/arch/ia64/kernel/irq_ia64.c
@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = 
 	.name =		"IPI"
 };
 
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
 	.handler =	dummy_handler,
 	.flags =	IRQF_DISABLED,
Index: kvm/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm/arch/ia64/kvm/kvm-ia64.c
@@ -668,7 +668,7 @@ again:
 	host_ctx = kvm_get_host_context(vcpu);
 	guest_ctx = kvm_get_guest_context(vcpu);
 
-	vcpu->guest_mode = 1;
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
 
 	r = kvm_vcpu_pre_transition(vcpu);
 	if (r < 0)
@@ -685,7 +685,7 @@ again:
 	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	/*
@@ -1884,24 +1884,18 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	if (vcpu->guest_mode && cpu != ipi_pcpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+	me = get_cpu();
+	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
Index: kvm/arch/x86/kernel/smp.c
===================================================================
--- kvm.orig/arch/x86/kernel/smp.c
+++ kvm/arch/x86/kernel/smp.c
@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	/*
+	 * KVM uses this interrupt to force a cpu out of guest mode
+	 */
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -3197,6 +3197,9 @@ static int vcpu_enter_guest(struct kvm_v
 
 	local_irq_disable();
 
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+	smp_mb__after_clear_bit();
+
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -3204,13 +3207,6 @@ static int vcpu_enter_guest(struct kvm_v
 		goto out;
 	}
 
-	vcpu->guest_mode = 1;
-	/*
-	 * Make sure that guest_mode assignment won't happen after
-	 * testing the pending IRQ vector bitmap.
-	 */
-	smp_wmb();
-
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else if (irqchip_in_kernel(vcpu->kvm))
@@ -3252,7 +3248,7 @@ static int vcpu_enter_guest(struct kvm_v
 	set_debugreg(vcpu->arch.host_dr6, 6);
 	set_debugreg(vcpu->arch.host_dr7, 7);
 
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	++vcpu->stat.exits;
@@ -4550,30 +4546,20 @@ int kvm_arch_vcpu_runnable(struct kvm_vc
 	       || vcpu->arch.nmi_pending;
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu;
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
-	/*
-	 * We may be called synchronously with irqs disabled in guest mode,
-	 * So need not to call smp_call_function_single() in that case.
-	 */
-	cpu = get_cpu();
-	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+
+	me = get_cpu();
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
+#define KVM_REQ_KICK               9
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -72,7 +73,6 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	int   cpu;
 	struct kvm_run *run;
-	int guest_mode;
 	unsigned long requests;
 	unsigned long guest_debug;
 	int fpu_active;




* [patch 4/4] KVM: protect assigned dev workqueue, int handler and irq acker
  2009-04-27 21:07 [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix mtosatti
                   ` (2 preceding siblings ...)
  2009-04-27 21:07 ` [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick mtosatti
@ 2009-04-27 21:07 ` mtosatti
  2009-04-28  7:08 ` [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix Sheng Yang
  4 siblings, 0 replies; 19+ messages in thread
From: mtosatti @ 2009-04-27 21:07 UTC (permalink / raw)
  To: kvm; +Cc: sheng, sheng.yang, Marcelo Tosatti

[-- Attachment #1: devass-protect-irq-ack --]
[-- Type: text/plain, Size: 3715 bytes --]

kvm_assigned_dev_ack_irq is vulnerable to a race condition with the
interrupt handler function. It does:

        if (dev->host_irq_disabled) {
                enable_irq(dev->host_irq);
                dev->host_irq_disabled = false;
        }

If an interrupt triggers after enable_irq() but before the
dev->host_irq_disabled = false assignment, the handler will disable the
interrupt and set dev->host_irq_disabled to true.

On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to
false, and the next kvm_assigned_dev_ack_irq call will fail to reenable
it.
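
Spelled out as an interleaving (following the description above):

	ack notifier                        interrupt handler
	------------                        -----------------
	sees host_irq_disabled == true
	enable_irq(dev->host_irq);
	                                    <interrupt fires>
	                                    disable_irq_nosync(irq);
	                                    dev->host_irq_disabled = true;
	dev->host_irq_disabled = false;

The host irq is left disabled while host_irq_disabled reads false, so no
later ack re-enables it. Taking assigned_dev_lock around both the disable
and the enable paths, as done below, closes the window.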

Beyond that, having the interrupt handler and the work handler run in
parallel sounds like asking for trouble (I could not spot any obvious
problem, but better not to have to rely on that; it's fragile).

CC: sheng.yang@intel.com
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -345,6 +345,7 @@ struct kvm_assigned_dev_kernel {
 	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
+	spinlock_t assigned_dev_lock;
 };
 
 struct kvm_irq_mask_notifier {
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -42,6 +42,7 @@
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -130,6 +131,7 @@ static void kvm_assigned_dev_interrupt_w
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
@@ -156,18 +158,21 @@ static void kvm_assigned_dev_interrupt_w
 		}
 	}
 
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
+	unsigned long flags;
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
-			return IRQ_HANDLED;
+			goto out;
 		assigned_dev->guest_msix_entries[index].flags |=
 			KVM_ASSIGNED_MSIX_PENDING;
 	}
@@ -177,6 +182,8 @@ static irqreturn_t kvm_assigned_dev_intr
 	disable_irq_nosync(irq);
 	assigned_dev->host_irq_disabled = true;
 
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -184,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
 
 	if (kian->gsi == -1)
 		return;
@@ -196,10 +204,12 @@ static void kvm_assigned_dev_ack_irq(str
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
 	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
 		enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
 }
 
 static void deassign_guest_irq(struct kvm *kvm,
@@ -615,6 +625,7 @@ static int kvm_vm_ioctl_assign_device(st
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
 	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;




* Re: [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
  2009-04-27 21:07 [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix mtosatti
                   ` (3 preceding siblings ...)
  2009-04-27 21:07 ` [patch 4/4] KVM: protect assigned dev workqueue, int handler and irq acker mtosatti
@ 2009-04-28  7:08 ` Sheng Yang
  2009-04-29 17:47   ` Marcelo Tosatti
  4 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-04-28  7:08 UTC (permalink / raw)
  To: mtosatti; +Cc: kvm

Ack all. This also solved a bug I was seeing on my side. Thanks!

One observation: the performance of a heavy interrupt workload (e.g. a 10 
gigabit Oplin card) dropped dramatically with the smp_send_reschedule() 
method. In one environment (where the speed of the Oplin card is also limited 
by cpu performance), smp_call_function_single() stably gets more than 1 Gbit/s 
(native gets 1.2 Gbit/s), but smp_send_reschedule() only gets around 600 
Mbit/s. The rescheduling interrupt rate is about 2000/second per cpu, while 
the interrupt rate for the device is in the tens of thousands per second.

Anyway, this method is more elegant and correct. There is still room to 
optimize, but of course correctness is the first priority.

-- 
regards
Yang, Sheng




* Re: [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
  2009-04-28  7:08 ` [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix Sheng Yang
@ 2009-04-29 17:47   ` Marcelo Tosatti
  2009-04-30  0:56     ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Marcelo Tosatti @ 2009-04-29 17:47 UTC (permalink / raw)
  To: Sheng Yang; +Cc: kvm

On Tue, Apr 28, 2009 at 03:08:46PM +0800, Sheng Yang wrote:
> Ack all. This also solved one bug by my hand. Thanks!
> 
> I observe one point: the performance of high workload interrupt(e.g. 10 
> gigabyte oplin card) dropped dramatically with smp_send_reschedule() method...
> In one environment(the speed of oplin card also limited by cpu performance), 
> Using smp_call_function_single() can get more than 1G bit/s stably(native got 
> 1.2G), but smp_send_reschedule() can only got around 600M bit/s... And the 
> rescheduling interrupt number is about 2000/second per cpu. And the interrupt 
> rate is about tens of thousands per second for the device.
> 
> Anyway, this method is more elegant and correct. Though there is still room 
> for optimize - but of course, the correctness is first priority.

Are you using the compat code or a kvm.git kernel? Can you remove only the
last patch (the spinlock) to confirm it's the cause of the slowdown?





* Re: [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
  2009-04-29 17:47   ` Marcelo Tosatti
@ 2009-04-30  0:56     ` Sheng Yang
  2009-04-30  1:59       ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-04-30  0:56 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On Thursday 30 April 2009 01:47:57 Marcelo Tosatti wrote:
> On Tue, Apr 28, 2009 at 03:08:46PM +0800, Sheng Yang wrote:
> > Ack all. This also solved one bug by my hand. Thanks!
> >
> > I observe one point: the performance of high workload interrupt(e.g. 10
> > gigabyte oplin card) dropped dramatically with smp_send_reschedule()
> > method... In one environment(the speed of oplin card also limited by cpu
> > performance), Using smp_call_function_single() can get more than 1G bit/s
> > stably(native got 1.2G), but smp_send_reschedule() can only got around
> > 600M bit/s... And the rescheduling interrupt number is about 2000/second
> > per cpu. And the interrupt rate is about tens of thousands per second for
> > the device.
> >
> > Anyway, this method is more elegant and correct. Though there is still
> > room for optimize - but of course, the correctness is first priority.
>
> Are you using the compat code or a kvm.git kernel? Can you remove only the
> last patch (the spinlock) to confirm its the cause of the slowdown?

I am using kvm.git.

I said this because I tried the old version of the patch (which had a 
warning) and it got more than 1G/sec. 

I'd like to take a close look at what's happened.

-- 
regards
Yang, Sheng


* Re: [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
  2009-04-30  0:56     ` Sheng Yang
@ 2009-04-30  1:59       ` Sheng Yang
  2009-05-06  5:07         ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-04-30  1:59 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On Thursday 30 April 2009 08:56:57 Sheng Yang wrote:
> On Thursday 30 April 2009 01:47:57 Marcelo Tosatti wrote:
> > On Tue, Apr 28, 2009 at 03:08:46PM +0800, Sheng Yang wrote:
> > > Ack all. This also solved one bug by my hand. Thanks!
> > >
> > > I observe one point: the performance of high workload interrupt(e.g. 10
> > > gigabyte oplin card) dropped dramatically with smp_send_reschedule()
> > > method... In one environment(the speed of oplin card also limited by
> > > cpu performance), Using smp_call_function_single() can get more than 1G
> > > bit/s stably(native got 1.2G), but smp_send_reschedule() can only got
> > > around 600M bit/s... And the rescheduling interrupt number is about
> > > 2000/second per cpu. And the interrupt rate is about tens of thousands
> > > per second for the device.
> > >
> > > Anyway, this method is more elegant and correct. Though there is still
> > > room for optimize - but of course, the correctness is first priority.
> >
> > Are you using the compat code or a kvm.git kernel? Can you remove only
> > the last patch (the spinlock) to confirm its the cause of the slowdown?
>
> I am using kvm.git.
>
> I said this because I tried the old version of patch(which have warning)
> and it would got more than 1G/sec.
>
> I'd like to take a close look at what's happened.

Still ACK this patchset.

And sorry, my memory was mixed up...

The old version of the patch and this one offer the same performance, so the 
problem is not here.

I got more than 1 Gbit/s in one of my own experiments.

Disabling/enabling the IRQ is meant for level-triggered interrupts, to prevent 
the device from raising the interrupt again after the kernel handler returns, 
but it does not apply to MSI/MSI-X. Some interrupts may be merged into one, 
but AFAIK the driver can handle that well.

My experiment discards the disable/enable IRQ for MSI/MSI-X, which gets much 
better performance for the Oplin card, 2x compared to the disable/enable 
version.

I will prepare a patch for it.
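
Roughly, the idea would be something like the following (a hypothetical
sketch only, not the actual patch; the exact flag test is an assumption
and depends on how the host irq type is recorded):

	/* in kvm_assigned_dev_intr(): only mask the host line for
	 * level-triggered INTx; MSI/MSI-X will not fire again until the
	 * device sends a new message, so the host irq can stay enabled. */
	if (!(assigned_dev->irq_requested_type &
	      (KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_HOST_MSIX))) {
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
	}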

-- 
regards
Yang, Sheng


* Re: [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
  2009-04-30  1:59       ` Sheng Yang
@ 2009-05-06  5:07         ` Sheng Yang
  2009-05-07 13:21           ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-05-06  5:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Marcelo Tosatti

On Thursday 30 April 2009 09:59:56 Sheng Yang wrote:
> On Thursday 30 April 2009 08:56:57 Sheng Yang wrote:
> > On Thursday 30 April 2009 01:47:57 Marcelo Tosatti wrote:
> > > On Tue, Apr 28, 2009 at 03:08:46PM +0800, Sheng Yang wrote:
> > > > Ack all. This also solved one bug by my hand. Thanks!
> > > >
> > > > I observe one point: the performance of high workload interrupt(e.g.
> > > > 10 gigabyte oplin card) dropped dramatically with
> > > > smp_send_reschedule() method... In one environment(the speed of oplin
> > > > card also limited by cpu performance), Using
> > > > smp_call_function_single() can get more than 1G bit/s stably(native
> > > > got 1.2G), but smp_send_reschedule() can only got around 600M
> > > > bit/s... And the rescheduling interrupt number is about 2000/second
> > > > per cpu. And the interrupt rate is about tens of thousands per second
> > > > for the device.
> > > >
> > > > Anyway, this method is more elegant and correct. Though there is
> > > > still room for optimize - but of course, the correctness is first
> > > > priority.
> > >
> > > Are you using the compat code or a kvm.git kernel? Can you remove only
> > > the last patch (the spinlock) to confirm its the cause of the slowdown?
> >
> > I am using kvm.git.
> >
> > I said this because I tried the old version of patch(which have warning)
> > and it would got more than 1G/sec.
> >
> > I'd like to take a close look at what's happened.
>
> Still ACK this patchset.
>
> And sorry, my memory messed...
>
> The old version of patch and this one offered the same performance. So the
> problem is not here.
>
> I get more than 1g per second by one of myself's experiment.
>
> Disable/enable irq purposed to use with level interrupt to prevent it send
> interrupt again after kernel handler return, but it not applied to
> MSI/MSI-X. Though some interrupt may be merged with one, but AFAIK the
> driver can handle it well.
>
> My experiment is discard disable/enable IRQ for MSI/MSI-X, then can get
> much better performance for oplin card, 2x with disable/enable one.
>
> I would prepare a patch for it.

Hi Avi

Is there any issue blocking this patchset?

Thanks!

-- 
regards
Yang, Sheng


* Re: [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race fix
  2009-05-06  5:07         ` Sheng Yang
@ 2009-05-07 13:21           ` Avi Kivity
  2009-05-07 20:55             ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 mtosatti
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-05-07 13:21 UTC (permalink / raw)
  To: Sheng Yang; +Cc: kvm, Marcelo Tosatti

Sheng Yang wrote:
> Is there any issue blocked this patchset?
>   

Yes, a slow maintainer.  I'll go review it now.

-- 
error compiling committee.c: too many arguments to function



* Re: [patch 1/4] qemu: external module: smp_send_reschedule compat
  2009-04-27 21:07 ` [patch 1/4] qemu: external module: smp_send_reschedule compat mtosatti
@ 2009-05-07 13:28   ` Avi Kivity
  0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2009-05-07 13:28 UTC (permalink / raw)
  To: mtosatti; +Cc: kvm, sheng

mtosatti@redhat.com wrote:
> smp_send_reschedule was exported (via smp_ops) in v2.6.24.
> Create a compat function which schedules the IPI to keventd context, 
> in case interrupts are disabled, for kernels < 2.6.24.
>
>  
>  #endif
>  
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
> +#define nr_cpu_ids NR_CPUS
> +#endif
> +
>   

Doesn't seem related?  Please send as a separate patch.

> diff --git a/kvm/kernel/x86/hack-module.awk b/kvm/kernel/x86/hack-module.awk
> index 260eeef..f4d14da 100644
> --- a/kvm/kernel/x86/hack-module.awk
> +++ b/kvm/kernel/x86/hack-module.awk
> @@ -35,6 +35,8 @@ BEGIN { split("INIT_WORK desc_struct ldttss_desc64 desc_ptr " \
>  
>  { sub(/match->dev->msi_enabled/, "kvm_pcidev_msi_enabled(match->dev)") }
>  
> +{ sub(/smp_send_reschedule/, "kvm_smp_send_reschedule") }
> +
>  /^static void __vmx_load_host_state/ {
>      vmx_load_host_state = 1
>  }
>
>   

There's a bit at the top of hack-module.awk that does this in a slightly 
simpler way.  Please also adjust ia64.

This now resides in kvm-kmod.git, please patch against that.


-- 
error compiling committee.c: too many arguments to function



* [patch 0/4] smp_send_reschedule / assigned dev host intx race v2
  2009-05-07 13:21           ` Avi Kivity
@ 2009-05-07 20:55             ` mtosatti
  2009-05-07 20:55               ` [patch 1/4] kvm-kmod: nr_cpu_ids compat mtosatti
                                 ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: mtosatti @ 2009-05-07 20:55 UTC (permalink / raw)
  To: kvm; +Cc: avi

Addressing comments.




* [patch 1/4] kvm-kmod: nr_cpu_ids compat
  2009-05-07 20:55             ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 mtosatti
@ 2009-05-07 20:55               ` mtosatti
  2009-05-07 20:55               ` [patch 2/4] kvm-kmod: smp_send_reschedule compat mtosatti
                                 ` (3 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: mtosatti @ 2009-05-07 20:55 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: kvm-kmod-compat-nr_cpu_ids --]
[-- Type: text/plain, Size: 495 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-kmod/external-module-compat-comm.h
===================================================================
--- kvm-kmod.orig/external-module-compat-comm.h
+++ kvm-kmod/external-module-compat-comm.h
@@ -116,6 +116,10 @@ int kvm_smp_call_function_single(int cpu
 
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+#define nr_cpu_ids NR_CPUS
+#endif
+
 #include <linux/miscdevice.h>
 #ifndef KVM_MINOR
 #define KVM_MINOR 232




* [patch 2/4] kvm-kmod: smp_send_reschedule compat
  2009-05-07 20:55             ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 mtosatti
  2009-05-07 20:55               ` [patch 1/4] kvm-kmod: nr_cpu_ids compat mtosatti
@ 2009-05-07 20:55               ` mtosatti
  2009-05-07 20:55               ` [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick mtosatti
                                 ` (2 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: mtosatti @ 2009-05-07 20:55 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: kvm-kmod-compat-smp-send-reschedule --]
[-- Type: text/plain, Size: 3237 bytes --]

smp_send_reschedule was exported (via smp_ops) in v2.6.24.
For kernels older than 2.6.24, create a compat function which sends the IPI
via smp_call_function_single(), deferring it to keventd context when
interrupts are disabled (smp_call_function_single() may not be called with
interrupts disabled).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-kmod/ia64/hack-module.awk
===================================================================
--- kvm-kmod.orig/ia64/hack-module.awk
+++ kvm-kmod/ia64/hack-module.awk
@@ -1,4 +1,4 @@
-BEGIN { split("INIT_WORK on_each_cpu smp_call_function " \
+BEGIN { split("INIT_WORK on_each_cpu smp_call_function smp_send_reschedule " \
 	      "hrtimer_add_expires_ns hrtimer_get_expires " \
 	      "hrtimer_get_expires_ns hrtimer_start_expires " \
 	      "hrtimer_expires_remaining " \
Index: kvm-kmod/x86/hack-module.awk
===================================================================
--- kvm-kmod.orig/x86/hack-module.awk
+++ kvm-kmod/x86/hack-module.awk
@@ -1,7 +1,7 @@
 BEGIN { split("INIT_WORK desc_struct ldttss_desc64 desc_ptr " \
 	      "hrtimer_add_expires_ns hrtimer_get_expires " \
 	      "hrtimer_get_expires_ns hrtimer_start_expires " \
-	      "hrtimer_expires_remaining " \
+	      "hrtimer_expires_remaining smp_send_reschedule " \
 	      "on_each_cpu relay_open request_irq free_irq" , compat_apis); }
 
 /^int kvm_init\(/ { anon_inodes = 1 }
Index: kvm-kmod/external-module-compat-comm.h
===================================================================
--- kvm-kmod.orig/external-module-compat-comm.h
+++ kvm-kmod/external-module-compat-comm.h
@@ -224,6 +224,10 @@ int kvm_smp_call_function_mask(cpumask_t
 
 #define smp_call_function_mask kvm_smp_call_function_mask
 
+void kvm_smp_send_reschedule(int cpu);
+
+#define smp_send_reschedule kvm_smp_send_reschedule
+
 #endif
 
 /* empty_zero_page isn't exported in all kernels */
Index: kvm-kmod/external-module-compat.c
===================================================================
--- kvm-kmod.orig/external-module-compat.c
+++ kvm-kmod/external-module-compat.c
@@ -221,6 +221,58 @@ out:
 	return 0;
 }
 
+#include <linux/workqueue.h>
+
+static void vcpu_kick_intr(void *info)
+{
+}
+
+struct kvm_kick {
+	int cpu;
+	struct work_struct work;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+static void kvm_do_smp_call_function(void *data)
+{
+	int me;
+	struct kvm_kick *kvm_kick = data;
+#else
+static void kvm_do_smp_call_function(struct work_struct *work)
+{
+	int me;
+	struct kvm_kick *kvm_kick = container_of(work, struct kvm_kick, work);
+#endif
+	me = get_cpu();
+
+	if (kvm_kick->cpu != me)
+		smp_call_function_single(kvm_kick->cpu, vcpu_kick_intr,
+					 NULL, 0);
+	kfree(kvm_kick);
+	put_cpu();
+}
+
+void kvm_queue_smp_call_function(int cpu)
+{
+	struct kvm_kick *kvm_kick = kmalloc(sizeof(struct kvm_kick), GFP_ATOMIC);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+	INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function, kvm_kick);
+#else
+	INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function);
+#endif
+
+	schedule_work(&kvm_kick->work);
+}
+
+void kvm_smp_send_reschedule(int cpu)
+{
+	if (irqs_disabled()) {
+		kvm_queue_smp_call_function(cpu);
+		return;
+	}
+	smp_call_function_single(cpu, vcpu_kick_intr, NULL, 0);
+}
 #endif
 
 /* manually export hrtimer_init/start/cancel */




* [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick
  2009-05-07 20:55             ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 mtosatti
  2009-05-07 20:55               ` [patch 1/4] kvm-kmod: nr_cpu_ids compat mtosatti
  2009-05-07 20:55               ` [patch 2/4] kvm-kmod: smp_send_reschedule compat mtosatti
@ 2009-05-07 20:55               ` mtosatti
  2009-05-08  7:13                 ` Gleb Natapov
  2009-05-07 20:55               ` [patch 4/4] KVM: protect assigned dev workqueue, int handler and irq acker mtosatti
  2009-05-10 16:31               ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 Avi Kivity
  4 siblings, 1 reply; 19+ messages in thread
From: mtosatti @ 2009-05-07 20:55 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: smp-ipi --]
[-- Type: text/plain, Size: 5492 bytes --]

KVM uses a function call IPI to cause the exit of a guest running on a
physical cpu. For virtual interrupt notification there is no need to
wait for the IPI to be received, or to execute any function.

This is exactly what the reschedule IPI does, without the overhead
of function IPI. So use it instead of smp_call_function_single in
kvm_vcpu_kick.

Also change the "guest_mode" variable to a bit in vcpu->requests, and
use that to collapse multiple IPIs that would be issued between the
first one and zeroing of guest mode.

This allows kvm_vcpu_kick to be called with interrupts disabled.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-pending/arch/ia64/kernel/irq_ia64.c
===================================================================
--- kvm-pending.orig/arch/ia64/kernel/irq_ia64.c
+++ kvm-pending/arch/ia64/kernel/irq_ia64.c
@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = 
 	.name =		"IPI"
 };
 
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
 	.handler =	dummy_handler,
 	.flags =	IRQF_DISABLED,
Index: kvm-pending/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-pending.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-pending/arch/ia64/kvm/kvm-ia64.c
@@ -668,7 +668,7 @@ again:
 	host_ctx = kvm_get_host_context(vcpu);
 	guest_ctx = kvm_get_guest_context(vcpu);
 
-	vcpu->guest_mode = 1;
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
 
 	r = kvm_vcpu_pre_transition(vcpu);
 	if (r < 0)
@@ -685,7 +685,7 @@ again:
 	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	/*
@@ -1879,24 +1879,18 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	if (vcpu->guest_mode && cpu != ipi_pcpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+	me = get_cpu();
+	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
Index: kvm-pending/arch/x86/kernel/smp.c
===================================================================
--- kvm-pending.orig/arch/x86/kernel/smp.c
+++ kvm-pending/arch/x86/kernel/smp.c
@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	/*
+	 * KVM uses this interrupt to force a cpu out of guest mode
+	 */
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
Index: kvm-pending/arch/x86/kvm/x86.c
===================================================================
--- kvm-pending.orig/arch/x86/kvm/x86.c
+++ kvm-pending/arch/x86/kvm/x86.c
@@ -3259,6 +3259,9 @@ static int vcpu_enter_guest(struct kvm_v
 
 	local_irq_disable();
 
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+	smp_mb__after_clear_bit();
+
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -3266,13 +3269,6 @@ static int vcpu_enter_guest(struct kvm_v
 		goto out;
 	}
 
-	vcpu->guest_mode = 1;
-	/*
-	 * Make sure that guest_mode assignment won't happen after
-	 * testing the pending IRQ vector bitmap.
-	 */
-	smp_wmb();
-
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
@@ -3317,7 +3313,7 @@ static int vcpu_enter_guest(struct kvm_v
 	set_debugreg(vcpu->arch.host_dr6, 6);
 	set_debugreg(vcpu->arch.host_dr7, 7);
 
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	++vcpu->stat.exits;
@@ -4611,30 +4607,20 @@ int kvm_arch_vcpu_runnable(struct kvm_vc
 	       || vcpu->arch.nmi_pending;
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu;
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
-	/*
-	 * We may be called synchronously with irqs disabled in guest mode,
-	 * So need not to call smp_call_function_single() in that case.
-	 */
-	cpu = get_cpu();
-	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+
+	me = get_cpu();
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
Index: kvm-pending/include/linux/kvm_host.h
===================================================================
--- kvm-pending.orig/include/linux/kvm_host.h
+++ kvm-pending/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
+#define KVM_REQ_KICK               9
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -72,7 +73,6 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	int   cpu;
 	struct kvm_run *run;
-	int guest_mode;
 	unsigned long requests;
 	unsigned long guest_debug;
 	int fpu_active;




* [patch 4/4] KVM: protect assigned dev workqueue, int handler and irq acker
  2009-05-07 20:55             ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 mtosatti
                                 ` (2 preceding siblings ...)
  2009-05-07 20:55               ` [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick mtosatti
@ 2009-05-07 20:55               ` mtosatti
  2009-05-10 16:31               ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 Avi Kivity
  4 siblings, 0 replies; 19+ messages in thread
From: mtosatti @ 2009-05-07 20:55 UTC (permalink / raw)
  To: kvm; +Cc: avi, sheng.yang, Marcelo Tosatti

[-- Attachment #1: dev-ass-protect-irq-ack --]
[-- Type: text/plain, Size: 3763 bytes --]

kvm_assigned_dev_ack_irq is vulnerable to a race condition with the
interrupt handler function. It does:

        if (dev->host_irq_disabled) {
                enable_irq(dev->host_irq);
                dev->host_irq_disabled = false;
        }

If an interrupt triggers after enable_irq() but before the
dev->host_irq_disabled = false assignment, the handler will disable the
interrupt and set dev->host_irq_disabled to true.

On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to
false, and the next kvm_assigned_dev_ack_irq call will fail to reenable
it.

Beyond that, having the interrupt handler and the work handler run in
parallel sounds like asking for trouble (I could not spot any obvious
problem, but better not to have to rely on that; it's fragile).

CC: sheng.yang@intel.com
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-pending/include/linux/kvm_host.h
===================================================================
--- kvm-pending.orig/include/linux/kvm_host.h
+++ kvm-pending/include/linux/kvm_host.h
@@ -346,6 +346,7 @@ struct kvm_assigned_dev_kernel {
 	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
+	spinlock_t assigned_dev_lock;
 };
 
 struct kvm_irq_mask_notifier {
Index: kvm-pending/virt/kvm/kvm_main.c
===================================================================
--- kvm-pending.orig/virt/kvm/kvm_main.c
+++ kvm-pending/virt/kvm/kvm_main.c
@@ -42,6 +42,7 @@
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -130,6 +131,7 @@ static void kvm_assigned_dev_interrupt_w
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
@@ -156,18 +158,21 @@ static void kvm_assigned_dev_interrupt_w
 		}
 	}
 
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
+	unsigned long flags;
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
-			return IRQ_HANDLED;
+			goto out;
 		assigned_dev->guest_msix_entries[index].flags |=
 			KVM_ASSIGNED_MSIX_PENDING;
 	}
@@ -177,6 +182,8 @@ static irqreturn_t kvm_assigned_dev_intr
 	disable_irq_nosync(irq);
 	assigned_dev->host_irq_disabled = true;
 
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -184,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
 
 	if (kian->gsi == -1)
 		return;
@@ -196,10 +204,12 @@ static void kvm_assigned_dev_ack_irq(str
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
 	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
 		enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
 }
 
 static void deassign_guest_irq(struct kvm *kvm,
@@ -615,6 +625,7 @@ static int kvm_vm_ioctl_assign_device(st
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
 	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;




* Re: [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick
  2009-05-07 20:55               ` [patch 3/4] KVM: use smp_send_reschedule in kvm_vcpu_kick mtosatti
@ 2009-05-08  7:13                 ` Gleb Natapov
  0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2009-05-08  7:13 UTC (permalink / raw)
  To: mtosatti; +Cc: kvm, avi

On Thu, May 07, 2009 at 05:55:12PM -0300, mtosatti@redhat.com wrote:
> KVM uses a function call IPI to cause the exit of a guest running on a
> physical cpu. For virtual interrupt notification there is no need to
> wait on IPI receival, or to execute any function.
> 
> This is exactly what the reschedule IPI does, without the overhead
> of function IPI. So use it instead of smp_call_function_single in
> kvm_vcpu_kick.
> 
> Also change the "guest_mode" variable to a bit in vcpu->requests, and
> use that to collapse multiple IPI's that would be issued between the
> first one and zeroing of guest mode.
> 
> This allows kvm_vcpu_kick to called with interrupts disabled.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
Acked-by: Gleb Natapov <gleb@redhat.com>

Except one small nitpick below.
> Index: kvm-pending/arch/x86/kvm/x86.c
> ===================================================================
> --- kvm-pending.orig/arch/x86/kvm/x86.c
> +++ kvm-pending/arch/x86/kvm/x86.c
> @@ -3259,6 +3259,9 @@ static int vcpu_enter_guest(struct kvm_v
>  
>  	local_irq_disable();
>  
> +	clear_bit(KVM_REQ_KICK, &vcpu->requests);
> +	smp_mb__after_clear_bit();
> +
>  	if (vcpu->requests || need_resched() || signal_pending(current)) {
>  		local_irq_enable();
>  		preempt_enable();
> @@ -3266,13 +3269,6 @@ static int vcpu_enter_guest(struct kvm_v
>  		goto out;
>  	}
>  
> -	vcpu->guest_mode = 1;
> -	/*
> -	 * Make sure that guest_mode assignment won't happen after
> -	 * testing the pending IRQ vector bitmap.
> -	 */
> -	smp_wmb();
> -
Can you leave this comment? I like it :)

--
			Gleb.


* Re: [patch 0/4] smp_send_reschedule / assigned dev host intx race v2
  2009-05-07 20:55             ` [patch 0/4] smp_send_reschedule / assigned dev host intx race v2 mtosatti
                                 ` (3 preceding siblings ...)
  2009-05-07 20:55               ` [patch 4/4] KVM: protect assigned dev workqueue, int handler and irq acker mtosatti
@ 2009-05-10 16:31               ` Avi Kivity
  4 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2009-05-10 16:31 UTC (permalink / raw)
  To: mtosatti; +Cc: kvm

mtosatti@redhat.com wrote:
> Addressing comments.
>
>
>   

Applied, thanks.

-- 
error compiling committee.c: too many arguments to function


