* [patch 0/4] move irq protection role to separate kvm->irq_lock
@ 2009-05-18 16:56 Marcelo Tosatti
  2009-05-18 16:56 ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
                   ` (3 more replies)
  0 siblings, 4 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-18 16:56 UTC (permalink / raw)
  To: kvm

Move the irq protection role to a separate kvm->irq_lock, and push
locking down to the in-kernel MMIO/PIO devices.

Fixes http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
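
In outline, the idea is to stop holding kvm->lock across the device
callbacks and to let each device serialize itself. A simplified sketch
of the MMIO read path (see patch 4 for the real code):

        /* before: callback runs under the global lock */
        mutex_lock(&kvm->lock);
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
        if (mmio_dev)
                kvm_iodevice_read(mmio_dev, gpa, bytes, val);
        mutex_unlock(&kvm->lock);

        /* after: the lock covers only the device search */
        mutex_lock(&kvm->lock);
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
        mutex_unlock(&kvm->lock);
        if (mmio_dev)
                kvm_iodevice_read(mmio_dev, gpa, bytes, val);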

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack
  2009-05-18 16:56 [patch 0/4] move irq protection role to separate kvm->irq_lock Marcelo Tosatti
@ 2009-05-18 16:56 ` Marcelo Tosatti
  2009-05-18 16:56 ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-18 16:56 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: pic-clear-isr-lock --]
[-- Type: text/plain, Size: 509 bytes --]

isr_ack is protected by kvm_pic->lock

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8259.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -72,8 +72,10 @@ static void pic_clear_isr(struct kvm_kpi
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
+	pic_lock(s);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
+	pic_unlock(s);
 }
 
 /*

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-18 16:56 [patch 0/4] move irq protection role to separate kvm->irq_lock Marcelo Tosatti
  2009-05-18 16:56 ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
@ 2009-05-18 16:56 ` Marcelo Tosatti
  2009-05-20 12:06   ` Avi Kivity
  2009-05-18 16:56 ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
  2009-05-18 16:56 ` [patch 4/4] KVM: switch irq injection/acking to irq_lock Marcelo Tosatti
  3 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-18 16:56 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: coalesced-mmio-lock --]
[-- Type: text/plain, Size: 3077 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/virt/kvm/coalesced_mmio.c
===================================================================
--- kvm.orig/virt/kvm/coalesced_mmio.c
+++ kvm/virt/kvm/coalesced_mmio.c
@@ -26,9 +26,10 @@ static int coalesced_mmio_in_range(struc
 	if (!is_write)
 		return 0;
 
-	/* kvm->lock is taken by the caller and must be not released before
-         * dev.read/write
-         */
+	/*
+ 	 * dev->lock must not be released before coalesced_mmio_write.
+	 */
+	mutex_lock(&dev->lock);
 
 	/* Are we able to batch it ? */
 
@@ -41,6 +42,7 @@ static int coalesced_mmio_in_range(struc
 							KVM_COALESCED_MMIO_MAX;
 	if (next == dev->kvm->coalesced_mmio_ring->first) {
 		/* full */
+		mutex_unlock(&dev->lock);
 		return 0;
 	}
 
@@ -57,6 +59,7 @@ static int coalesced_mmio_in_range(struc
 		    addr + len <= zone->addr + zone->size)
 			return 1;
 	}
+	mutex_unlock(&dev->lock);
 	return 0;
 }
 
@@ -67,8 +70,6 @@ static void coalesced_mmio_write(struct 
 				(struct kvm_coalesced_mmio_dev*)this->private;
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
 
-	/* kvm->lock must be taken by caller before call to in_range()*/
-
 	/* copy data in first free entry of the ring */
 
 	ring->coalesced_mmio[ring->last].phys_addr = addr;
@@ -76,6 +77,7 @@ static void coalesced_mmio_write(struct 
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+	mutex_unlock(&dev->lock);
 }
 
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
@@ -90,6 +92,8 @@ int kvm_coalesced_mmio_init(struct kvm *
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
+	mutex_init(&dev->lock);
+
 	dev->dev.write  = coalesced_mmio_write;
 	dev->dev.in_range  = coalesced_mmio_in_range;
 	dev->dev.destructor  = coalesced_mmio_destructor;
@@ -109,16 +113,16 @@ int kvm_vm_ioctl_register_coalesced_mmio
 	if (dev == NULL)
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&dev->lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&dev->lock);
 		return -ENOBUFS;
 	}
 
 	dev->zone[dev->nb_zones] = *zone;
 	dev->nb_zones++;
 
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&dev->lock);
 	return 0;
 }
 
@@ -132,7 +136,7 @@ int kvm_vm_ioctl_unregister_coalesced_mm
 	if (dev == NULL)
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&dev->lock);
 
 	i = dev->nb_zones;
 	while(i) {
@@ -150,7 +154,7 @@ int kvm_vm_ioctl_unregister_coalesced_mm
 		i--;
 	}
 
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&dev->lock);
 
 	return 0;
 }
Index: kvm/virt/kvm/coalesced_mmio.h
===================================================================
--- kvm.orig/virt/kvm/coalesced_mmio.h
+++ kvm/virt/kvm/coalesced_mmio.h
@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	struct mutex lock;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-18 16:56 [patch 0/4] move irq protection role to separate kvm->irq_lock Marcelo Tosatti
  2009-05-18 16:56 ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
  2009-05-18 16:56 ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-05-18 16:56 ` Marcelo Tosatti
  2009-05-20 12:11   ` Avi Kivity
  2009-05-18 16:56 ` [patch 4/4] KVM: switch irq injection/acking to irq_lock Marcelo Tosatti
  3 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-18 16:56 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: ioapic-lock --]
[-- Type: text/plain, Size: 3228 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
 };
 
 struct kvm {
-	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
@@ -132,6 +131,12 @@ struct kvm {
 					KVM_PRIVATE_MEM_SLOTS];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	struct list_head vm_list;
+	struct mutex lock; /*
+			    * - protects mmio_bus, pio_bus.
+			    * - protects a few concurrent ioctl's (FIXME).
+			    * - protects concurrent create_vcpu, but
+			    *   kvm->vcpus walkers do it locklessly (FIXME).
+			    */
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
 	struct kvm_vm_stat stat;
@@ -142,6 +147,7 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+	struct mutex irq_lock; /* protects high level irq logic, ioapic */
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
Index: kvm/virt/kvm/ioapic.c
===================================================================
--- kvm.orig/virt/kvm/ioapic.c
+++ kvm/virt/kvm/ioapic.c
@@ -225,6 +225,10 @@ static int ioapic_in_range(struct kvm_io
 {
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 
+	/*
+	 * Lockless check for ioapic->base_address on the hazy assumption
+	 * it does not change during the lifetime of a VM.
+	 */
 	return ((addr >= ioapic->base_address &&
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
@@ -238,6 +242,7 @@ static void ioapic_mmio_read(struct kvm_
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
+	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
@@ -264,6 +269,7 @@ static void ioapic_mmio_read(struct kvm_
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
@@ -275,6 +281,8 @@ static void ioapic_mmio_write(struct kvm
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
@@ -300,6 +308,7 @@ static void ioapic_mmio_write(struct kvm
 	default:
 		break;
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -984,6 +984,7 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock_init(&kvm->mmu_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 4/4] KVM: switch irq injection/acking to irq_lock
  2009-05-18 16:56 [patch 0/4] move irq protection role to separate kvm->irq_lock Marcelo Tosatti
                   ` (2 preceding siblings ...)
  2009-05-18 16:56 ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
@ 2009-05-18 16:56 ` Marcelo Tosatti
  2009-05-20 12:15   ` Gregory Haskins
  3 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-18 16:56 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: irqlock --]
[-- Type: text/plain, Size: 10973 bytes --]

Switch irq injection/acking to irq_lock, and change PIO/MMIO paths 
so that the device search is protected by kvm->lock, but not the
read/write callbacks (which is the responsibility of the device).

Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
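
Each in-kernel device is then expected to serialize its own callbacks,
along these lines (sketch only; "foo" is a made-up device):

        static void foo_mmio_write(struct kvm_io_device *this, gpa_t addr,
                                   int len, const void *val)
        {
                struct foo_dev *foo = (struct foo_dev *)this->private;

                /* device-private lock, not kvm->lock */
                mutex_lock(&foo->lock);
                /* ... update device state ... */
                mutex_unlock(&foo->lock);
        }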

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -1909,10 +1909,10 @@ long kvm_arch_vm_ioctl(struct file *filp
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -2158,12 +2158,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;

 	vcpu->mmio_phys_addr = gpa;
@@ -2541,7 +2540,6 @@ static void kernel_pio(struct kvm_io_dev
 {
 	/* TODO: String I/O for in kernel device */
 
-	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->arch.pio.in)
 		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
 				  vcpu->arch.pio.size,
@@ -2550,7 +2548,6 @@ static void kernel_pio(struct kvm_io_dev
 		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
 				   vcpu->arch.pio.size,
 				   pd);
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static void pio_string_write(struct kvm_io_device *pio_dev,
@@ -2560,14 +2557,12 @@ static void pio_string_write(struct kvm_
 	void *pd = vcpu->arch.pio_data;
 	int i;
 
-	mutex_lock(&vcpu->kvm->lock);
 	for (i = 0; i < io->cur_count; i++) {
 		kvm_iodevice_write(pio_dev, io->port,
 				   io->size,
 				   pd);
 		pd += io->size;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2604,7 +2599,9 @@ int kvm_emulate_pio(struct kvm_vcpu *vcp
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
 		complete_pio(vcpu);
@@ -2668,9 +2665,11 @@ int kvm_emulate_pio_string(struct kvm_vc
 
 	vcpu->arch.pio.guest_gva = address;
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
 				    !vcpu->arch.pio.in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -62,6 +62,12 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -126,11 +132,7 @@ static void kvm_assigned_dev_interrupt_w
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -159,7 +161,7 @@ static void kvm_assigned_dev_interrupt_w
 	}
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -215,7 +217,7 @@ static void kvm_assigned_dev_ack_irq(str
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
Index: kvm/virt/kvm/irq_comm.c
===================================================================
--- kvm.orig/virt/kvm/irq_comm.c
+++ kvm/virt/kvm/irq_comm.c
@@ -62,6 +62,8 @@ int kvm_irq_delivery_to_apic(struct kvm 
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -113,7 +115,7 @@ static int kvm_set_msi(struct kvm_kernel
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
@@ -174,19 +178,26 @@ void kvm_notify_acked_irq(struct kvm *kv
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -196,6 +207,7 @@ int kvm_request_irq_source_id(struct kvm
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -206,6 +218,7 @@ void kvm_free_irq_source_id(struct kvm *
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -214,19 +227,24 @@ void kvm_free_irq_source_id(struct kvm *
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -234,6 +252,8 @@ void kvm_fire_mask_notifiers(struct kvm 
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -249,7 +269,9 @@ static void __kvm_free_irq_routing(struc
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -323,13 +345,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
 
Index: kvm/arch/x86/kvm/lapic.c
===================================================================
--- kvm.orig/arch/x86/kvm/lapic.c
+++ kvm/arch/x86/kvm/lapic.c
@@ -425,7 +425,9 @@ static void apic_set_eoi(struct kvm_lapi
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -449,7 +451,9 @@ static void apic_send_ipi(struct kvm_lap
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -375,7 +375,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-18 16:56 ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-05-20 12:06   ` Avi Kivity
  2009-05-20 14:09     ` Marcelo Tosatti
  0 siblings, 1 reply; 49+ messages in thread
From: Avi Kivity @ 2009-05-20 12:06 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/virt/kvm/coalesced_mmio.c
> ===================================================================
> --- kvm.orig/virt/kvm/coalesced_mmio.c
> +++ kvm/virt/kvm/coalesced_mmio.c
> @@ -26,9 +26,10 @@ static int coalesced_mmio_in_range(struc
>  	if (!is_write)
>  		return 0;
>  
> -	/* kvm->lock is taken by the caller and must be not released before
> -         * dev.read/write
> -         */
> +	/*
> + 	 * dev->lock must not be released before coalesced_mmio_write.
> +	 */
> +	mutex_lock(&dev->lock);
>  
>  	/* Are we able to batch it ? */
>  
> @@ -41,6 +42,7 @@ static int coalesced_mmio_in_range(struc
>  							KVM_COALESCED_MMIO_MAX;
>  	if (next == dev->kvm->coalesced_mmio_ring->first) {
>  		/* full */
> +		mutex_unlock(&dev->lock);
>  		return 0;
>  	}
>  
> @@ -57,6 +59,7 @@ static int coalesced_mmio_in_range(struc
>  		    addr + len <= zone->addr + zone->size)
>  			return 1;
>  	}
> +	mutex_unlock(&dev->lock);
>  	return 0;
>  }
>   

So we have a function that takes a lock and conditionally releases it?


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-18 16:56 ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
@ 2009-05-20 12:11   ` Avi Kivity
  2009-05-20 14:11     ` Marcelo Tosatti
  0 siblings, 1 reply; 49+ messages in thread
From: Avi Kivity @ 2009-05-20 12:11 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/include/linux/kvm_host.h
> ===================================================================
> --- kvm.orig/include/linux/kvm_host.h
> +++ kvm/include/linux/kvm_host.h
> @@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
>  };
>  
>  struct kvm {
> -	struct mutex lock; /* protects the vcpus array and APIC accesses */
>   

Why move?


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 4/4] KVM: switch irq injection/acking to irq_lock
  2009-05-18 16:56 ` [patch 4/4] KVM: switch irq injection/acking to irq_lock Marcelo Tosatti
@ 2009-05-20 12:15   ` Gregory Haskins
  2009-05-20 14:16     ` Marcelo Tosatti
  2009-05-24 14:53     ` Avi Kivity
  0 siblings, 2 replies; 49+ messages in thread
From: Gregory Haskins @ 2009-05-20 12:15 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

[-- Attachment #1: Type: text/plain, Size: 12298 bytes --]

Marcelo Tosatti wrote:
> Switch irq injection/acking to irq_lock, and change PIO/MMIO paths 
> so that the device search is protected by kvm->lock, but not the
> read/write callbacks (which is the responsibility of the device).
>
> Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/arch/x86/kvm/i8254.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8254.c
> +++ kvm/arch/x86/kvm/i8254.c
> @@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
>  	struct kvm_vcpu *vcpu;
>  	int i;
>  
> -	mutex_lock(&kvm->lock);
> +	mutex_lock(&kvm->irq_lock);
>   

There would be advantages to having irq_lock be an
interrupt/nonpreempt-friendly design, such as s/mutex/spinlock.  For
instance, irqfd could inject the interrupt directly without deferring to
a workqueue.  I'm not sure if this is possible/easy, but it's something
to consider.
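
Roughly something like this, assuming the conversion works out
(hypothetical sketch; the function name is made up):

        static void kvm_irqfd_inject(struct kvm *kvm, int gsi)
        {
                unsigned long flags;

                /* usable from hard irq context with a spinlock */
                spin_lock_irqsave(&kvm->irq_lock, flags);
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 1);
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 0);
                spin_unlock_irqrestore(&kvm->irq_lock, flags);
        }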

-Greg

>  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
>  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
> -	mutex_unlock(&kvm->lock);
> +	mutex_unlock(&kvm->irq_lock);
>  
>  	/*
>  	 * Provides NMI watchdog support via Virtual Wire mode.
> Index: kvm/arch/x86/kvm/x86.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/x86.c
> +++ kvm/arch/x86/kvm/x86.c
> @@ -1909,10 +1909,10 @@ long kvm_arch_vm_ioctl(struct file *filp
>  			goto out;
>  		if (irqchip_in_kernel(kvm)) {
>  			__s32 status;
> -			mutex_lock(&kvm->lock);
> +			mutex_lock(&kvm->irq_lock);
>  			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
>  					irq_event.irq, irq_event.level);
> -			mutex_unlock(&kvm->lock);
> +			mutex_unlock(&kvm->irq_lock);
>  			if (ioctl == KVM_IRQ_LINE_STATUS) {
>  				irq_event.status = status;
>  				if (copy_to_user(argp, &irq_event,
> @@ -2158,12 +2158,11 @@ mmio:
>  	 */
>  	mutex_lock(&vcpu->kvm->lock);
>  	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
> +	mutex_unlock(&vcpu->kvm->lock);
>  	if (mmio_dev) {
>  		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
> -		mutex_unlock(&vcpu->kvm->lock);
>  		return X86EMUL_CONTINUE;
>  	}
> -	mutex_unlock(&vcpu->kvm->lock);
>  
>  	vcpu->mmio_needed = 1;
>
>  	vcpu->mmio_phys_addr = gpa;
> @@ -2541,7 +2540,6 @@ static void kernel_pio(struct kvm_io_dev
>  {
>  	/* TODO: String I/O for in kernel device */
>  
> -	mutex_lock(&vcpu->kvm->lock);
>  	if (vcpu->arch.pio.in)
>  		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
>  				  vcpu->arch.pio.size,
> @@ -2550,7 +2548,6 @@ static void kernel_pio(struct kvm_io_dev
>  		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
>  				   vcpu->arch.pio.size,
>  				   pd);
> -	mutex_unlock(&vcpu->kvm->lock);
>  }
>  
>  static void pio_string_write(struct kvm_io_device *pio_dev,
> @@ -2560,14 +2557,12 @@ static void pio_string_write(struct kvm_
>  	void *pd = vcpu->arch.pio_data;
>  	int i;
>  
> -	mutex_lock(&vcpu->kvm->lock);
>  	for (i = 0; i < io->cur_count; i++) {
>  		kvm_iodevice_write(pio_dev, io->port,
>  				   io->size,
>  				   pd);
>  		pd += io->size;
>  	}
> -	mutex_unlock(&vcpu->kvm->lock);
>  }
>  
>  static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
> @@ -2604,7 +2599,9 @@ int kvm_emulate_pio(struct kvm_vcpu *vcp
>  	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
>  	memcpy(vcpu->arch.pio_data, &val, 4);
>  
> +	mutex_lock(&vcpu->kvm->lock);
>  	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
> +	mutex_unlock(&vcpu->kvm->lock);
>  	if (pio_dev) {
>  		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
>  		complete_pio(vcpu);
> @@ -2668,9 +2665,11 @@ int kvm_emulate_pio_string(struct kvm_vc
>  
>  	vcpu->arch.pio.guest_gva = address;
>  
> +	mutex_lock(&vcpu->kvm->lock);
>  	pio_dev = vcpu_find_pio_dev(vcpu, port,
>  				    vcpu->arch.pio.cur_count,
>  				    !vcpu->arch.pio.in);
> +	mutex_unlock(&vcpu->kvm->lock);
>  	if (!vcpu->arch.pio.in) {
>  		/* string PIO write */
>  		ret = pio_copy_data(vcpu);
> Index: kvm/virt/kvm/kvm_main.c
> ===================================================================
> --- kvm.orig/virt/kvm/kvm_main.c
> +++ kvm/virt/kvm/kvm_main.c
> @@ -62,6 +62,12 @@
>  MODULE_AUTHOR("Qumranet");
>  MODULE_LICENSE("GPL");
>  
> +/*
> + * Ordering of locks:
> + *
> + * 		kvm->lock --> kvm->irq_lock
> + */
> +
>  DEFINE_SPINLOCK(kvm_lock);
>  LIST_HEAD(vm_list);
>  
> @@ -126,11 +132,7 @@ static void kvm_assigned_dev_interrupt_w
>  				    interrupt_work);
>  	kvm = assigned_dev->kvm;
>  
> -	/* This is taken to safely inject irq inside the guest. When
> -	 * the interrupt injection (or the ioapic code) uses a
> -	 * finer-grained lock, update this
> -	 */
> -	mutex_lock(&kvm->lock);
> +	mutex_lock(&kvm->irq_lock);
>  	spin_lock_irq(&assigned_dev->assigned_dev_lock);
>  	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
>  		struct kvm_guest_msix_entry *guest_entries =
> @@ -159,7 +161,7 @@ static void kvm_assigned_dev_interrupt_w
>  	}
>  
>  	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
> -	mutex_unlock(&assigned_dev->kvm->lock);
> +	mutex_unlock(&assigned_dev->kvm->irq_lock);
>  }
>  
>  static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
> @@ -215,7 +217,7 @@ static void kvm_assigned_dev_ack_irq(str
>  static void deassign_guest_irq(struct kvm *kvm,
>  			       struct kvm_assigned_dev_kernel *assigned_dev)
>  {
> -	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
> +	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
>  	assigned_dev->ack_notifier.gsi = -1;
>  
>  	if (assigned_dev->irq_source_id != -1)
> Index: kvm/virt/kvm/irq_comm.c
> ===================================================================
> --- kvm.orig/virt/kvm/irq_comm.c
> +++ kvm/virt/kvm/irq_comm.c
> @@ -62,6 +62,8 @@ int kvm_irq_delivery_to_apic(struct kvm 
>  	int i, r = -1;
>  	struct kvm_vcpu *vcpu, *lowest = NULL;
>  
> +	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> +
>  	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>  			kvm_is_dm_lowest_prio(irq))
>  		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
> @@ -113,7 +115,7 @@ static int kvm_set_msi(struct kvm_kernel
>  	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
>  }
>  
> -/* This should be called with the kvm->lock mutex held
> +/* This should be called with the kvm->irq_lock mutex held
>   * Return value:
>   *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
>   *  = 0   Interrupt was coalesced (previous irq is still pending)
> @@ -125,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
>  	unsigned long *irq_state, sig_level;
>  	int ret = -1;
>  
> +	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> +
>  	if (irq < KVM_IOAPIC_NUM_PINS) {
>  		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
>  
> @@ -174,19 +178,26 @@ void kvm_notify_acked_irq(struct kvm *kv
>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>  				   struct kvm_irq_ack_notifier *kian)
>  {
> +	mutex_lock(&kvm->irq_lock);
>  	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
> +	mutex_unlock(&kvm->irq_lock);
>  }
>  
> -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
> +void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> +				    struct kvm_irq_ack_notifier *kian)
>  {
> +	mutex_lock(&kvm->irq_lock);
>  	hlist_del_init(&kian->link);
> +	mutex_unlock(&kvm->irq_lock);
>  }
>  
> -/* The caller must hold kvm->lock mutex */
>  int kvm_request_irq_source_id(struct kvm *kvm)
>  {
>  	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
> -	int irq_source_id = find_first_zero_bit(bitmap,
> +	int irq_source_id;
> +
> +	mutex_lock(&kvm->irq_lock);
> +	irq_source_id = find_first_zero_bit(bitmap,
>  				sizeof(kvm->arch.irq_sources_bitmap));
>  
>  	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
> @@ -196,6 +207,7 @@ int kvm_request_irq_source_id(struct kvm
>  
>  	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
>  	set_bit(irq_source_id, bitmap);
> +	mutex_unlock(&kvm->irq_lock);
>  
>  	return irq_source_id;
>  }
> @@ -206,6 +218,7 @@ void kvm_free_irq_source_id(struct kvm *
>  
>  	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
>  
> +	mutex_lock(&kvm->irq_lock);
>  	if (irq_source_id < 0 ||
>  	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
>  		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
> @@ -214,19 +227,24 @@ void kvm_free_irq_source_id(struct kvm *
>  	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
>  		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
>  	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
> +	mutex_unlock(&kvm->irq_lock);
>  }
>  
>  void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>  				    struct kvm_irq_mask_notifier *kimn)
>  {
> +	mutex_lock(&kvm->irq_lock);
>  	kimn->irq = irq;
>  	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> +	mutex_unlock(&kvm->irq_lock);
>  }
>  
>  void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>  				      struct kvm_irq_mask_notifier *kimn)
>  {
> +	mutex_lock(&kvm->irq_lock);
>  	hlist_del(&kimn->link);
> +	mutex_unlock(&kvm->irq_lock);
>  }
>  
>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> @@ -234,6 +252,8 @@ void kvm_fire_mask_notifiers(struct kvm 
>  	struct kvm_irq_mask_notifier *kimn;
>  	struct hlist_node *n;
>  
> +	WARN_ON(!mutex_is_locked(&kvm->lock));
> +
>  	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
>  		if (kimn->irq == irq)
>  			kimn->func(kimn, mask);
> @@ -249,7 +269,9 @@ static void __kvm_free_irq_routing(struc
>  
>  void kvm_free_irq_routing(struct kvm *kvm)
>  {
> +	mutex_lock(&kvm->irq_lock);
>  	__kvm_free_irq_routing(&kvm->irq_routing);
> +	mutex_unlock(&kvm->irq_lock);
>  }
>  
>  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> @@ -323,13 +345,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  		e = NULL;
>  	}
>  
> -	mutex_lock(&kvm->lock);
> +	mutex_lock(&kvm->irq_lock);
>  	list_splice(&kvm->irq_routing, &tmp);
>  	INIT_LIST_HEAD(&kvm->irq_routing);
>  	list_splice(&irq_list, &kvm->irq_routing);
>  	INIT_LIST_HEAD(&irq_list);
>  	list_splice(&tmp, &irq_list);
> -	mutex_unlock(&kvm->lock);
> +	mutex_unlock(&kvm->irq_lock);
>  
>  	r = 0;
>  
> Index: kvm/arch/x86/kvm/lapic.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/lapic.c
> +++ kvm/arch/x86/kvm/lapic.c
> @@ -425,7 +425,9 @@ static void apic_set_eoi(struct kvm_lapi
>  		trigger_mode = IOAPIC_LEVEL_TRIG;
>  	else
>  		trigger_mode = IOAPIC_EDGE_TRIG;
> +	mutex_lock(&apic->vcpu->kvm->irq_lock);
>  	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> +	mutex_unlock(&apic->vcpu->kvm->irq_lock);
>  }
>  
>  static void apic_send_ipi(struct kvm_lapic *apic)
> @@ -449,7 +451,9 @@ static void apic_send_ipi(struct kvm_lap
>  		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
>  		   irq.vector);
>  
> +	mutex_lock(&apic->vcpu->kvm->irq_lock);
>  	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
> +	mutex_unlock(&apic->vcpu->kvm->irq_lock);
>  }
>  
>  static u32 apic_get_tmcct(struct kvm_lapic *apic)
> Index: kvm/include/linux/kvm_host.h
> ===================================================================
> --- kvm.orig/include/linux/kvm_host.h
> +++ kvm/include/linux/kvm_host.h
> @@ -375,7 +375,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
>  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>  				   struct kvm_irq_ack_notifier *kian);
> -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
> +void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> +				   struct kvm_irq_ack_notifier *kian);
>  int kvm_request_irq_source_id(struct kvm *kvm);
>  void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
>  
>
>   



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 12:06   ` Avi Kivity
@ 2009-05-20 14:09     ` Marcelo Tosatti
  2009-05-20 14:29       ` Avi Kivity
  0 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 14:09 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Wed, May 20, 2009 at 03:06:26PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> Index: kvm/virt/kvm/coalesced_mmio.c
>> ===================================================================
>> --- kvm.orig/virt/kvm/coalesced_mmio.c
>> +++ kvm/virt/kvm/coalesced_mmio.c
>> @@ -26,9 +26,10 @@ static int coalesced_mmio_in_range(struc
>>  	if (!is_write)
>>  		return 0;
>>  -	/* kvm->lock is taken by the caller and must be not released before
>> -         * dev.read/write
>> -         */
>> +	/*
>> + 	 * dev->lock must not be released before coalesced_mmio_write.
>> +	 */
>> +	mutex_lock(&dev->lock);
>>   	/* Are we able to batch it ? */
>>  @@ -41,6 +42,7 @@ static int coalesced_mmio_in_range(struc
>>  							KVM_COALESCED_MMIO_MAX;
>>  	if (next == dev->kvm->coalesced_mmio_ring->first) {
>>  		/* full */
>> +		mutex_unlock(&dev->lock);
>>  		return 0;
>>  	}
>>  @@ -57,6 +59,7 @@ static int coalesced_mmio_in_range(struc
>>  		    addr + len <= zone->addr + zone->size)
>>  			return 1;
>>  	}
>> +	mutex_unlock(&dev->lock);
>>  	return 0;
>>  }
>>   
>
> So we have a function that takes a lock and conditionally releases it?

Yes, but it is correct: it will only return with the lock held in case
it returns 1, in which case it's guaranteed ->write will be called (which
will unlock it).

It should check the range first and/or use some smarter synchronization,
but one thing at a time.
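
Schematically, the contract is (stripped-down sketch; the helpers
stand in for the real checks):

        /* in_range(): returns 0 with the lock released, 1 with it held */
        mutex_lock(&dev->lock);
        if (ring_full(dev) || !in_batchable_zone(dev, addr, len)) {
                mutex_unlock(&dev->lock);
                return 0;       /* caller falls back to userspace */
        }
        return 1;               /* ->write() runs next and unlocks */

        /* write(): consumes the lock taken by in_range() */
        append_to_ring(dev, addr, len, val);
        mutex_unlock(&dev->lock);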


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-20 12:11   ` Avi Kivity
@ 2009-05-20 14:11     ` Marcelo Tosatti
  2009-05-20 14:29       ` Avi Kivity
  0 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 14:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Wed, May 20, 2009 at 03:11:35PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> Index: kvm/include/linux/kvm_host.h
>> ===================================================================
>> --- kvm.orig/include/linux/kvm_host.h
>> +++ kvm/include/linux/kvm_host.h
>> @@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
>>  };
>>   struct kvm {
>> -	struct mutex lock; /* protects the vcpus array and APIC accesses */
>>   
>
> Why move?

To have it close to the data it protects, for readability. Need to
pahole and organize it at some point.



^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 4/4] KVM: switch irq injection/acking to irq_lock
  2009-05-20 12:15   ` Gregory Haskins
@ 2009-05-20 14:16     ` Marcelo Tosatti
  2009-05-24 14:53     ` Avi Kivity
  1 sibling, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 14:16 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm

On Wed, May 20, 2009 at 08:15:55AM -0400, Gregory Haskins wrote:
> Marcelo Tosatti wrote:
> > Switch irq injection/acking to irq_lock, and change PIO/MMIO paths 
> > so that the device search is protected by kvm->lock, but not the
> > read/write callbacks (which is the responsibility of the device).
> >
> > Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> >
> > Index: kvm/arch/x86/kvm/i8254.c
> > ===================================================================
> > --- kvm.orig/arch/x86/kvm/i8254.c
> > +++ kvm/arch/x86/kvm/i8254.c
> > @@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
> >  	struct kvm_vcpu *vcpu;
> >  	int i;
> >  
> > -	mutex_lock(&kvm->lock);
> > +	mutex_lock(&kvm->irq_lock);
> >   
> 
> There would be advantages to having irq_lock be an
> interrupt/nonpreempt-friendly design, such as s/mutex/spinlock.  For
> instance, irqfd could inject the interrupt directly without deferring to
> a workqueue.  I'm not sure if this is possible/easy, but it's something
> to consider.

It is possible to convert to a spinlock (and required to switch
bus->devs[] to be protected by RCU, as discussed).

Now, whether it's worth making it interrupt-safe depends on how bad
disabling interrupts is on the paths that do not currently disable
them. I've seen kvm_set_irq relatively high in profiling yesterday,
BTW, but this is just hand-waving.
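
For illustration, the reader side of such a conversion could look like
this (sketch only; the helper is made up, and registration would have
to publish bus updates with rcu_assign_pointer() and defer frees past
synchronize_rcu()):

        rcu_read_lock();
        dev = find_dev_rcu(bus, addr, len, is_write);   /* hypothetical */
        if (dev)
                kvm_iodevice_write(dev, addr, len, val);
        rcu_read_unlock();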

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 14:09     ` Marcelo Tosatti
@ 2009-05-20 14:29       ` Avi Kivity
  2009-05-20 15:13         ` Marcelo Tosatti
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
  0 siblings, 2 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-20 14:29 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:

  

>> So we have a function that takes a lock and conditionally releases it?
>>     
>
> Yes, but it is correct: it will only return with the lock held in case
> it returns 1, in which case its guaranteed ->write will be called (which
> will unlock it).
>
> It should check the range first and/or use some smarter synchronization,
> but one thing at a time.
>   

Yes it's correct but we'll get an endless stream of patches to 'fix' it 
because it is so unorthodox.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-20 14:11     ` Marcelo Tosatti
@ 2009-05-20 14:29       ` Avi Kivity
  0 siblings, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-20 14:29 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:
> On Wed, May 20, 2009 at 03:11:35PM +0300, Avi Kivity wrote:
>   
>> Marcelo Tosatti wrote:
>>     
>>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>>
>>> Index: kvm/include/linux/kvm_host.h
>>> ===================================================================
>>> --- kvm.orig/include/linux/kvm_host.h
>>> +++ kvm/include/linux/kvm_host.h
>>> @@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
>>>  };
>>>   struct kvm {
>>> -	struct mutex lock; /* protects the vcpus array and APIC accesses */
>>>   
>>>       
>> Why move?
>>     
>
> To have it close to the data it protects, for readability. Need to
> pahole and organize it at some point.
>
>   

Ok.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 14:29       ` Avi Kivity
@ 2009-05-20 15:13         ` Marcelo Tosatti
  2009-05-20 15:22           ` Marcelo Tosatti
  2009-05-20 15:22           ` Avi Kivity
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
  1 sibling, 2 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 15:13 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Wed, May 20, 2009 at 05:29:23PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>
>  
>
>>> So we have a function that takes a lock and conditionally releases it?
>>>     
>>
>> Yes, but it is correct: it will only return with the lock held in case
>> it returns 1, in which case it's guaranteed ->write will be called (which
>> will unlock it).
>>
>> It should check the range first and/or use some smarter synchronization,
>> but one thing at a time.
>>   
>
> Yes it's correct but we'll get an endless stream of patches to 'fix' it  
> because it is so unorthodox.

Does it have to guarantee any kind of ordering in case of parallel
writes by distinct vcpus? This is what it does now (so if a vcpu
arrives first, the second will wait until the first is finished
processing).

I suppose that is the responsibility of the guest (if it does MMIO
writes to a device in parallel it'll screw up on real HW too).

Because in that case, you can drop the mutex and protect only the kvm
data.



^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 15:13         ` Marcelo Tosatti
@ 2009-05-20 15:22           ` Marcelo Tosatti
  2009-05-20 15:22           ` Avi Kivity
  1 sibling, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 15:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Wed, May 20, 2009 at 12:13:03PM -0300, Marcelo Tosatti wrote:
> On Wed, May 20, 2009 at 05:29:23PM +0300, Avi Kivity wrote:
> > Marcelo Tosatti wrote:
> >
> >  
> >
> >>> So we have a function that takes a lock and conditionally releases it?
> >>>     
> >>
> >> Yes, but it is correct: it will only return with the lock held in case
> >> it returns 1, in which case it's guaranteed ->write will be called (which
> >> will unlock it).
> >>
> >> It should check the range first and/or use some smarter synchronization,
> >> but one thing at a time.
> >>   
> >
> > Yes it's correct but we'll get an endless stream of patches to 'fix' it  
> > because it is so unorthodox.
> 
> Does it have to guarantee any kind of ordering in case of parallel
> writes by distinct vcpus? This is what it does now (so if a vcpu
> arrives first, the second will wait until the first is finished
> processing).
> 
> I suppose that is the responsibility of the guest (if it does MMIO
> writes to a device in parallel it'll screw up on real HW too).
> 
> Because in that case, you can drop the mutex and protect only the kvm
> data.

If you want it to provide ordering (as in processing the MMIO writes,
by multiple vcpus, in the order they happen), you need a mutex or
spinlock.

And in that case, I don't see any other way around this given the way
->in_range / ->read / ->write work.

Even if you change the order in which the full-ring check and the
coalesced-allowed-in-this-range check happen, you still need a
spinlock/mutex held in this unorthodox way.
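
I.e. even with the range check hoisted to the front (sketch, same
made-up helpers as before):

        if (!in_batchable_zone(dev, addr, len))
                return 0;               /* no lock taken */
        mutex_lock(&dev->lock);
        if (ring_full(dev)) {
                mutex_unlock(&dev->lock);
                return 0;
        }
        return 1;                       /* still returns with the lock held */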

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 15:13         ` Marcelo Tosatti
  2009-05-20 15:22           ` Marcelo Tosatti
@ 2009-05-20 15:22           ` Avi Kivity
  1 sibling, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-20 15:22 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:

  

>> Yes it's correct but we'll get an endless stream of patches to 'fix' it  
>> because it is so unorthodox.
>>     
>
> Does it have to guarantee any kind of ordering in case of parallel
> writes by distinct vcpus? This is what it does now (so if a vcpu
> arrives first, the second will wait until the first is finished
> processing).
>   

No.

> I suppose that is the responsibility of the guest (if it does MMIO
> writes to a device in parallel it'll screw up on real HW too).
>   

Yes.

> Because in that case, you can drop the mutex and protect only the kvm
> data.
>   

One has to be before the other.  The order doesn't matter, but both have 
to be there.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 0/4] move irq protection role to separate lock v2
  2009-05-20 14:29       ` Avi Kivity
  2009-05-20 15:13         ` Marcelo Tosatti
@ 2009-05-20 18:48         ` Marcelo Tosatti
  2009-05-20 18:48           ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
                             ` (4 more replies)
  1 sibling, 5 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 18:48 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Gregory Haskins

Addressing the comments and covering irqfd (I did not remove the
deadlock avoidance there since it might be useful later).

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
@ 2009-05-20 18:48           ` Marcelo Tosatti
  2009-05-20 18:48           ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
                             ` (3 subsequent siblings)
  4 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 18:48 UTC (permalink / raw)
  To: Avi Kivity, kvm; +Cc: Gregory Haskins, Marcelo Tosatti

[-- Attachment #1: pic-clear-isr-lock --]
[-- Type: text/plain, Size: 514 bytes --]

isr_ack is protected by kvm_pic->lock

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8259.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -72,8 +72,10 @@ static void pic_clear_isr(struct kvm_kpi
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
+	pic_lock(s);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
+	pic_unlock(s);
 }
 
 /*

-- 

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
  2009-05-20 18:48           ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
@ 2009-05-20 18:48           ` Marcelo Tosatti
  2009-05-24 14:04             ` Avi Kivity
  2009-05-20 18:48           ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
                             ` (2 subsequent siblings)
  4 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 18:48 UTC (permalink / raw)
  To: Avi Kivity, kvm; +Cc: Gregory Haskins, Marcelo Tosatti

[-- Attachment #1: coalesced-mmio-lock --]
[-- Type: text/plain, Size: 2666 bytes --]

Get rid of the kvm->lock dependency in the coalesced_mmio methods. Use
an atomic variable instead to guarantee that only one vcpu is batching
data into the ring at a given time.
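
atomic_add_unless(&dev->in_use, 1, 1) acts as a try-lock here: it
increments 0 -> 1 and returns nonzero, or returns 0 if in_use is
already 1, in which case the losing vcpu falls back to a userspace
exit:

        if (!atomic_add_unless(&dev->in_use, 1, 1))
                return 0;               /* another vcpu owns the ring */
        /* ... batch the write into the ring ... */
        smp_wmb();
        atomic_set(&dev->in_use, 0);    /* release */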

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
===================================================================
--- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
+++ kvm-irqlock/virt/kvm/coalesced_mmio.c
@@ -26,9 +26,12 @@ static int coalesced_mmio_in_range(struc
 	if (!is_write)
 		return 0;
 
-	/* kvm->lock is taken by the caller and must be not released before
-         * dev.read/write
-         */
+	/*
+ 	 * Some other vcpu might be batching data into the ring,
+ 	 * fallback to userspace. Ordering not our problem.
+ 	 */
+	if (!atomic_add_unless(&dev->in_use, 1, 1))
+		return 0;
 
 	/* Are we able to batch it ? */
 
@@ -41,7 +44,7 @@ static int coalesced_mmio_in_range(struc
 							KVM_COALESCED_MMIO_MAX;
 	if (next == dev->kvm->coalesced_mmio_ring->first) {
 		/* full */
-		return 0;
+		goto out_denied;
 	}
 
 	/* is it in a batchable area ? */
@@ -57,6 +60,8 @@ static int coalesced_mmio_in_range(struc
 		    addr + len <= zone->addr + zone->size)
 			return 1;
 	}
+out_denied:
+	atomic_set(&dev->in_use, 0);
 	return 0;
 }
 
@@ -67,15 +72,14 @@ static void coalesced_mmio_write(struct 
 				(struct kvm_coalesced_mmio_dev*)this->private;
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
 
-	/* kvm->lock must be taken by caller before call to in_range()*/
-
 	/* copy data in first free entry of the ring */
 
 	ring->coalesced_mmio[ring->last].phys_addr = addr;
 	ring->coalesced_mmio[ring->last].len = len;
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
-	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+	smp_wmb();
+	atomic_set(&dev->in_use, 0);
 }
 
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
@@ -90,6 +94,8 @@ int kvm_coalesced_mmio_init(struct kvm *
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
+	atomic_set(&dev->in_use, 0);
+
 	dev->dev.write  = coalesced_mmio_write;
 	dev->dev.in_range  = coalesced_mmio_in_range;
 	dev->dev.destructor  = coalesced_mmio_destructor;
Index: kvm-irqlock/virt/kvm/coalesced_mmio.h
===================================================================
--- kvm-irqlock.orig/virt/kvm/coalesced_mmio.h
+++ kvm-irqlock/virt/kvm/coalesced_mmio.h
@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	atomic_t in_use;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
  2009-05-20 18:48           ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
  2009-05-20 18:48           ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-05-20 18:48           ` Marcelo Tosatti
  2009-05-24 14:10             ` Avi Kivity
  2009-05-20 18:48           ` [patch 4/4] KVM: switch irq injection/acking " Marcelo Tosatti
  2009-05-21  4:50           ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
  4 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 18:48 UTC (permalink / raw)
  To: Avi Kivity, kvm; +Cc: Gregory Haskins, Marcelo Tosatti

[-- Attachment #1: ioapic-lock --]
[-- Type: text/plain, Size: 3320 bytes --]

Subject says it all.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-irqlock/include/linux/kvm_host.h
===================================================================
--- kvm-irqlock.orig/include/linux/kvm_host.h
+++ kvm-irqlock/include/linux/kvm_host.h
@@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
 };
 
 struct kvm {
-	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
@@ -132,6 +131,12 @@ struct kvm {
 					KVM_PRIVATE_MEM_SLOTS];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	struct list_head vm_list;
+	struct mutex lock; /*
+			    * - protects mmio_bus, pio_bus.
+			    * - protects a few concurrent ioctl's (FIXME).
+			    * - protects concurrent create_vcpu, but
+			    *   kvm->vcpus walkers do it locklessly (FIXME).
+			    */
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
 	struct list_head irqfds;
@@ -143,6 +148,7 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+	struct mutex irq_lock; /* protects high level irq logic, ioapic */
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
Index: kvm-irqlock/virt/kvm/ioapic.c
===================================================================
--- kvm-irqlock.orig/virt/kvm/ioapic.c
+++ kvm-irqlock/virt/kvm/ioapic.c
@@ -225,6 +225,10 @@ static int ioapic_in_range(struct kvm_io
 {
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 
+	/*
+	 * Lockless check for ioapic->base_address on the hazy assumption
+	 * it does not change during the lifetime of a VM.
+	 */
 	return ((addr >= ioapic->base_address &&
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
@@ -238,6 +242,7 @@ static void ioapic_mmio_read(struct kvm_
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
+	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
@@ -264,6 +269,7 @@ static void ioapic_mmio_read(struct kvm_
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
@@ -275,6 +281,8 @@ static void ioapic_mmio_write(struct kvm
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
@@ -300,6 +308,7 @@ static void ioapic_mmio_write(struct kvm
 	default:
 		break;
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
Index: kvm-irqlock/virt/kvm/kvm_main.c
===================================================================
--- kvm-irqlock.orig/virt/kvm/kvm_main.c
+++ kvm-irqlock/virt/kvm/kvm_main.c
@@ -985,6 +985,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm_io_bus_init(&kvm->pio_bus);
 	INIT_LIST_HEAD(&kvm->irqfds);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 4/4] KVM: switch irq injection/acking to irq_lock
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
                             ` (2 preceding siblings ...)
  2009-05-20 18:48           ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
@ 2009-05-20 18:48           ` Marcelo Tosatti
  2009-05-21  4:50           ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
  4 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-20 18:48 UTC (permalink / raw)
  To: Avi Kivity, kvm; +Cc: Gregory Haskins, Marcelo Tosatti

[-- Attachment #1: irqlock --]
[-- Type: text/plain, Size: 11937 bytes --]

Switch irq injection/acking to irq_lock, and change PIO/MMIO paths 
so that the device search is protected by kvm->lock, but not the
read/write callbacks (which are the responsibility of the device).

Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -2077,10 +2077,10 @@ long kvm_arch_vm_ioctl(struct file *filp
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -2326,12 +2326,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
@@ -2381,12 +2380,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
@@ -2709,7 +2707,6 @@ static void kernel_pio(struct kvm_io_dev
 {
 	/* TODO: String I/O for in kernel device */
 
-	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->arch.pio.in)
 		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
 				  vcpu->arch.pio.size,
@@ -2718,7 +2715,6 @@ static void kernel_pio(struct kvm_io_dev
 		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
 				   vcpu->arch.pio.size,
 				   pd);
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static void pio_string_write(struct kvm_io_device *pio_dev,
@@ -2728,14 +2724,12 @@ static void pio_string_write(struct kvm_
 	void *pd = vcpu->arch.pio_data;
 	int i;
 
-	mutex_lock(&vcpu->kvm->lock);
 	for (i = 0; i < io->cur_count; i++) {
 		kvm_iodevice_write(pio_dev, io->port,
 				   io->size,
 				   pd);
 		pd += io->size;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2772,7 +2766,9 @@ int kvm_emulate_pio(struct kvm_vcpu *vcp
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
 		complete_pio(vcpu);
@@ -2836,9 +2832,12 @@ int kvm_emulate_pio_string(struct kvm_vc
 
 	vcpu->arch.pio.guest_gva = address;
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
 				    !vcpu->arch.pio.in);
+	mutex_unlock(&vcpu->kvm->lock);
+
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -62,6 +62,12 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -126,11 +132,7 @@ static void kvm_assigned_dev_interrupt_w
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -159,7 +161,7 @@ static void kvm_assigned_dev_interrupt_w
 	}
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -215,7 +217,7 @@ static void kvm_assigned_dev_ack_irq(str
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
Index: kvm/virt/kvm/irq_comm.c
===================================================================
--- kvm.orig/virt/kvm/irq_comm.c
+++ kvm/virt/kvm/irq_comm.c
@@ -62,6 +62,8 @@ int kvm_irq_delivery_to_apic(struct kvm 
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -113,7 +115,7 @@ static int kvm_set_msi(struct kvm_kernel
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
@@ -174,19 +178,26 @@ void kvm_notify_acked_irq(struct kvm *kv
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -196,6 +207,7 @@ int kvm_request_irq_source_id(struct kvm
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -206,6 +218,7 @@ void kvm_free_irq_source_id(struct kvm *
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -214,19 +227,24 @@ void kvm_free_irq_source_id(struct kvm *
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -234,6 +252,8 @@ void kvm_fire_mask_notifiers(struct kvm 
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -249,7 +269,9 @@ static void __kvm_free_irq_routing(struc
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -323,13 +345,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
 
Index: kvm/arch/x86/kvm/lapic.c
===================================================================
--- kvm.orig/arch/x86/kvm/lapic.c
+++ kvm/arch/x86/kvm/lapic.c
@@ -425,7 +425,9 @@ static void apic_set_eoi(struct kvm_lapi
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -449,7 +451,9 @@ static void apic_send_ipi(struct kvm_lap
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -376,7 +376,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
Index: kvm/virt/kvm/eventfd.c
===================================================================
--- kvm.orig/virt/kvm/eventfd.c
+++ kvm/virt/kvm/eventfd.c
@@ -52,10 +52,10 @@ irqfd_inject(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, work);
 	struct kvm *kvm = irqfd->kvm;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 0/4] move irq protection role to separate lock v2
  2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
                             ` (3 preceding siblings ...)
  2009-05-20 18:48           ` [patch 4/4] KVM: switch irq injection/acking " Marcelo Tosatti
@ 2009-05-21  4:50           ` Marcelo Tosatti
  2009-05-21  6:55             ` Christian Bornträger
  4 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-21  4:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Christian Borntraeger

On Wed, May 20, 2009 at 03:48:41PM -0300, Marcelo Tosatti wrote:
> Addressing comment and covering irqfd (did not remove the deadlock avoidance 
> there since it might be useful later).

I can't see any reason why serializing the vm ioctl is a bad idea.

There is one indication of disagreement here:

http://patchwork.kernel.org/patch/5661/

"The original intent was that kvm_arch_vcpu_create() not "link in" the
vcpu to any registers. That allows most of the vcpu creation to happen
outside a lock."

But I fail to see the case where vcpu creation is a fast path (unless
you're benchmarking cpu hotplug/hotunplug).

Note there is no risk of a deadlock in case a vcpu thread attempts to
execute vm_ioctl.

Also, since vm_ioctl_lock will always be the first lock grabbed in the
vm_ioctl path, and is not used in any other path, there's no risk of
deadlock.

The reason for this is that some sites, such as KVM_CREATE_PIT, handle
concurrency properly (with custom locking that this makes obsolete),
while others don't.


Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -131,9 +131,9 @@ struct kvm {
 					KVM_PRIVATE_MEM_SLOTS];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	struct list_head vm_list;
+	struct mutex vm_ioctl_lock; /* protects concurrent vm fd ioctls */
 	struct mutex lock; /*
 			    * - protects mmio_bus, pio_bus.
-			    * - protects a few concurrent ioctl's (FIXME).
 			    * - protects concurrent create_vcpu, but
 			    *   kvm->vcpus walkers do it locklessly (FIXME).
 			    */
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -988,6 +988,7 @@ static struct kvm *kvm_create_vm(void)
 	INIT_LIST_HEAD(&kvm->irqfds);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
+	mutex_init(&kvm->vm_ioctl_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
@@ -2053,6 +2054,9 @@ static long kvm_vm_ioctl(struct file *fi
 
 	if (kvm->mm != current->mm)
 		return -EIO;
+
+	mutex_lock(&kvm->vm_ioctl_lock);
+
 	switch (ioctl) {
 	case KVM_CREATE_VCPU:
 		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
@@ -2228,6 +2232,7 @@ static long kvm_vm_ioctl(struct file *fi
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
 out:
+	mutex_unlock(&kvm->vm_ioctl_lock);
 	return r;
 }
 

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 0/4] move irq protection role to separate lock v2
  2009-05-21  4:50           ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
@ 2009-05-21  6:55             ` Christian Bornträger
  2009-05-21  7:26               ` Avi Kivity
  0 siblings, 1 reply; 49+ messages in thread
From: Christian Bornträger @ 2009-05-21  6:55 UTC (permalink / raw)
  To: Marcelo Tosatti, Christian Ehrhardt, Carsten Otte
  Cc: Avi Kivity, kvm, Christian Borntraeger

On Thursday 21 May 2009 06:50:15, Marcelo Tosatti wrote:
> But I fail to see the case where vcpu creation is a fast path (unless
> you're benchmarking cpu hotplug/hotunplug).

[...]

> @@ -2053,6 +2054,9 @@ static long kvm_vm_ioctl(struct file *fi
>
>  	if (kvm->mm != current->mm)
>  		return -EIO;
> +
> +	mutex_lock(&kvm->vm_ioctl_lock);
> +
>  	switch (ioctl) {
>  	case KVM_CREATE_VCPU:
>  		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
> @@ -2228,6 +2232,7 @@ static long kvm_vm_ioctl(struct file *fi
>  		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>  	}
>  out:
> +	mutex_unlock(&kvm->vm_ioctl_lock);
>  	return r;
>  }

The thing that looks worrisome is that the s390 version of kvm_arch_vm_ioctl 
has KVM_S390_INTERRUPT. This allows userspace to inject interrupts - which 
would be serialized. The thing is that external interrupts and I/O interrupts 
are floating - which means they can arrive on any cpu. This is somewhat of a 
fast path.
On the other hand, kvm_s390_inject_vm already takes the kvm->lock to protect 
against hotplug. With this patch we might be able to remove the kvm->lock in 
kvm_s390_inject_vm - that would reduce the impact. 

This needs more thinking on our side.

Christian

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 0/4] move irq protection role to separate lock v2
  2009-05-21  6:55             ` Christian Bornträger
@ 2009-05-21  7:26               ` Avi Kivity
  2009-05-21 14:32                 ` Marcelo Tosatti
  0 siblings, 1 reply; 49+ messages in thread
From: Avi Kivity @ 2009-05-21  7:26 UTC (permalink / raw)
  To: Christian Bornträger
  Cc: Marcelo Tosatti, Christian Ehrhardt, Carsten Otte, kvm,
	Christian Borntraeger

Christian Bornträger wrote:
>> @@ -2053,6 +2054,9 @@ static long kvm_vm_ioctl(struct file *fi
>>
>>  	if (kvm->mm != current->mm)
>>  		return -EIO;
>> +
>> +	mutex_lock(&kvm->vm_ioctl_lock);
>> +
>>  	switch (ioctl) {
>>  	case KVM_CREATE_VCPU:
>>  		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
>> @@ -2228,6 +2232,7 @@ static long kvm_vm_ioctl(struct file *fi
>>  		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>>  	}
>>  out:
>> +	mutex_unlock(&kvm->vm_ioctl_lock);
>>  	return r;
>>  }
>>     
>
> The thing that looks worrisome is that the s390 version of kvm_arch_vm_ioctl 
> has KVM_S390_INTERRUPT. This allows userspace to inject interrupts - which 
> would be serialized. The thing is that external interrupts and I/O interrupts 
> are floating - which means they can arrive on any cpu. This is somewhat of a 
> fast path.
> On the other hand, kvm_s390_inject_vm already takes the kvm->lock to protect 
> against hotplug. With this patch we might be able to remove the kvm->lock in 
> kvm_s390_inject_vm - that would reduce the impact. 
>
> This needs more thinking on our side.
>   

x86 actually shares the same problem.  KVM_IRQ_LINE interrupts may 
arrive at any vcpu.  Furthermore, with irqfd interrupts may be injected 
from userspace (the vm process or other processes) or from the kernel 
(assigned device, kernel virtio-net device).  So we have the same 
motivation to drop this lock and replace it by rcu for the fast paths.
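
Roughly, the fast path would then look like this (a sketch only -- the
table type, the kvm->irq_table field and set_irq_fast()/set_irq_table()
are all made-up names, not the existing kvm structures):

#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/slab.h>

/* illustrative routing entry and table, published as one RCU pointer */
struct irq_route {
	int (*set)(struct kvm *kvm, int gsi, int level);
};

struct irq_table {
	int nr;
	struct irq_route route[];
};

/* fast path -- injection from any context, no mutex taken */
static int set_irq_fast(struct kvm *kvm, int gsi, int level)
{
	struct irq_table *t;
	int r = -1;

	rcu_read_lock();
	t = rcu_dereference(kvm->irq_table);
	if (t && gsi < t->nr)
		r = t->route[gsi].set(kvm, gsi, level);
	rcu_read_unlock();
	return r;
}

/* slow path -- replacing the table, writers still serialized */
static void set_irq_table(struct kvm *kvm, struct irq_table *new)
{
	struct irq_table *old;

	mutex_lock(&kvm->irq_lock);
	old = kvm->irq_table;
	rcu_assign_pointer(kvm->irq_table, new);
	mutex_unlock(&kvm->irq_lock);

	synchronize_rcu();	/* wait out readers of "old" */
	kfree(old);
}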

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 0/4] move irq protection role to separate lock v2
  2009-05-21  7:26               ` Avi Kivity
@ 2009-05-21 14:32                 ` Marcelo Tosatti
  2009-05-21 15:02                   ` Avi Kivity
  0 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-21 14:32 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Christian Bornträger, Christian Ehrhardt, Carsten Otte, kvm,
	Christian Borntraeger

On Thu, May 21, 2009 at 10:26:57AM +0300, Avi Kivity wrote:
> Christian Bornträger wrote:
>>> @@ -2053,6 +2054,9 @@ static long kvm_vm_ioctl(struct file *fi
>>>
>>>  	if (kvm->mm != current->mm)
>>>  		return -EIO;
>>> +
>>> +	mutex_lock(&kvm->vm_ioctl_lock);
>>> +
>>>  	switch (ioctl) {
>>>  	case KVM_CREATE_VCPU:
>>>  		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
>>> @@ -2228,6 +2232,7 @@ static long kvm_vm_ioctl(struct file *fi
>>>  		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>>>  	}
>>>  out:
>>> +	mutex_unlock(&kvm->vm_ioctl_lock);
>>>  	return r;
>>>  }
>>>     
>>
>> The thing that looks worrisome is that the s390 version of 
>> kvm_arch_vm_ioctl has KVM_S390_INTERRUPT. This allows userspace to 
>> inject interrupts - which would be serialized. The thing is that 
>> external interrupts and I/O interrupts are floating - which means they 
>> can arrive on any cpu. This is somewhat of a fast path.
>> On the other hand, kvm_s390_inject_vm already takes the kvm->lock to 
>> protect against hotplug. With this patch we might be able to remove the 
>> kvm->lock in kvm_s390_inject_vm - that would reduce the impact. 
>>
>> This needs more thinking on our side.
>>   
>
> x86 actually shares the same problem.  KVM_IRQ_LINE interrupts may  
> arrive at any vcpu.  Furthermore, with irqfd interrupts may be injected  
> from userspace (the vm process or other processes) or from the kernel  
> (assigned device, kernel virtio-net device).  So we have the same  
> motivation to drop this lock and replace it by rcu for the fast paths.

OK, will use the lock to serialize individual ioctl commands that are
not performance sensitive and need serialization.

Any objection to v2 of the irq_lock patch?


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 0/4] move irq protection role to separate lock v2
  2009-05-21 14:32                 ` Marcelo Tosatti
@ 2009-05-21 15:02                   ` Avi Kivity
  0 siblings, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-21 15:02 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Christian Bornträger, Christian Ehrhardt, Carsten Otte, kvm,
	Christian Borntraeger

Marcelo Tosatti wrote:

  

>> x86 actually shares the same problem.  KVM_IRQ_LINE interrupts may  
>> arrive at any vcpu.  Furthermore, with irqfd interrupts may be injected  
>> from userspace (the vm process or other processes) or from the kernel  
>> (assigned device, kernel virtio-net device).  So we have the same  
>> motivation to drop this lock and replace it by rcu for the fast paths.
>>     
>
> OK, will use the lock to serialize individual ioctl commands that are
> not performance sensitive and need serialization.
>   

Locks should protect data, not operations.  Something named ioctl_lock 
indicates something is wrong.

> Any objection to v2 of the irq_lock patch?
>   

Haven't done a detailed review yet, sorry.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-20 18:48           ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-05-24 14:04             ` Avi Kivity
  2009-05-25 11:04               ` Marcelo Tosatti
  0 siblings, 1 reply; 49+ messages in thread
From: Avi Kivity @ 2009-05-24 14:04 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, Gregory Haskins

Marcelo Tosatti wrote:
> Get rid of kvm->lock dependency on coalesced_mmio methods. Use an 
> atomic variable instead to guarantee only one vcpu is batching
> data into the ring at a given time.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
> ===================================================================
> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
> @@ -26,9 +26,12 @@ static int coalesced_mmio_in_range(struc
>  	if (!is_write)
>  		return 0;
>  
> -	/* kvm->lock is taken by the caller and must be not released before
> -         * dev.read/write
> -         */
> +	/*
> + 	 * Some other vcpu might be batching data into the ring,
> + 	 * fallback to userspace. Ordering not our problem.
> + 	 */
> +	if (!atomic_add_unless(&dev->in_use, 1, 1))
> +		return 0;
>  
>   

Ordering with simultaneous writes is indeed not our problem, but the 
ring may contain ordered writes (even by the same vcpu!)

Suggest our own lock here.  in_use is basically a homemade lock, better 
to use the factory made ones which come with a warranty.
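
To spell out the equivalence (dev->in_use as in the v2 patch; the
helper names are made up):

static int coalesced_trylock(struct kvm_coalesced_mmio_dev *dev)
{
	/* behaves exactly like spin_trylock(&dev->lock)... */
	return atomic_add_unless(&dev->in_use, 1, 1);
}

static void coalesced_unlock(struct kvm_coalesced_mmio_dev *dev)
{
	/*
	 * ...and this is the spin_unlock(), except atomic_set()
	 * carries no release barrier and no lockdep coverage --
	 * which is the warranty you give up.
	 */
	atomic_set(&dev->in_use, 0);
}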


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-20 18:48           ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
@ 2009-05-24 14:10             ` Avi Kivity
  2009-05-25 11:11               ` Marcelo Tosatti
                                 ` (5 more replies)
  0 siblings, 6 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-24 14:10 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, Gregory Haskins

Marcelo Tosatti wrote:
> Subject says it all.
>   

I hate those changelogs.  I guess Subject never reviews code.

You might put some evidence that we're suffering from contention here 
(I'm very willing to believe it, but hard evidence is better).

> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm-irqlock/include/linux/kvm_host.h
> ===================================================================
> --- kvm-irqlock.orig/include/linux/kvm_host.h
> +++ kvm-irqlock/include/linux/kvm_host.h
> @@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
>  };
>  
>  struct kvm {
> -	struct mutex lock; /* protects the vcpus array and APIC accesses */
>  	spinlock_t mmu_lock;
>  	struct rw_semaphore slots_lock;
>  	struct mm_struct *mm; /* userspace tied to this vm */
> @@ -132,6 +131,12 @@ struct kvm {
>  					KVM_PRIVATE_MEM_SLOTS];
>  	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
>  	struct list_head vm_list;
> +	struct mutex lock; /*
> +			    * - protects mmio_bus, pio_bus.
> +			    * - protects a few concurrent ioctl's (FIXME).
> +			    * - protects concurrent create_vcpu, but
>   
List data structures, not operations.

> +			    *   kvm->vcpus walkers do it locklessly (FIXME).
>   

I think we're fine?  Maybe add an smp_mb() between vcpu creation and 
assignment into the vcpu array?
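
Something along these lines, as a sketch of the pairing (function names
made up; the reader side needs the matching smp_rmb()):

/* creation side: publish the pointer only after the vcpu is set up */
static void publish_vcpu(struct kvm *kvm, int id, struct kvm_vcpu *vcpu)
{
	smp_wmb();		/* init stores visible before... */
	kvm->vcpus[id] = vcpu;	/* ...the pointer itself */
}

/* lockless walker side */
static void walk_vcpus(struct kvm *kvm, void (*fn)(struct kvm_vcpu *))
{
	int i;

	for (i = 0; i < KVM_MAX_VCPUS; i++) {
		struct kvm_vcpu *vcpu = kvm->vcpus[i];

		if (!vcpu)
			continue;
		smp_rmb();	/* pairs with the smp_wmb() above */
		fn(vcpu);
	}
}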


> +			    */
>  	struct kvm_io_bus mmio_bus;
>  	struct kvm_io_bus pio_bus;
>  	struct list_head irqfds;
> @@ -143,6 +148,7 @@ struct kvm {
>  	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
>  #endif
>  
> +	struct mutex irq_lock; /* protects high level irq logic, ioapic */
>   

List the data structures protected please.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 4/4] KVM: switch irq injection/acking to irq_lock
  2009-05-20 12:15   ` Gregory Haskins
  2009-05-20 14:16     ` Marcelo Tosatti
@ 2009-05-24 14:53     ` Avi Kivity
  1 sibling, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-24 14:53 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: Marcelo Tosatti, kvm

Gregory Haskins wrote:
> Marcelo Tosatti wrote:
>   
>> Switch irq injection/acking to irq_lock, and change PIO/MMIO paths 
>> so that the device search is protected by kvm->lock, but not the
>> read/write callbacks (which are the responsibility of the device).
>>
>> Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> Index: kvm/arch/x86/kvm/i8254.c
>> ===================================================================
>> --- kvm.orig/arch/x86/kvm/i8254.c
>> +++ kvm/arch/x86/kvm/i8254.c
>> @@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
>>  	struct kvm_vcpu *vcpu;
>>  	int i;
>>  
>> -	mutex_lock(&kvm->lock);
>> +	mutex_lock(&kvm->irq_lock);
>>   
>>     
>
> There would be advantages to having irq_lock be an
> interrupt/nonpreempt-friendly design, such as s/mutex/spinlock.  For
> instance, irqfd could inject the interrupt directly without deferring to
> a workqueue.   I'm not sure if this is possible/easy, but its something
> to consider.
>   

I agree, but let's make the first step splitting out the interrupt lock, 
and a second step changing it to a spinlock.  It's scary enough as is.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-24 14:04             ` Avi Kivity
@ 2009-05-25 11:04               ` Marcelo Tosatti
  2009-05-26 11:24                 ` Avi Kivity
  0 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-25 11:04 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Gregory Haskins

On Sun, May 24, 2009 at 05:04:52PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> Get rid of kvm->lock dependency on coalesced_mmio methods. Use an  
>> atomic variable instead to guarantee only one vcpu is batching
>> data into the ring at a given time.
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
>> ===================================================================
>> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
>> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
>> @@ -26,9 +26,12 @@ static int coalesced_mmio_in_range(struc
>>  	if (!is_write)
>>  		return 0;
>>  -	/* kvm->lock is taken by the caller and must be not released before
>> -         * dev.read/write
>> -         */
>> +	/*
>> + 	 * Some other vcpu might be batching data into the ring,
>> + 	 * fallback to userspace. Ordering not our problem.
>> + 	 */
>> +	if (!atomic_add_unless(&dev->in_use, 1, 1))
>> +		return 0;
>>    
>
> Ordering with simultaneous writes is indeed not our problem, but the  
> ring may contain ordered writes (even by the same vcpu!)

You mean writes by a vcpu should maintain ordering even when that vcpu
migrates to a different pcpu? 

The smp_wmb in coalesced_write guarantees that.
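
And the consumer must pair it with a read barrier when draining the
ring. A sketch of the userspace side -- handle_write() is made up, and
rmb() stands for whatever read barrier that environment provides:

static void drain_ring(struct kvm_coalesced_mmio_ring *ring)
{
	__u32 last = ring->last;	/* snapshot the producer index */

	rmb();	/* read entries only after the index; pairs with
		 * the smp_wmb() in coalesced_mmio_write() */

	while (ring->first != last) {
		struct kvm_coalesced_mmio *m =
				&ring->coalesced_mmio[ring->first];

		handle_write(m->phys_addr, m->len, m->data);
		ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
	}
}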

> Suggest our own lock here.  in_use is basically a homemade lock, better  
> to use the factory made ones which come with a warranty.

The first submission had a mutex but was considered unorthodox. Although
I agree with the argument you made then, I don't see a way around that.

Some pseudocode please? 

Agree with the suggestion on patch 3; will fix the commentary in
kvm_host.h.


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-24 14:10             ` Avi Kivity
@ 2009-05-25 11:11               ` Marcelo Tosatti
  2009-05-26 11:33                 ` Avi Kivity
  2009-05-28  4:45               ` [patch 0/4] move irq protection role to separate lock v3 Marcelo Tosatti
                                 ` (4 subsequent siblings)
  5 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-25 11:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Gregory Haskins

On Sun, May 24, 2009 at 05:10:07PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> Subject says it all.
>>   
>
> I hate those changelogs.  I guess Subject never reviews code.
>
> You might put some evidence that we're suffering from contention here  
> (I'm very willing to believe it, but hard evidence is better).

Don't have any evidence yet; the main purpose of the split is to fix the
deadlock.

But, with the data we have so far, slots_lock and kvm->lock will cause
significant cache bouncing on EPT hardware (with measurements on shadow,
mmu_lock is the #1 offender).


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-25 11:04               ` Marcelo Tosatti
@ 2009-05-26 11:24                 ` Avi Kivity
  2009-05-26 13:15                   ` Marcelo Tosatti
  0 siblings, 1 reply; 49+ messages in thread
From: Avi Kivity @ 2009-05-26 11:24 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, Gregory Haskins

Marcelo Tosatti wrote:
>>> ===================================================================
>>> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
>>> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
>>> @@ -26,9 +26,12 @@ static int coalesced_mmio_in_range(struc
>>>  	if (!is_write)
>>>  		return 0;
>>>  -	/* kvm->lock is taken by the caller and must be not released before
>>> -         * dev.read/write
>>> -         */
>>> +	/*
>>> + 	 * Some other vcpu might be batching data into the ring,
>>> + 	 * fallback to userspace. Ordering not our problem.
>>> + 	 */
>>> +	if (!atomic_add_unless(&dev->in_use, 1, 1))
>>> +		return 0;
>>>    
>>>       
>> Ordering with simultaneous writes is indeed not our problem, but the  
>> ring may contain ordered writes (even by the same vcpu!)
>>     
>
> You mean writes by a vcpu should maintain ordering even when that vcpu
> migrates to a different pcpu? 
>   

No.  Writes by a single vcpu need to preserve their ordering relative to 
each other.  If a write by vcpu A completes before a write by vcpu B 
starts, then they need to be ordered, since the vcpus may have 
synchronized in between.

Hm, I don't think you broke these rules.

> The smp_wmb in coalesced_write guarantees that.
>
>   
>> Suggest our own lock here.  in_use is basically a homemade lock, better  
>> to use the factory made ones which come with a warranty.
>>     
>
> The first submission had a mutex but was considered unorthodox. Although
> i agree with your argument made then, i don't see a way around that.
>
> Some pseudocode please? 
>   

Why not use slots_lock to protect the entire iodevice list (rcu one 
day), and an internal spinlock for coalesced mmio?
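
I.e., a sketch of that shape (assuming kvm_io_bus_find_dev() keeps its
current signature; the wrapper itself is illustrative):

static struct kvm_io_device *find_mmio_dev(struct kvm_vcpu *vcpu,
					   gpa_t gpa, int len, int is_write)
{
	struct kvm_io_device *dev;

	down_read(&vcpu->kvm->slots_lock);	/* protects the device list */
	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, gpa, len, is_write);
	up_read(&vcpu->kvm->slots_lock);
	return dev;	/* device-internal locking from here on */
}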

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-25 11:11               ` Marcelo Tosatti
@ 2009-05-26 11:33                 ` Avi Kivity
  0 siblings, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-26 11:33 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, Gregory Haskins

Marcelo Tosatti wrote:
> On Sun, May 24, 2009 at 05:10:07PM +0300, Avi Kivity wrote:
>   
>> Marcelo Tosatti wrote:
>>     
>>> Subject says it all.
>>>   
>>>       
>> I hate those changelogs.  I guess Subject never reviews code.
>>
>> You might put some evidence that we're suffering from contention here  
>> (I'm very willing to believe it, but hard evidence is better).
>>     
>
> Don't have any evidence yet, the main purpose of the split is to fix the
> deadlock.
>   

You might have said that.

> But, with the data we have so far, slots_lock and kvm->lock will cause
> significant cache bouncing on EPT hardware (with measurements on shadow
> mmu_lock is #1 offender).
>   

Well, mmu_lock is probably contended, which is much worse than just 
cache bouncing.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-26 11:24                 ` Avi Kivity
@ 2009-05-26 13:15                   ` Marcelo Tosatti
  2009-05-26 13:27                     ` Avi Kivity
  0 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-26 13:15 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Gregory Haskins

On Tue, May 26, 2009 at 02:24:33PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>>>> ===================================================================
>>>> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
>>>> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
>>>> @@ -26,9 +26,12 @@ static int coalesced_mmio_in_range(struc
>>>>  	if (!is_write)
>>>>  		return 0;
>>>>  -	/* kvm->lock is taken by the caller and must be not released before
>>>> -         * dev.read/write
>>>> -         */
>>>> +	/*
>>>> + 	 * Some other vcpu might be batching data into the ring,
>>>> + 	 * fallback to userspace. Ordering not our problem.
>>>> + 	 */
>>>> +	if (!atomic_add_unless(&dev->in_use, 1, 1))
>>>> +		return 0;
>>>>          
>>> Ordering with simultaneous writes is indeed not our problem, but the  
>>> ring may contain ordered writes (even by the same vcpu!)
>>>     
>>
>> You mean writes by a vcpu should maintain ordering even when that vcpu
>> migrates to a different pcpu?   
>
> No.  Writes by a single vcpu need to preserve their ordering relative to  
> each other.  If a write by vcpu A completes before a write by vcpu B  
> starts, then they need to be ordered, since the vcpus may have  
> synchronized in between.
>
> Hm, I don't think you broke these rules.
>
>> The smp_wmb in coalesced_write guarantees that.
>>
>>   
>>> Suggest our own lock here.  in_use is basically a homemade lock, 
>>> better  to use the factory made ones which come with a warranty.
>>>     
>>
>> The first submission had a mutex but was considered unorthodox. Although
>> I agree with the argument you made then, I don't see a way around that.
>>
>> Some pseudocode please?   
>
> Why not use slots_lock to protect the entire iodevice list (rcu one  
> day), and an internal spinlock for coalesced mmio?

Don't like using slots_lock to protect the entire iodevice list; it's
reverse progress in my opinion. The PIO/MMIO device lists are data
structures used in the hot path; they are not directly related to the
memslots, and therefore deserve a separate lock. Whenever slots_lock
becomes an issue, you'll have to untangle the mess, so better to start
doing it now.

Sure, can switch to an internal spinlock for coalesced_mmio, but breaking
out if spin_trylock fails. You're OK with that?
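
I.e., something like this (a sketch of the trylock variant;
ring_has_room() and addr_in_zone() stand in for the existing full/zone
checks):

static int coalesced_mmio_in_range(struct kvm_io_device *this,
				   gpa_t addr, int len, int is_write)
{
	struct kvm_coalesced_mmio_dev *dev =
			(struct kvm_coalesced_mmio_dev *)this->private;

	if (!is_write)
		return 0;

	/*
	 * Some other vcpu is batching into the ring: claim "not in
	 * range" so the caller exits to userspace for this write.
	 */
	if (!spin_trylock(&dev->lock))
		return 0;

	if (!ring_has_room(dev) || !addr_in_zone(dev, addr, len)) {
		spin_unlock(&dev->lock);
		return 0;
	}

	/* success: dev->lock stays held until coalesced_mmio_write() */
	return 1;
}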




^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-26 13:15                   ` Marcelo Tosatti
@ 2009-05-26 13:27                     ` Avi Kivity
  0 siblings, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-26 13:27 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, Gregory Haskins

Marcelo Tosatti wrote:
>>
>> Why not use slots_lock to protect the entire iodevice list (rcu one  
>> day), and an internal spinlock for coalesced mmio?
>>     
>
> Don't like using slots_lock to protect the entire iodevice list; it's
> reverse progress in my opinion. The PIO/MMIO device lists are data
> structures used in the hot path; they are not directly related to the
> memslots, and therefore deserve a separate lock. Whenever slots_lock
> becomes an issue, you'll have to untangle the mess, so better to start
> doing it now.
>   

It's a reader/writer lock, so you'll never have contention.

I'm okay with a new lock though.

> Sure, can switch to an internal spinlock for coalesced_mmio, but breaking
> out if spin_trylock fails. You're OK with that?
>   

Why not do a normal spin_lock()?  It's not like you'll wait years for 
the store to complete.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 0/4] move irq protection role to separate lock v3
  2009-05-24 14:10             ` Avi Kivity
  2009-05-25 11:11               ` Marcelo Tosatti
@ 2009-05-28  4:45               ` Marcelo Tosatti
  2009-05-28  4:45               ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
                                 ` (3 subsequent siblings)
  5 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-28  4:45 UTC (permalink / raw)
  To: kvm

This is to fix a deadlock reported by Alex Williamson, while at
the same time making it easier to allow PIO/MMIO regions to be
registered/unregistered while a guest is alive.

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack
  2009-05-24 14:10             ` Avi Kivity
  2009-05-25 11:11               ` Marcelo Tosatti
  2009-05-28  4:45               ` [patch 0/4] move irq protection role to separate lock v3 Marcelo Tosatti
@ 2009-05-28  4:45               ` Marcelo Tosatti
  2009-05-28  4:45               ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
                                 ` (2 subsequent siblings)
  5 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-28  4:45 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: pic-clear-isr-lock --]
[-- Type: text/plain, Size: 515 bytes --]

isr_ack is protected by kvm_pic->lock.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8259.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -72,8 +72,10 @@ static void pic_clear_isr(struct kvm_kpi
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
+	pic_lock(s);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
+	pic_unlock(s);
 }
 
 /*

-- 

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-24 14:10             ` Avi Kivity
                                 ` (2 preceding siblings ...)
  2009-05-28  4:45               ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
@ 2009-05-28  4:45               ` Marcelo Tosatti
  2009-05-31 12:14                 ` Avi Kivity
  2009-05-28  4:45               ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
  2009-05-28  4:45               ` [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock Marcelo Tosatti
  5 siblings, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-28  4:45 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: coalesced-mmio-lock --]
[-- Type: text/plain, Size: 2407 bytes --]

Move coalesced_mmio locking to its own device, instead of relying on
kvm->lock.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
===================================================================
--- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
+++ kvm-irqlock/virt/kvm/coalesced_mmio.c
@@ -26,9 +26,7 @@ static int coalesced_mmio_in_range(struc
 	if (!is_write)
 		return 0;
 
-	/* kvm->lock is taken by the caller and must be not released before
-         * dev.read/write
-         */
+	spin_lock(&dev->lock);
 
 	/* Are we able to batch it ? */
 
@@ -41,7 +39,7 @@ static int coalesced_mmio_in_range(struc
 							KVM_COALESCED_MMIO_MAX;
 	if (next == dev->kvm->coalesced_mmio_ring->first) {
 		/* full */
-		return 0;
+		goto out_denied;
 	}
 
 	/* is it in a batchable area ? */
@@ -57,6 +55,8 @@ static int coalesced_mmio_in_range(struc
 		    addr + len <= zone->addr + zone->size)
 			return 1;
 	}
+out_denied:
+	spin_unlock(&dev->lock);
 	return 0;
 }
 
@@ -67,8 +67,6 @@ static void coalesced_mmio_write(struct 
 				(struct kvm_coalesced_mmio_dev*)this->private;
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
 
-	/* kvm->lock must be taken by caller before call to in_range()*/
-
 	/* copy data in first free entry of the ring */
 
 	ring->coalesced_mmio[ring->last].phys_addr = addr;
@@ -76,6 +74,7 @@ static void coalesced_mmio_write(struct 
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+	spin_unlock(&dev->lock);
 }
 
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
@@ -90,6 +89,8 @@ int kvm_coalesced_mmio_init(struct kvm *
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
+	spin_lock_init(&dev->lock);
+
 	dev->dev.write  = coalesced_mmio_write;
 	dev->dev.in_range  = coalesced_mmio_in_range;
 	dev->dev.destructor  = coalesced_mmio_destructor;
Index: kvm-irqlock/virt/kvm/coalesced_mmio.h
===================================================================
--- kvm-irqlock.orig/virt/kvm/coalesced_mmio.h
+++ kvm-irqlock/virt/kvm/coalesced_mmio.h
@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	spinlock_t lock;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-05-24 14:10             ` Avi Kivity
                                 ` (3 preceding siblings ...)
  2009-05-28  4:45               ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-05-28  4:45               ` Marcelo Tosatti
  2009-05-28  4:45               ` [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock Marcelo Tosatti
  5 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-28  4:45 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: ioapic-lock --]
[-- Type: text/plain, Size: 2730 bytes --]

Introduce irq_lock, and use it to protect the ioapic data structures.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-irqlock/include/linux/kvm_host.h
===================================================================
--- kvm-irqlock.orig/include/linux/kvm_host.h
+++ kvm-irqlock/include/linux/kvm_host.h
@@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
 };
 
 struct kvm {
-	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
@@ -132,6 +131,7 @@ struct kvm {
 					KVM_PRIVATE_MEM_SLOTS];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	struct list_head vm_list;
+	struct mutex lock;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
 #ifdef CONFIG_HAVE_KVM_EVENTFD
@@ -145,6 +145,7 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
Index: kvm-irqlock/virt/kvm/ioapic.c
===================================================================
--- kvm-irqlock.orig/virt/kvm/ioapic.c
+++ kvm-irqlock/virt/kvm/ioapic.c
@@ -238,6 +238,7 @@ static void ioapic_mmio_read(struct kvm_
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
+	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
@@ -264,6 +265,7 @@ static void ioapic_mmio_read(struct kvm_
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
@@ -275,6 +277,8 @@ static void ioapic_mmio_write(struct kvm
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
@@ -300,6 +304,7 @@ static void ioapic_mmio_write(struct kvm
 	default:
 		break;
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
Index: kvm-irqlock/virt/kvm/kvm_main.c
===================================================================
--- kvm-irqlock.orig/virt/kvm/kvm_main.c
+++ kvm-irqlock/virt/kvm/kvm_main.c
@@ -985,6 +985,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm_io_bus_init(&kvm->pio_bus);
 	kvm_irqfd_init(kvm);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock
  2009-05-24 14:10             ` Avi Kivity
                                 ` (4 preceding siblings ...)
  2009-05-28  4:45               ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
@ 2009-05-28  4:45               ` Marcelo Tosatti
  5 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-05-28  4:45 UTC (permalink / raw)
  To: kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: irqlock --]
[-- Type: text/plain, Size: 12151 bytes --]

Protect irq injection/acking data structures with a separate irq_lock
mutex. This fixes the following deadlock:

CPU A                               CPU B
kvm_vm_ioctl_deassign_dev_irq()
  mutex_lock(&kvm->lock);            worker_thread()
  -> kvm_deassign_irq()                -> kvm_assigned_dev_interrupt_work_handler()
    -> deassign_host_irq()               mutex_lock(&kvm->lock);
      -> cancel_work_sync() [blocked]

Reported-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -636,10 +636,10 @@ static void __inject_pit_timer_intr(stru
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -2091,10 +2091,10 @@ long kvm_arch_vm_ioctl(struct file *filp
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -2340,12 +2340,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
@@ -2395,12 +2394,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
@@ -2723,7 +2721,6 @@ static void kernel_pio(struct kvm_io_dev
 {
 	/* TODO: String I/O for in kernel device */
 
-	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->arch.pio.in)
 		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
 				  vcpu->arch.pio.size,
@@ -2732,7 +2729,6 @@ static void kernel_pio(struct kvm_io_dev
 		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
 				   vcpu->arch.pio.size,
 				   pd);
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static void pio_string_write(struct kvm_io_device *pio_dev,
@@ -2742,14 +2738,12 @@ static void pio_string_write(struct kvm_
 	void *pd = vcpu->arch.pio_data;
 	int i;
 
-	mutex_lock(&vcpu->kvm->lock);
 	for (i = 0; i < io->cur_count; i++) {
 		kvm_iodevice_write(pio_dev, io->port,
 				   io->size,
 				   pd);
 		pd += io->size;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2786,7 +2780,9 @@ int kvm_emulate_pio(struct kvm_vcpu *vcp
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
 		complete_pio(vcpu);
@@ -2850,9 +2846,12 @@ int kvm_emulate_pio_string(struct kvm_vc
 
 	vcpu->arch.pio.guest_gva = address;
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
 				    !vcpu->arch.pio.in);
+	mutex_unlock(&vcpu->kvm->lock);
+
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -62,6 +62,12 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -126,11 +132,7 @@ static void kvm_assigned_dev_interrupt_w
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -159,7 +161,7 @@ static void kvm_assigned_dev_interrupt_w
 	}
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -215,7 +217,7 @@ static void kvm_assigned_dev_ack_irq(str
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
Index: kvm/virt/kvm/irq_comm.c
===================================================================
--- kvm.orig/virt/kvm/irq_comm.c
+++ kvm/virt/kvm/irq_comm.c
@@ -62,6 +62,8 @@ int kvm_irq_delivery_to_apic(struct kvm 
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -113,7 +115,7 @@ static int kvm_set_msi(struct kvm_kernel
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
@@ -174,19 +178,26 @@ void kvm_notify_acked_irq(struct kvm *kv
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -196,6 +207,7 @@ int kvm_request_irq_source_id(struct kvm
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -206,6 +218,7 @@ void kvm_free_irq_source_id(struct kvm *
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -214,19 +227,24 @@ void kvm_free_irq_source_id(struct kvm *
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -234,6 +252,8 @@ void kvm_fire_mask_notifiers(struct kvm 
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -249,7 +269,9 @@ static void __kvm_free_irq_routing(struc
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -323,13 +345,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
 
Index: kvm/arch/x86/kvm/lapic.c
===================================================================
--- kvm.orig/arch/x86/kvm/lapic.c
+++ kvm/arch/x86/kvm/lapic.c
@@ -425,7 +425,9 @@ static void apic_set_eoi(struct kvm_lapi
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -449,7 +451,9 @@ static void apic_send_ipi(struct kvm_lap
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -373,7 +373,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
Index: kvm/virt/kvm/eventfd.c
===================================================================
--- kvm.orig/virt/kvm/eventfd.c
+++ kvm/virt/kvm/eventfd.c
@@ -52,10 +52,10 @@ irqfd_inject(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, work);
 	struct kvm *kvm = irqfd->kvm;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-28  4:45               ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-05-31 12:14                 ` Avi Kivity
  2009-06-01 21:23                   ` Marcelo Tosatti
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
  0 siblings, 2 replies; 49+ messages in thread
From: Avi Kivity @ 2009-05-31 12:14 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:
> Move coalesced_mmio locking to its own device, instead of relying on
> kvm->lock.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
> ===================================================================
> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
> @@ -26,9 +26,7 @@ static int coalesced_mmio_in_range(struc
>  	if (!is_write)
>  		return 0;
>  
> -	/* kvm->lock is taken by the caller and must be not released before
> -         * dev.read/write
> -         */
> +	spin_lock(&dev->lock);
>   

This unbalanced locking is still very displeasing.  At a minimum you 
need a sparse annotation to indicate it.
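
For reference, the annotation would look roughly like this (a sketch
only; since the lock is only held when in_range() succeeds, the
early-return paths would additionally need __cond_lock() treatment):

	static int coalesced_mmio_in_range(struct kvm_io_device *this,
					   gpa_t addr, int len, int is_write)
		__acquires(&to_mmio(this)->lock);

	static void coalesced_mmio_write(struct kvm_io_device *this,
					 gpa_t addr, int len, const void *val)
		__releases(&to_mmio(this)->lock);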

But I think it really indicates a problem with the io_device API.

Potential solutions:
- fold in_range() into ->write and ->read.  Make those functions 
responsible for both determining whether they can handle the range and 
performing the I/O (see the sketch below).
- have a separate rwlock for the device list.
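
For coalesced_mmio the first option might look roughly like this
(sketch only: coalesced_mmio_in_zone() and coalesced_mmio_ring_full()
are made-up helpers standing in for the current in_range() checks, and
->write grows a return value so the caller can fall through to the
next device):

	static int coalesced_mmio_write(struct kvm_io_device *this,
					gpa_t addr, int len, const void *val)
	{
		struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
		struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
		int ret = -EOPNOTSUPP;

		spin_lock(&dev->lock);
		if (coalesced_mmio_in_zone(dev, addr, len) &&
		    !coalesced_mmio_ring_full(dev)) {
			/* copy data into the first free entry of the ring */
			ring->coalesced_mmio[ring->last].phys_addr = addr;
			ring->coalesced_mmio[ring->last].len = len;
			memcpy(ring->coalesced_mmio[ring->last].data, val, len);
			smp_wmb();
			ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
			ret = 0;
		}
		spin_unlock(&dev->lock);
		return ret;	/* lock/unlock balanced in one function */
	}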

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-05-31 12:14                 ` Avi Kivity
@ 2009-06-01 21:23                   ` Marcelo Tosatti
  2009-06-01 21:43                     ` Avi Kivity
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
  1 sibling, 1 reply; 49+ messages in thread
From: Marcelo Tosatti @ 2009-06-01 21:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, May 31, 2009 at 03:14:36PM +0300, Avi Kivity wrote:
> Marcelo Tosatti wrote:
>> Move coalesced_mmio locking to its own device, instead of relying on
>> kvm->lock.
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
>> ===================================================================
>> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
>> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
>> @@ -26,9 +26,7 @@ static int coalesced_mmio_in_range(struc
>>  	if (!is_write)
>>  		return 0;
>>  -	/* kvm->lock is taken by the caller and must be not released before
>> -         * dev.read/write
>> -         */
>> +	spin_lock(&dev->lock);
>>   
>
> This unbalanced locking is still very displeasing.  At a minimum you  
> need a sparse annotation to indicate it.
>
> But I think it really indicates a problem with the io_device API.
>
> Potential solutions:
> - fold in_range() into ->write and ->read.  Make those functions  
> responsible for both determining whether they can handle the range and  
> performing the I/O.
> - have a separate rwlock for the device list.

IMO the problem is the coalesced_mmio device. The unbalanced locking is
a result of the abuse of the in_range() and read/write() methods.

Normally you'd expect parallel accesses to in_range() to be allowed,
since it's just checking whether (aha) the access is in range, returning
a pointer to the device if positive. Now read/write() are the ones that
need serialization, since they touch the device's internal state.
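
With a well-behaved device the split looks something like this
(hypothetical sketch; foo_dev and its fields are made up):

	/* lockless predicate: only decides whether the access is ours */
	static int foo_in_range(struct kvm_io_device *this,
				gpa_t addr, int len, int is_write)
	{
		struct foo_dev *dev = to_foo(this);

		return addr >= dev->base && addr + len <= dev->base + dev->size;
	}

	/* read/write serialize, since they touch internal state */
	static void foo_write(struct kvm_io_device *this,
			      gpa_t addr, int len, const void *val)
	{
		struct foo_dev *dev = to_foo(this);

		spin_lock(&dev->lock);
		/* ... update device state ... */
		spin_unlock(&dev->lock);
	}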

coalesced_mmio abuses in_range() to do more things than it should.

Ideally we should fix coalesced_mmio, but I'm not going to do that now
(sorry, I'm not confident changing it without seeing it go through
intense torture testing).

That said, is a sparse annotation enough to convince you?


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-06-01 21:23                   ` Marcelo Tosatti
@ 2009-06-01 21:43                     ` Avi Kivity
  0 siblings, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-06-01 21:43 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:
> On Sun, May 31, 2009 at 03:14:36PM +0300, Avi Kivity wrote:
>   
>> Marcelo Tosatti wrote:
>>     
>>> Move coalesced_mmio locking to its own device, instead of relying on
>>> kvm->lock.
>>>
>>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>>
>>> Index: kvm-irqlock/virt/kvm/coalesced_mmio.c
>>> ===================================================================
>>> --- kvm-irqlock.orig/virt/kvm/coalesced_mmio.c
>>> +++ kvm-irqlock/virt/kvm/coalesced_mmio.c
>>> @@ -26,9 +26,7 @@ static int coalesced_mmio_in_range(struc
>>>  	if (!is_write)
>>>  		return 0;
>>>  -	/* kvm->lock is taken by the caller and must be not released before
>>> -         * dev.read/write
>>> -         */
>>> +	spin_lock(&dev->lock);
>>>   
>>>       
>> This unbalanced locking is still very displeasing.  At a minimum you  
>> need a sparse annotation to indicate it.
>>
>> But I think it really indicates a problem with the io_device API.
>>
>> Potential solutions:
>> - fold in_range() into ->write and ->read.  Make those functions  
>> responsible for both determining whether they can handle the range and  
>> performing the I/O.
>> - have a separate rwlock for the device list.
>>     
>
> IMO the problem is the coalesced_mmio device. The unbalanced locking is
> a result of the abuse of the in_range() and read/write() methods.
>
>   

Okay, the penny has dropped.  I understand now.

> Normally you'd expect parallel accesses to in_range() to be allowed,
> since it's just checking whether (aha) the access is in range, returning
> a pointer to the device if positive. Now read/write() are the ones that
> need serialization, since they touch the device's internal state.
>
> coalesced_mmio abuses in_range() to do more things than it should.
>
> Ideally we should fix coalesced_mmio, but I'm not going to do that now
> (sorry, I'm not confident changing it without seeing it go through
> intense torture testing).
>   

It's not trivial since it's userspace that clears the ring, and we can't 
wait on userspace.

> That said, is a sparse annotation enough to convince you?
>   

Let me have a look at fixing coalesced_mmio first.  We might allow 
->write to fail, causing a fallback to userspace.  Or we could fail if 
n_avail < MAX_VCPUS, so even the worst-case race leaves us one entry.
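
Something along these lines for the ring-space check, perhaps (sketch;
coalesced_mmio_has_room() is a made-up helper):

	/*
	 * Demand KVM_MAX_VCPUS free entries instead of one, so even if
	 * every other vcpu races past the check before we insert, the
	 * ring cannot overflow.
	 */
	static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
	{
		struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
		int avail;

		avail = (int)ring->first - (int)ring->last - 1;
		if (avail < 0)
			avail += KVM_COALESCED_MMIO_MAX;
		return avail >= KVM_MAX_VCPUS;
	}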

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 0/4] move irq protection role to separate lock v4
  2009-05-31 12:14                 ` Avi Kivity
  2009-06-01 21:23                   ` Marcelo Tosatti
@ 2009-06-04 18:08                   ` Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
                                       ` (4 more replies)
  1 sibling, 5 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-06-04 18:08 UTC (permalink / raw)
  To: avi; +Cc: kvm

This is to fix a deadlock reported by Alex Williamson, while at the
same time making it easier to allow PIO/MMIO regions to be
registered/unregistered while a guest is alive.


-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
@ 2009-06-04 18:08                     ` Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
                                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-06-04 18:08 UTC (permalink / raw)
  To: avi, kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: pic-clear-isr-lock --]
[-- Type: text/plain, Size: 515 bytes --]

isr_ack is protected by kvm_pic->lock.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8259.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -72,8 +72,10 @@ static void pic_clear_isr(struct kvm_kpi
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
+	pic_lock(s);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
+	pic_unlock(s);
 }
 
 /*

-- 

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 2/4] KVM: move coalesced_mmio locking to its own device
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
@ 2009-06-04 18:08                     ` Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
                                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-06-04 18:08 UTC (permalink / raw)
  To: avi, kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: coalesced-mmio-lock --]
[-- Type: text/plain, Size: 1890 bytes --]

Move coalesced_mmio locking to its own device, instead of relying on
kvm->lock.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/virt/kvm/coalesced_mmio.c
===================================================================
--- kvm.orig/virt/kvm/coalesced_mmio.c
+++ kvm/virt/kvm/coalesced_mmio.c
@@ -31,10 +31,6 @@ static int coalesced_mmio_in_range(struc
 	if (!is_write)
 		return 0;
 
-	/* kvm->lock is taken by the caller and must be not released before
-         * dev.read/write
-         */
-
 	/* Are we able to batch it ? */
 
 	/* last is the first free entry
@@ -70,7 +66,7 @@ static void coalesced_mmio_write(struct 
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
 
-	/* kvm->lock must be taken by caller before call to in_range()*/
+	spin_lock(&dev->lock);
 
 	/* copy data in first free entry of the ring */
 
@@ -79,6 +75,7 @@ static void coalesced_mmio_write(struct 
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+	spin_unlock(&dev->lock);
 }
 
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
@@ -101,6 +98,7 @@ int kvm_coalesced_mmio_init(struct kvm *
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
+	spin_lock_init(&dev->lock);
 	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
Index: kvm/virt/kvm/coalesced_mmio.h
===================================================================
--- kvm.orig/virt/kvm/coalesced_mmio.h
+++ kvm/virt/kvm/coalesced_mmio.h
@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	spinlock_t lock;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
@ 2009-06-04 18:08                     ` Marcelo Tosatti
  2009-06-04 18:08                     ` [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock Marcelo Tosatti
  2009-06-08  9:18                     ` [patch 0/4] move irq protection role to separate lock v4 Avi Kivity
  4 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-06-04 18:08 UTC (permalink / raw)
  To: avi, kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: ioapic-lock --]
[-- Type: text/plain, Size: 2658 bytes --]

Introduce irq_lock, and use it to protect the ioapic data structures.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -123,7 +123,6 @@ struct kvm_kernel_irq_routing_entry {
 };
 
 struct kvm {
-	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
@@ -132,6 +131,7 @@ struct kvm {
 					KVM_PRIVATE_MEM_SLOTS];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	struct list_head vm_list;
+	struct mutex lock;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
 #ifdef CONFIG_HAVE_KVM_EVENTFD
@@ -145,6 +145,7 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
Index: kvm/virt/kvm/ioapic.c
===================================================================
--- kvm.orig/virt/kvm/ioapic.c
+++ kvm/virt/kvm/ioapic.c
@@ -243,6 +243,7 @@ static void ioapic_mmio_read(struct kvm_
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
+	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
@@ -269,6 +270,7 @@ static void ioapic_mmio_read(struct kvm_
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
@@ -280,6 +282,8 @@ static void ioapic_mmio_write(struct kvm
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
@@ -305,6 +309,7 @@ static void ioapic_mmio_write(struct kvm
 	default:
 		break;
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
 }
 
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -979,6 +979,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm_io_bus_init(&kvm->pio_bus);
 	kvm_irqfd_init(kvm);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
                                       ` (2 preceding siblings ...)
  2009-06-04 18:08                     ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
@ 2009-06-04 18:08                     ` Marcelo Tosatti
  2009-06-08  9:18                     ` [patch 0/4] move irq protection role to separate lock v4 Avi Kivity
  4 siblings, 0 replies; 49+ messages in thread
From: Marcelo Tosatti @ 2009-06-04 18:08 UTC (permalink / raw)
  To: avi, kvm; +Cc: Marcelo Tosatti

[-- Attachment #1: irqlock --]
[-- Type: text/plain, Size: 12170 bytes --]

Protect irq injection/acking data structures with a separate irq_lock
mutex. This fixes the following deadlock:

CPU A                               CPU B
kvm_vm_ioctl_deassign_dev_irq()
  mutex_lock(&kvm->lock);            worker_thread()
  -> kvm_deassign_irq()                -> kvm_assigned_dev_interrupt_work_handler()
    -> deassign_host_irq()               mutex_lock(&kvm->lock);
      -> cancel_work_sync() [blocked]

Moving the work handler to kvm->irq_lock breaks the cycle: the handler
no longer needs kvm->lock, so deassign_host_irq() can safely call
cancel_work_sync() while holding kvm->lock.

Reported-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -651,10 +651,10 @@ static void __inject_pit_timer_intr(stru
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -2099,10 +2099,10 @@ long kvm_arch_vm_ioctl(struct file *filp
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -2348,12 +2348,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
@@ -2403,12 +2402,11 @@ mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
@@ -2731,7 +2729,6 @@ static void kernel_pio(struct kvm_io_dev
 {
 	/* TODO: String I/O for in kernel device */
 
-	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->arch.pio.in)
 		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
 				  vcpu->arch.pio.size,
@@ -2740,7 +2737,6 @@ static void kernel_pio(struct kvm_io_dev
 		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
 				   vcpu->arch.pio.size,
 				   pd);
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static void pio_string_write(struct kvm_io_device *pio_dev,
@@ -2750,14 +2746,12 @@ static void pio_string_write(struct kvm_
 	void *pd = vcpu->arch.pio_data;
 	int i;
 
-	mutex_lock(&vcpu->kvm->lock);
 	for (i = 0; i < io->cur_count; i++) {
 		kvm_iodevice_write(pio_dev, io->port,
 				   io->size,
 				   pd);
 		pd += io->size;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2794,7 +2788,9 @@ int kvm_emulate_pio(struct kvm_vcpu *vcp
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
 		complete_pio(vcpu);
@@ -2858,9 +2854,12 @@ int kvm_emulate_pio_string(struct kvm_vc
 
 	vcpu->arch.pio.guest_gva = address;
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
 				    !vcpu->arch.pio.in);
+	mutex_unlock(&vcpu->kvm->lock);
+
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -62,6 +62,12 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -126,11 +132,7 @@ static void kvm_assigned_dev_interrupt_w
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -149,7 +151,7 @@ static void kvm_assigned_dev_interrupt_w
 			    assigned_dev->guest_irq, 1);
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -207,7 +209,7 @@ static void kvm_assigned_dev_ack_irq(str
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
Index: kvm/virt/kvm/irq_comm.c
===================================================================
--- kvm.orig/virt/kvm/irq_comm.c
+++ kvm/virt/kvm/irq_comm.c
@@ -62,6 +62,8 @@ int kvm_irq_delivery_to_apic(struct kvm 
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -113,7 +115,7 @@ static int kvm_set_msi(struct kvm_kernel
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
@@ -174,19 +178,26 @@ void kvm_notify_acked_irq(struct kvm *kv
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -196,6 +207,7 @@ int kvm_request_irq_source_id(struct kvm
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -206,6 +218,7 @@ void kvm_free_irq_source_id(struct kvm *
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -214,19 +227,24 @@ void kvm_free_irq_source_id(struct kvm *
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -234,6 +252,8 @@ void kvm_fire_mask_notifiers(struct kvm 
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -249,7 +269,9 @@ static void __kvm_free_irq_routing(struc
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -323,13 +345,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
 
Index: kvm/arch/x86/kvm/lapic.c
===================================================================
--- kvm.orig/arch/x86/kvm/lapic.c
+++ kvm/arch/x86/kvm/lapic.c
@@ -425,7 +425,9 @@ static void apic_set_eoi(struct kvm_lapi
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -449,7 +451,9 @@ static void apic_send_ipi(struct kvm_lap
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -371,7 +371,8 @@ int kvm_set_irq(struct kvm *kvm, int irq
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
Index: kvm/virt/kvm/eventfd.c
===================================================================
--- kvm.orig/virt/kvm/eventfd.c
+++ kvm/virt/kvm/eventfd.c
@@ -59,10 +59,10 @@ irqfd_inject(struct work_struct *work)
 
 	kvm = rcu_dereference(irqfd->kvm);
 	if (kvm) {
-		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->irq_lock);
 		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&kvm->irq_lock);
 	}
 
 	srcu_read_unlock(&irqfd->srcu, idx);

-- 


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [patch 0/4] move irq protection role to separate lock v4
  2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
                                       ` (3 preceding siblings ...)
  2009-06-04 18:08                     ` [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock Marcelo Tosatti
@ 2009-06-08  9:18                     ` Avi Kivity
  4 siblings, 0 replies; 49+ messages in thread
From: Avi Kivity @ 2009-06-08  9:18 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

Marcelo Tosatti wrote:
> This is to fix a deadlock reported by Alex Williamson, while at
> the same time makes it easier to allow PIO/MMIO regions to be
> registered/unregistered while a guest is alive.
>   

Applied all, thanks.  I also changed the coalesced_mmio overflow check 
to account for KVM_MAX_VCPUS.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 49+ messages in thread

end of thread, other threads:[~2009-06-08  9:18 UTC | newest]

Thread overview: 49+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-18 16:56 [patch 0/4] move irq protection role to separate kvm->irq_lock Marcelo Tosatti
2009-05-18 16:56 ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
2009-05-18 16:56 ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
2009-05-20 12:06   ` Avi Kivity
2009-05-20 14:09     ` Marcelo Tosatti
2009-05-20 14:29       ` Avi Kivity
2009-05-20 15:13         ` Marcelo Tosatti
2009-05-20 15:22           ` Marcelo Tosatti
2009-05-20 15:22           ` Avi Kivity
2009-05-20 18:48         ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
2009-05-20 18:48           ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
2009-05-20 18:48           ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
2009-05-24 14:04             ` Avi Kivity
2009-05-25 11:04               ` Marcelo Tosatti
2009-05-26 11:24                 ` Avi Kivity
2009-05-26 13:15                   ` Marcelo Tosatti
2009-05-26 13:27                     ` Avi Kivity
2009-05-20 18:48           ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
2009-05-24 14:10             ` Avi Kivity
2009-05-25 11:11               ` Marcelo Tosatti
2009-05-26 11:33                 ` Avi Kivity
2009-05-28  4:45               ` [patch 0/4] move irq protection role to separate lock v3 Marcelo Tosatti
2009-05-28  4:45               ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
2009-05-28  4:45               ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
2009-05-31 12:14                 ` Avi Kivity
2009-06-01 21:23                   ` Marcelo Tosatti
2009-06-01 21:43                     ` Avi Kivity
2009-06-04 18:08                   ` [patch 0/4] move irq protection role to separate lock v4 Marcelo Tosatti
2009-06-04 18:08                     ` [patch 1/4] KVM: x86: grab pic lock in kvm_pic_clear_isr_ack Marcelo Tosatti
2009-06-04 18:08                     ` [patch 2/4] KVM: move coalesced_mmio locking to its own device Marcelo Tosatti
2009-06-04 18:08                     ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
2009-06-04 18:08                     ` [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock Marcelo Tosatti
2009-06-08  9:18                     ` [patch 0/4] move irq protection role to separate lock v4 Avi Kivity
2009-05-28  4:45               ` [patch 3/4] KVM: introduce irq_lock, use it to protect ioapic Marcelo Tosatti
2009-05-28  4:45               ` [patch 4/4] KVM: switch irq injection/acking data structures to irq_lock Marcelo Tosatti
2009-05-20 18:48           ` [patch 4/4] KVM: switch irq injection/acking " Marcelo Tosatti
2009-05-21  4:50           ` [patch 0/4] move irq protection role to separate lock v2 Marcelo Tosatti
2009-05-21  6:55             ` Christian Bornträger
2009-05-21  7:26               ` Avi Kivity
2009-05-21 14:32                 ` Marcelo Tosatti
2009-05-21 15:02                   ` Avi Kivity
2009-05-18 16:56 ` [patch 3/4] KVM: introduce irq_lock, use it protect ioapic Marcelo Tosatti
2009-05-20 12:11   ` Avi Kivity
2009-05-20 14:11     ` Marcelo Tosatti
2009-05-20 14:29       ` Avi Kivity
2009-05-18 16:56 ` [patch 4/4] KVM: switch irq injection/acking to irq_lock Marcelo Tosatti
2009-05-20 12:15   ` Gregory Haskins
2009-05-20 14:16     ` Marcelo Tosatti
2009-05-24 14:53     ` Avi Kivity
