All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/10] make interrupt injection lockless (almost)
@ 2009-08-09 12:41 Gleb Natapov
  2009-08-09 12:41 ` [PATCH 01/10] Change irq routing table to use gsi indexed array Gleb Natapov
                   ` (9 more replies)
  0 siblings, 10 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

kvm->irq_lock protects too much stuff, but still fail to protect
everything it was design to protect (see ack notifiers call in pic). I
want to make IRQ injection fast path as lockless as possible. This patch
series split kvm->irq_lock mutex to smaller spinlocks each one protects
only one thing. Irq routing, irq notifier lists and ioapic gain their own
spinlock.  Pic is already has its own lock. This patch series also makes
interrupt injection to lapic lockless (several kvm_irq_delivery_to_apic()
may run in parallel), but access to lapic was never fully locked in the
first place. VCPU could access lapic in parallel with interrupt injection.
Patch 1 changes irq routing data structure to much more efficient one.
Patch 10 introduce API that allows to send MSI message without going
through irq routing table. This allows us, among other thing, to limit irq
routing table to a small number of entries.

Gleb Natapov (10):
  Change irq routing table to use gsi indexed array.
  Move irq routing data structure to rcu locking
  Move irq ack notifier list to arch independent code.
  Convert irq notifiers lists to RCU locking.
  Protect irq_sources_bitmap by kvm->lock instead of kvm->irq_lock
  Move irq routing to its own locking.
  Move irq notifiers lists to its own locking.
  Move IO APIC to its own lock.
  Drop kvm->irq_lock lock.
  Introduce MSI message sending interface that bypass IRQ routing.

 arch/ia64/include/asm/kvm_host.h |    1 -
 arch/ia64/kvm/kvm-ia64.c         |   55 +++++++++--
 arch/x86/include/asm/kvm_host.h  |    3 +-
 arch/x86/kvm/i8254.c             |    4 +-
 arch/x86/kvm/i8259.c             |    8 +-
 arch/x86/kvm/lapic.c             |    7 +-
 arch/x86/kvm/x86.c               |   58 +++++++++---
 include/linux/kvm.h              |   10 ++-
 include/linux/kvm_host.h         |   21 +++-
 virt/kvm/eventfd.c               |    2 -
 virt/kvm/ioapic.c                |   53 +++++++----
 virt/kvm/ioapic.h                |    4 +-
 virt/kvm/irq_comm.c              |  196 +++++++++++++++++++++----------------
 virt/kvm/kvm_main.c              |   11 +-
 14 files changed, 277 insertions(+), 156 deletions(-)


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 01/10] Change irq routing table to use gsi indexed array.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 14:25   ` Avi Kivity
  2009-08-09 12:41 ` [PATCH 02/10] Move irq routing data structure to rcu locking Gleb Natapov
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

Use gsi indexed array instead of scanning all entries on each interrupt
injection. Also maintain back mapping from irqchip/pin to gsi to speedup
interrupt acknowledgment notifications.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |   11 +++++-
 virt/kvm/irq_comm.c      |   87 ++++++++++++++++++++++++---------------------
 virt/kvm/kvm_main.c      |    1 -
 3 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f814512..a38f576 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,7 +129,14 @@ struct kvm_kernel_irq_routing_entry {
 		} irqchip;
 		struct msi_msg msi;
 	};
-	struct list_head link;
+	struct hlist_node link;
+};
+
+struct kvm_irq_routing_table {
+	int chip[3][KVM_IOAPIC_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 max_gsi;
+	struct hlist_head map[0];
 };
 
 struct kvm {
@@ -167,7 +174,7 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+	struct kvm_irq_routing_table *irq_routing;
 	struct hlist_head mask_notifier_list;
 #endif
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 001663f..2ab807f 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -127,6 +127,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
+	struct kvm_irq_routing_table *irq_rt;
+	struct hlist_node *n;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -150,8 +152,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->gsi == irq) {
+	irq_rt = kvm->irq_routing;
+	if (irq < irq_rt->max_gsi)
+		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
 			int r = e->set(e, kvm, sig_level);
 			if (r < 0)
 				continue;
@@ -163,20 +166,15 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_irq_ack_notifier *kian;
 	struct hlist_node *n;
-	unsigned gsi = pin;
+	unsigned gsi;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
-		    e->irqchip.irqchip == irqchip &&
-		    e->irqchip.pin == pin) {
-			gsi = e->gsi;
-			break;
-		}
+	gsi = kvm->irq_routing->chip[irqchip][pin];
+	if (gsi == -1)
+		gsi = pin;
 
 	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
@@ -267,22 +265,15 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 			kimn->func(kimn, mask);
 }
 
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
-	struct kvm_kernel_irq_routing_entry *e, *n;
-
-	list_for_each_entry_safe(e, n, irq_routing, link)
-		kfree(e);
-}
-
 void kvm_free_irq_routing(struct kvm *kvm)
 {
 	mutex_lock(&kvm->irq_lock);
-	__kvm_free_irq_routing(&kvm->irq_routing);
+	kfree(kvm->irq_routing);
 	mutex_unlock(&kvm->irq_lock);
 }
 
-static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
 			       const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -309,6 +300,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 		}
 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
+		if (e->irqchip.pin > KVM_IOAPIC_NUM_PINS)
+			goto out;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
@@ -319,6 +313,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 	default:
 		goto out;
 	}
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
 out:
 	return r;
@@ -330,43 +326,52 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned nr,
 			unsigned flags)
 {
-	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
-	struct list_head tmp = LIST_HEAD_INIT(tmp);
-	struct kvm_kernel_irq_routing_entry *e = NULL;
-	unsigned i;
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, j, max_gsi = 0;
 	int r;
 
 	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		max_gsi = max(max_gsi, ue[i].gsi);
+	}
+
+	max_gsi += 1;
+
+	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
+		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->rt_entries = (void *)&new->map[max_gsi];
+
+	new->max_gsi = max_gsi;
+	for (i = 0; i < 3; i++)
+		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
+			new->chip[i][j] = -1;
+
+	for (i = 0; i < nr; ++i) {
 		r = -EINVAL;
-		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
-			goto out;
 		if (ue->flags)
 			goto out;
-		r = -ENOMEM;
-		e = kzalloc(sizeof(*e), GFP_KERNEL);
-		if (!e)
-			goto out;
-		r = setup_routing_entry(e, ue);
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
 		if (r)
 			goto out;
 		++ue;
-		list_add(&e->link, &irq_list);
-		e = NULL;
 	}
 
 	mutex_lock(&kvm->irq_lock);
-	list_splice(&kvm->irq_routing, &tmp);
-	INIT_LIST_HEAD(&kvm->irq_routing);
-	list_splice(&irq_list, &kvm->irq_routing);
-	INIT_LIST_HEAD(&irq_list);
-	list_splice(&tmp, &irq_list);
+	old = kvm->irq_routing;
+	kvm->irq_routing = new;
 	mutex_unlock(&kvm->irq_lock);
 
+	new = old;
 	r = 0;
 
 out:
-	kfree(e);
-	__kvm_free_irq_routing(&irq_list);
+	kfree(new);
 	return r;
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1df4c04..e8b03ee 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -944,7 +944,6 @@ static struct kvm *kvm_create_vm(void)
 	if (IS_ERR(kvm))
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 #endif
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 02/10] Move irq routing data structure to rcu locking
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
  2009-08-09 12:41 ` [PATCH 01/10] Change irq routing table to use gsi indexed array Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 12:41 ` [PATCH 03/10] Move irq ack notifier list to arch independent code Gleb Natapov
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 virt/kvm/irq_comm.c |   15 ++++++++++-----
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 2ab807f..9cdd984 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -152,7 +152,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	irq_rt = kvm->irq_routing;
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
 	if (irq < irq_rt->max_gsi)
 		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
 			int r = e->set(e, kvm, sig_level);
@@ -161,6 +162,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 
 			ret = r + ((ret < 0) ? 0 : ret);
 		}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -172,9 +174,11 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	gsi = kvm->irq_routing->chip[irqchip][pin];
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
 	if (gsi == -1)
 		gsi = pin;
+	rcu_read_unlock();
 
 	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
@@ -267,9 +271,9 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
-	mutex_lock(&kvm->irq_lock);
+	/* Called only during vm destruction. Nobody can use the pointer
+	   at this stage */
 	kfree(kvm->irq_routing);
-	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_irq_routing_table *rt,
@@ -364,8 +368,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	kvm->irq_routing = new;
+	rcu_assign_pointer(kvm->irq_routing, new);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 
 	new = old;
 	r = 0;
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 03/10] Move irq ack notifier list to arch independent code.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
  2009-08-09 12:41 ` [PATCH 01/10] Change irq routing table to use gsi indexed array Gleb Natapov
  2009-08-09 12:41 ` [PATCH 02/10] Move irq routing data structure to rcu locking Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 12:41 ` [PATCH 04/10] Convert irq notifiers lists to RCU locking Gleb Natapov
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

Mask irq notifier list is already there.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h |    1 -
 arch/x86/include/asm/kvm_host.h  |    1 -
 include/linux/kvm_host.h         |    1 +
 virt/kvm/irq_comm.c              |    4 ++--
 virt/kvm/kvm_main.c              |    1 +
 5 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index d9b6325..a362e67 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -475,7 +475,6 @@ struct kvm_arch {
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	int iommu_flags;
-	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b17d845..7dfde38 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -400,7 +400,6 @@ struct kvm_arch{
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
-	struct hlist_head irq_ack_notifier_list;
 	int vapics_in_nmi_mode;
 
 	unsigned int tss_addr;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a38f576..d573652 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -176,6 +176,7 @@ struct kvm {
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct kvm_irq_routing_table *irq_routing;
 	struct hlist_head mask_notifier_list;
+	struct hlist_head irq_ack_notifier_list;
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9cdd984..e0940d5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -180,7 +180,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 		gsi = pin;
 	rcu_read_unlock();
 
-	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
 			kian->irq_acked(kian);
 }
@@ -189,7 +189,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e8b03ee..fcc0c44 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -945,6 +945,7 @@ static struct kvm *kvm_create_vm(void)
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 04/10] Convert irq notifiers lists to RCU locking.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (2 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 03/10] Move irq ack notifier list to arch independent code Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 12:41 ` [PATCH 05/10] Protect irq_sources_bitmap by kvm->lock instead of kvm->irq_lock Gleb Natapov
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

Use RCU locking for mask/ack notifiers lists.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 virt/kvm/irq_comm.c |   20 +++++++++++---------
 1 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e0940d5..02412c3 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -178,18 +178,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
 	if (gsi == -1)
 		gsi = pin;
-	rcu_read_unlock();
 
-	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
+	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
 			kian->irq_acked(kian);
+	rcu_read_unlock();
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -197,8 +197,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 				    struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_del_init(&kian->link);
+	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)
@@ -245,7 +246,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 {
 	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
-	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -253,8 +254,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_del(&kimn->link);
+	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -262,11 +264,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
-	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
+	rcu_read_unlock();
 }
 
 void kvm_free_irq_routing(struct kvm *kvm)
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 05/10] Protect irq_sources_bitmap by kvm->lock instead of kvm->irq_lock
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (3 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 04/10] Convert irq notifiers lists to RCU locking Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 12:41 ` [PATCH 06/10] Move irq routing to its own locking Gleb Natapov
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

It is already protected by kvm->lock on device assignment path. Just
take the same lock in the PIT code.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/i8254.c |    2 ++
 virt/kvm/irq_comm.c  |    8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523..1330b2a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -611,7 +611,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	if (!pit)
 		return NULL;
 
+	mutex_lock(&kvm->lock);
 	pit->irq_source_id = kvm_request_irq_source_id(kvm);
+	mutex_unlock(&kvm->lock);
 	if (pit->irq_source_id < 0) {
 		kfree(pit);
 		return NULL;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 02412c3..6b09053 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -207,7 +207,8 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
 	int irq_source_id;
 
-	mutex_lock(&kvm->irq_lock);
+	WARN_ON(!mutex_is_locked(&kvm->lock));
+
 	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
@@ -218,7 +219,6 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
-	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -227,9 +227,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 	int i;
 
+	/* during vm destruction this function is called without locking */
+	WARN_ON(!mutex_is_locked(&kvm->lock) && atomic_read(&kvm->users_count));
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
-	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -238,7 +239,6 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 06/10] Move irq routing to its own locking.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (4 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 05/10] Protect irq_sources_bitmap by kvm->lock instead of kvm->irq_lock Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 14:52   ` Avi Kivity
  2009-08-09 12:41 ` [PATCH 07/10] Move irq notifiers lists " Gleb Natapov
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |    1 +
 virt/kvm/irq_comm.c      |    4 ++--
 virt/kvm/kvm_main.c      |    1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d573652..ce28cb7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -175,6 +175,7 @@ struct kvm {
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct kvm_irq_routing_table *irq_routing;
+	spinlock_t irq_routing_lock;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 6b09053..55eb6f6 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -368,10 +368,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		++ue;
 	}
 
-	mutex_lock(&kvm->irq_lock);
+	spin_lock(&kvm->irq_routing_lock);
 	old = kvm->irq_routing;
 	rcu_assign_pointer(kvm->irq_routing, new);
-	mutex_unlock(&kvm->irq_lock);
+	spin_unlock(&kvm->irq_routing_lock);
 	synchronize_rcu();
 
 	new = old;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fcc0c44..2f4a47e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -944,6 +944,7 @@ static struct kvm *kvm_create_vm(void)
 	if (IS_ERR(kvm))
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	spin_lock_init(&kvm->irq_routing_lock);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 07/10] Move irq notifiers lists to its own locking.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (5 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 06/10] Move irq routing to its own locking Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 14:52   ` Avi Kivity
  2009-08-09 12:41 ` [PATCH 08/10] Move IO APIC to its own lock Gleb Natapov
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |    1 +
 virt/kvm/irq_comm.c      |   16 ++++++++--------
 virt/kvm/kvm_main.c      |    1 +
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce28cb7..8791ce8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -178,6 +178,7 @@ struct kvm {
 	spinlock_t irq_routing_lock;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
+	spinlock_t irq_notifier_list_lock;
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 55eb6f6..65f66f1 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -188,17 +188,17 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
-	mutex_lock(&kvm->irq_lock);
+	spin_lock(&kvm->irq_notifier_list_lock);
 	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
+	spin_unlock(&kvm->irq_notifier_list_lock);
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 				    struct kvm_irq_ack_notifier *kian)
 {
-	mutex_lock(&kvm->irq_lock);
+	spin_lock(&kvm->irq_notifier_list_lock);
 	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
+	spin_unlock(&kvm->irq_notifier_list_lock);
 	synchronize_rcu();
 }
 
@@ -244,18 +244,18 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
-	mutex_lock(&kvm->irq_lock);
+	spin_lock(&kvm->irq_notifier_list_lock);
 	kimn->irq = irq;
 	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
+	spin_unlock(&kvm->irq_notifier_list_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
-	mutex_lock(&kvm->irq_lock);
+	spin_lock(&kvm->irq_notifier_list_lock);
 	hlist_del_rcu(&kimn->link);
-	mutex_unlock(&kvm->irq_lock);
+	spin_unlock(&kvm->irq_notifier_list_lock);
 	synchronize_rcu();
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2f4a47e..c108edc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -947,6 +947,7 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock_init(&kvm->irq_routing_lock);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
+	spin_lock_init(&kvm->irq_notifier_list_lock);
 #endif
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 08/10] Move IO APIC to its own lock.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (6 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 07/10] Move irq notifiers lists " Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 14:54   ` Avi Kivity
  2009-08-09 12:41 ` [PATCH 09/10] Drop kvm->irq_lock lock Gleb Natapov
  2009-08-09 12:41 ` [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing Gleb Natapov
  9 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

Introduce new function kvm_notifier_set_irq() that should be used
to change irq line level from irq notifiers. When irq notifier
change irq line level it calls into irq chip code recursively. The
function avoids taking a lock recursively.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        |   27 ++++++++++++++-----
 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/i8259.c            |    8 +++--
 arch/x86/kvm/lapic.c            |    5 +---
 arch/x86/kvm/x86.c              |   30 ++++++++++++++-------
 include/linux/kvm_host.h        |    3 +-
 virt/kvm/ioapic.c               |   53 ++++++++++++++++++++++++--------------
 virt/kvm/ioapic.h               |    4 ++-
 virt/kvm/irq_comm.c             |   25 ++++++++++++++----
 virt/kvm/kvm_main.c             |    2 +-
 10 files changed, 105 insertions(+), 54 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0ad09f0..dd7ef2d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
 
 	r = 0;
 	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
-				sizeof(struct kvm_ioapic_state));
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(&chip->chip.ioapic, ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
@@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 	r = 0;
 	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		memcpy(ioapic_irqchip(kvm),
-				&chip->chip.ioapic,
-				sizeof(struct kvm_ioapic_state));
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(ioapic, &chip->chip.ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7dfde38..09b2ef9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -623,7 +623,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 
-int kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level, bool notifier);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 01f1516..d579ff1 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -153,19 +153,21 @@ void kvm_pic_update_irq(struct kvm_pic *s)
 	spin_unlock(&s->lock);
 }
 
-int kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level, bool notifier)
 {
 	struct kvm_pic *s = opaque;
 	int ret = -1;
 
-	spin_lock(&s->lock);
+	if (!notifier)
+		spin_lock(&s->lock);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
 				      s->pics[irq >> 3].imr, ret == 0);
 	}
-	spin_unlock(&s->lock);
+	if (!notifier)
+		spin_unlock(&s->lock);
 
 	return ret;
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce195f8..f24d4d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
-		mutex_lock(&apic->vcpu->kvm->irq_lock);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
 		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
-		mutex_unlock(&apic->vcpu->kvm->irq_lock);
-	}
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5a69ad1..1ac1695 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2022,10 +2022,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 			&pic_irqchip(kvm)->pics[1],
 			sizeof(struct kvm_pic_state));
 		break;
-	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic,
-			ioapic_irqchip(kvm),
-			sizeof(struct kvm_ioapic_state));
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(&chip->chip.ioapic, ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
@@ -2054,12 +2060,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 			sizeof(struct kvm_pic_state));
 		spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
-	case KVM_IRQCHIP_IOAPIC:
-		mutex_lock(&kvm->irq_lock);
-		memcpy(ioapic_irqchip(kvm),
-			&chip->chip.ioapic,
-			sizeof(struct kvm_ioapic_state));
-		mutex_unlock(&kvm->irq_lock);
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(ioapic, &chip->chip.ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8791ce8..3610661 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -121,7 +121,7 @@ struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int level);
+		   struct kvm *kvm, int level, bool notifier);
 	union {
 		struct {
 			unsigned irqchip;
@@ -407,6 +407,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+int kvm_notifier_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index fa05f67..5a74649 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -175,13 +175,17 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 }
 
-int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level,
+		       bool notifier)
 {
 	u32 old_irr = ioapic->irr;
 	u32 mask = 1 << irq;
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
+	/* notifier may call ioapic recursively */
+	if (!notifier)
+		spin_lock(&ioapic->lock);
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
@@ -196,34 +200,42 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 		}
 		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
+	if (!notifier)
+		spin_unlock(&ioapic->lock);
+
 	return ret;
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
-				    int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
+				     int trigger_mode)
 {
-	union kvm_ioapic_redirect_entry *ent;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+		if (ent->fields.vector != vector)
+			continue;
 
-	ent = &ioapic->redirtbl[pin];
+		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
 
-	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
+		if (trigger_mode != IOAPIC_LEVEL_TRIG)
+			continue;
 
-	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
-			ioapic_service(ioapic, pin);
+		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+			ioapic_service(ioapic, i);
 	}
 }
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	int i;
 
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
+	spin_lock(&ioapic->lock);
+	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+	spin_unlock(&ioapic->lock);
 }
 
 static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -248,8 +260,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
+	spin_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		result = ioapic->ioregsel;
@@ -263,6 +275,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 		result = 0;
 		break;
 	}
+	spin_unlock(&ioapic->lock);
+
 	switch (len) {
 	case 8:
 		*(u64 *) val = result;
@@ -275,7 +289,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
-	mutex_unlock(&ioapic->kvm->irq_lock);
 	return 0;
 }
 
@@ -291,15 +304,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-		goto unlock;
+		return 0;
 	}
 
 	addr &= 0xff;
+	spin_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		ioapic->ioregsel = data;
@@ -310,15 +323,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		break;
 #ifdef	CONFIG_IA64
 	case IOAPIC_REG_EOI:
-		kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
+		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
 		break;
 #endif
 
 	default:
 		break;
 	}
-unlock:
-	mutex_unlock(&ioapic->kvm->irq_lock);
+	spin_unlock(&ioapic->lock);
 	return 0;
 }
 
@@ -347,6 +359,7 @@ int kvm_ioapic_init(struct kvm *kvm)
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
+	spin_lock_init(&ioapic->lock);
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b71..2cfa662 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -44,6 +44,7 @@ struct kvm_ioapic {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
+	spinlock_t lock;
 };
 
 #ifdef DEBUG
@@ -69,7 +70,8 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
-int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level,
+		       bool notifier);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 65f66f1..43ace95 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -32,19 +32,21 @@
 #include "ioapic.h"
 
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			   struct kvm *kvm, int level)
+			   struct kvm *kvm, int level, bool notifier)
 {
 #ifdef CONFIG_X86
-	return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+	return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level,
+			       notifier);
 #else
 	return -1;
 #endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			      struct kvm *kvm, int level)
+			      struct kvm *kvm, int level, bool notifier)
 {
-	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level,
+		notifier);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -122,7 +124,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
  *  = 0   Interrupt was coalesced (previous irq is still pending)
  *  > 0   Number of CPUs interrupt was delivered to
  */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+static int __kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level,
+	bool notifier)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	unsigned long *irq_state, sig_level;
@@ -156,7 +159,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	irq_rt = rcu_dereference(kvm->irq_routing);
 	if (irq < irq_rt->max_gsi)
 		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
-			int r = e->set(e, kvm, sig_level);
+			int r = e->set(e, kvm, sig_level, notifier);
 			if (r < 0)
 				continue;
 
@@ -166,6 +169,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	return ret;
 }
 
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+{
+	return __kvm_set_irq(kvm, irq_source_id, irq, level, 0);
+}
+
+int kvm_notifier_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+{
+	return __kvm_set_irq(kvm, irq_source_id, irq, level, 1);
+}
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c108edc..d8dc75c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -198,7 +198,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	dev = container_of(kian, struct kvm_assigned_dev_kernel,
 			   ack_notifier);
 
-	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
+	kvm_notifier_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
 
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 09/10] Drop kvm->irq_lock lock.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (7 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 08/10] Move IO APIC to its own lock Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 12:41 ` [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing Gleb Natapov
  9 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

The only thing it protects now is interrupt injection into lapic and
this can work lockless. Even now with kvm->irq_lock in place access
to lapic is not entirely serialized since vcpu access doesn't take
kvm->irq_lock.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |    2 --
 arch/x86/kvm/i8254.c     |    2 --
 arch/x86/kvm/lapic.c     |    2 --
 arch/x86/kvm/x86.c       |    2 --
 include/linux/kvm_host.h |    1 -
 virt/kvm/eventfd.c       |    2 --
 virt/kvm/irq_comm.c      |    6 +-----
 virt/kvm/kvm_main.c      |    5 +----
 8 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index dd7ef2d..8f1fc3a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -998,10 +998,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 1330b2a..f3d3382 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -690,10 +690,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f24d4d0..e41e948 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -501,9 +501,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
-	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
-	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ac1695..d313462 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2284,10 +2284,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3610661..b3db9bf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -172,7 +172,6 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
-	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct kvm_irq_routing_table *irq_routing;
 	spinlock_t irq_routing_lock;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 99017e8..95954ad 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
 
-	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
-	mutex_unlock(&kvm->irq_lock);
 }
 
 /*
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 43ace95..dd9a5ca 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -65,8 +65,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -118,7 +116,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->irq_lock mutex held
+/*
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -135,8 +133,6 @@ static int __kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level,
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8dc75c..df1362b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -68,7 +68,7 @@ MODULE_LICENSE("GPL");
 /*
  * Ordering of locks:
  *
- * 		kvm->slots_lock --> kvm->lock --> kvm->irq_lock
+ * 		kvm->slots_lock --> kvm->lock
  */
 
 DEFINE_SPINLOCK(kvm_lock);
@@ -137,7 +137,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -156,7 +155,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 			    assigned_dev->guest_irq, 1);
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -982,7 +980,6 @@ static struct kvm *kvm_create_vm(void)
 	kvm_io_bus_init(&kvm->pio_bus);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
-	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
                   ` (8 preceding siblings ...)
  2009-08-09 12:41 ` [PATCH 09/10] Drop kvm->irq_lock lock Gleb Natapov
@ 2009-08-09 12:41 ` Gleb Natapov
  2009-08-09 14:56   ` Avi Kivity
  9 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 12:41 UTC (permalink / raw)
  To: avi; +Cc: kvm

Sending of MSI using IRQ routing is an artificial concept and potentially
big number of MSIs (2048 per device) make it also inefficient. This
patch adds an interface to inject MSI messages from userspace to lapic
logic directly. The patch also reduces the maximum number of IRQ routing
entries to 128 since MSIs will no longer go there and 128 entries cover
5 ioapics and this ought to be enough for anybody.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |   26 ++++++++++++++++++++++++++
 arch/x86/kvm/x86.c       |   26 ++++++++++++++++++++++++++
 include/linux/kvm.h      |   10 ++++++++--
 include/linux/kvm_host.h |    3 ++-
 virt/kvm/irq_comm.c      |   23 ++++++++++++++---------
 5 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8f1fc3a..c136085 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -195,6 +195,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_MSI_MSG:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1010,6 +1011,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		break;
 		}
+	case KVM_MSI_MSG: {
+		struct kvm_irq_routing_msi msg;
+		struct msi_msg msi;
+
+		r = -EFAULT;
+		if (copy_from_user(&msg, argp, sizeof msg))
+			goto out;
+		r = -EINVAL;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		if (msg.flags)
+			goto out;
+
+		msi.address_lo = msg.address_lo;
+		msi.address_hi = msg.address_hi;
+		msi.data = msg.data;
+
+		msg.status = kvm_set_msi(kvm, &msi);
+		if (copy_to_user(argp, &msg, sizeof msg)) {
+			r = -EFAULT;
+			goto out;
+		}
+		r = 0;
+		break;
+	}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 		struct kvm_irqchip chip;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d313462..0e5c946 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1211,6 +1211,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PIT2:
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
+	case KVM_CAP_MSI_MSG:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2296,6 +2297,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		break;
 	}
+	case KVM_MSI_MSG: {
+		struct kvm_irq_routing_msi msg;
+		struct msi_msg msi;
+
+		r = -EFAULT;
+		if (copy_from_user(&msg, argp, sizeof msg))
+			goto out;
+		r = -EINVAL;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		if (msg.flags)
+			goto out;
+
+		msi.address_lo = msg.address_lo;
+		msi.address_hi = msg.address_hi;
+		msi.data = msg.data;
+
+		msg.status = kvm_set_msi(kvm, &msi);
+		if (copy_to_user(argp, &msg, sizeof msg)) {
+			r = -EFAULT;
+			goto out;
+		}
+		r = 0;
+		break;
+	}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f8f8900..a326034 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -436,6 +436,7 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#define KVM_CAP_MSI_MSG 38
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -447,8 +448,11 @@ struct kvm_irq_routing_irqchip {
 struct kvm_irq_routing_msi {
 	__u32 address_lo;
 	__u32 address_hi;
-	__u32 data;
-	__u32 pad;
+	union {
+		__u32 data;
+		__s32 status;
+	};
+	__u32 flags;
 };
 
 /* gsi routing entry types */
@@ -593,6 +597,8 @@ struct kvm_irqfd {
 #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
 #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
 #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
+#define KVM_MSI_MSG					\
+	_IOWR(KVMIO,  0x9f, struct kvm_irq_routing_msi)
 
 /*
  * Deprecated interfaces
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b3db9bf..857f2f4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -407,6 +407,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 int kvm_notifier_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+int kvm_set_msi(struct kvm *kvm, struct msi_msg *msg);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
@@ -524,7 +525,7 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 
-#define KVM_MAX_IRQ_ROUTES 1024
+#define KVM_MAX_IRQ_ROUTES 128
 
 int kvm_setup_default_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index dd9a5ca..963d7a3 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -49,6 +49,12 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 		notifier);
 }
 
+static int kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+		    struct kvm *kvm, int level, bool notifier)
+{
+	return kvm_set_msi(kvm, &e->msi);
+}
+
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
 {
 #ifdef CONFIG_IA64
@@ -95,20 +101,19 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
-static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		       struct kvm *kvm, int level)
+int kvm_set_msi(struct kvm *kvm, struct msi_msg *msi)
 {
 	struct kvm_lapic_irq irq;
 
-	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+	trace_kvm_msi_set_irq(msi->address_lo, msi->data);
 
-	irq.dest_id = (e->msi.address_lo &
+	irq.dest_id = (msi->address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-	irq.vector = (e->msi.data &
+	irq.vector = (msi->data &
 			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
-	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
-	irq.delivery_mode = e->msi.data & 0x700;
+	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & msi->address_lo;
+	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & msi->data;
+	irq.delivery_mode = msi->data & 0x700;
 	irq.level = 1;
 	irq.shorthand = 0;
 
@@ -320,7 +325,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
-		e->set = kvm_set_msi;
+		e->set = kvm_set_msi_irq;
 		e->msi.address_lo = ue->u.msi.address_lo;
 		e->msi.address_hi = ue->u.msi.address_hi;
 		e->msi.data = ue->u.msi.data;
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 01/10] Change irq routing table to use gsi indexed array.
  2009-08-09 12:41 ` [PATCH 01/10] Change irq routing table to use gsi indexed array Gleb Natapov
@ 2009-08-09 14:25   ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 14:25 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 03:41 PM, Gleb Natapov wrote:
> Use gsi indexed array instead of scanning all entries on each interrupt
> injection. Also maintain back mapping from irqchip/pin to gsi to speedup
> interrupt acknowledgment notifications.
>
> Signed-off-by: Gleb Natapov<gleb@redhat.com>
> ---
>   include/linux/kvm_host.h |   11 +++++-
>   virt/kvm/irq_comm.c      |   87 ++++++++++++++++++++++++---------------------
>   virt/kvm/kvm_main.c      |    1 -
>   3 files changed, 55 insertions(+), 44 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f814512..a38f576 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -129,7 +129,14 @@ struct kvm_kernel_irq_routing_entry {
>   		} irqchip;
>   		struct msi_msg msi;
>   	};
> -	struct list_head link;
> +	struct hlist_node link;
> +};
> +
> +struct kvm_irq_routing_table {
> +	int chip[3][KVM_IOAPIC_NUM_PINS];
> +	struct kvm_kernel_irq_routing_entry *rt_entries;
> +	u32 max_gsi;
>    

That's the table size, no?  so call it nr_rt_entries to reflect that 
(max_* implies an off-by-one).

> +	struct hlist_head map[0];
>   };
>    

What's this? (comment explaining the type of data).

>
>   struct kvm {
> @@ -167,7 +174,7 @@ struct kvm {
>
>   	struct mutex irq_lock;
>   #ifdef CONFIG_HAVE_KVM_IRQCHIP
> -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
>    

Like here.

> @@ -150,8 +152,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>   	 * IOAPIC.  So set the bit in both. The guest will ignore
>   	 * writes to the unused one.
>   	 */
> -	list_for_each_entry(e,&kvm->irq_routing, link)
> -		if (e->gsi == irq) {
> +	irq_rt = kvm->irq_routing;
> +	if (irq<  irq_rt->max_gsi)
>    

irq here is int, so can overflow.  I think all paths are protected, but 
better to change irq to unsigned just in case.

> +		hlist_for_each_entry(e, n,&irq_rt->map[irq], link) {
>   			int r = e->set(e, kvm, sig_level);
>   			if (r<  0)
>   				continue;
> @@ -163,20 +166,15 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>
>   void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>   {
> -	struct kvm_kernel_irq_routing_entry *e;
>   	struct kvm_irq_ack_notifier *kian;
>   	struct hlist_node *n;
> -	unsigned gsi = pin;
> +	unsigned gsi;
>
>   	trace_kvm_ack_irq(irqchip, pin);
>
> -	list_for_each_entry(e,&kvm->irq_routing, link)
> -		if (e->type == KVM_IRQ_ROUTING_IRQCHIP&&
> -		    e->irqchip.irqchip == irqchip&&
> -		    e->irqchip.pin == pin) {
> -			gsi = e->gsi;
> -			break;
> -		}
> +	gsi = kvm->irq_routing->chip[irqchip][pin];
> +	if (gsi == -1)
> +		gsi = pin;
>    

Please separate the reverse map to a separate patch.

> +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> +		      GFP_KERNEL);
>    

Wow.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/10] Move irq notifiers lists to its own locking.
  2009-08-09 14:52   ` Avi Kivity
@ 2009-08-09 14:49     ` Gleb Natapov
  2009-08-09 14:57       ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 14:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, Aug 09, 2009 at 05:52:30PM +0300, Avi Kivity wrote:
> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>> Signed-off-by: Gleb Natapov<gleb@redhat.com>
>> ---
>>   include/linux/kvm_host.h |    1 +
>>   virt/kvm/irq_comm.c      |   16 ++++++++--------
>>   virt/kvm/kvm_main.c      |    1 +
>>   3 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index ce28cb7..8791ce8 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -178,6 +178,7 @@ struct kvm {
>>   	spinlock_t irq_routing_lock;
>>   	struct hlist_head mask_notifier_list;
>>   	struct hlist_head irq_ack_notifier_list;
>> +	spinlock_t irq_notifier_list_lock;
>>    
>
> Again, why?
>
Because later patches remove irq_lock entirely and lock is need for
write even with rcu. So instead of reusing irq_lock to protect those
structures on write I introduce locks with self descriptive names.

--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 06/10] Move irq routing to its own locking.
  2009-08-09 12:41 ` [PATCH 06/10] Move irq routing to its own locking Gleb Natapov
@ 2009-08-09 14:52   ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 14:52 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 03:41 PM, Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov<gleb@redhat.com>
> ---
>   include/linux/kvm_host.h |    1 +
>   virt/kvm/irq_comm.c      |    4 ++--
>   virt/kvm/kvm_main.c      |    1 +
>   3 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d573652..ce28cb7 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -175,6 +175,7 @@ struct kvm {
>   	struct mutex irq_lock;
>   #ifdef CONFIG_HAVE_KVM_IRQCHIP
>   	struct kvm_irq_routing_table *irq_routing;
> +	spinlock_t irq_routing_lock;
>    

Why is this needed?  The irq routing table adds very little contention 
since it is rcu locked.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/10] Move irq notifiers lists to its own locking.
  2009-08-09 12:41 ` [PATCH 07/10] Move irq notifiers lists " Gleb Natapov
@ 2009-08-09 14:52   ` Avi Kivity
  2009-08-09 14:49     ` Gleb Natapov
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 14:52 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 03:41 PM, Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov<gleb@redhat.com>
> ---
>   include/linux/kvm_host.h |    1 +
>   virt/kvm/irq_comm.c      |   16 ++++++++--------
>   virt/kvm/kvm_main.c      |    1 +
>   3 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ce28cb7..8791ce8 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -178,6 +178,7 @@ struct kvm {
>   	spinlock_t irq_routing_lock;
>   	struct hlist_head mask_notifier_list;
>   	struct hlist_head irq_ack_notifier_list;
> +	spinlock_t irq_notifier_list_lock;
>    

Again, why?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 14:56   ` Avi Kivity
@ 2009-08-09 14:52     ` Gleb Natapov
  2009-08-09 15:01       ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 14:52 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, Aug 09, 2009 at 05:56:30PM +0300, Avi Kivity wrote:
> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
> >Sending of MSI using IRQ routing is an artificial concept and potentially
> >big number of MSIs (2048 per device) make it also inefficient. This
> >patch adds an interface to inject MSI messages from userspace to lapic
> >logic directly. The patch also reduces the maximum number of IRQ routing
> >entries to 128 since MSIs will no longer go there and 128 entries cover
> >5 ioapics and this ought to be enough for anybody.
> 
> In the future many MSIs will be triggered via irqfds, and those
> require irq routing.
> 
Why? My plan is to change irqfd to use the MSI functions.

--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/10] Move IO APIC to its own lock.
  2009-08-09 12:41 ` [PATCH 08/10] Move IO APIC to its own lock Gleb Natapov
@ 2009-08-09 14:54   ` Avi Kivity
  2009-08-09 14:57     ` Gleb Natapov
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 14:54 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 03:41 PM, Gleb Natapov wrote:
> Introduce new function kvm_notifier_set_irq() that should be used
> to change irq line level from irq notifiers. When irq notifier
> change irq line level it calls into irq chip code recursively. The
> function avoids taking a lock recursively.
>    

This looks really horrible.  I don't have an alternative yet, but I'll 
think of one.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 12:41 ` [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing Gleb Natapov
@ 2009-08-09 14:56   ` Avi Kivity
  2009-08-09 14:52     ` Gleb Natapov
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 14:56 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 03:41 PM, Gleb Natapov wrote:
> Sending of MSI using IRQ routing is an artificial concept and potentially
> big number of MSIs (2048 per device) make it also inefficient. This
> patch adds an interface to inject MSI messages from userspace to lapic
> logic directly. The patch also reduces the maximum number of IRQ routing
> entries to 128 since MSIs will no longer go there and 128 entries cover
> 5 ioapics and this ought to be enough for anybody.
>    

In the future many MSIs will be triggered via irqfds, and those require 
irq routing.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/10] Move IO APIC to its own lock.
  2009-08-09 14:54   ` Avi Kivity
@ 2009-08-09 14:57     ` Gleb Natapov
  2009-08-09 15:10       ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 14:57 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, Aug 09, 2009 at 05:54:30PM +0300, Avi Kivity wrote:
> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>> Introduce new function kvm_notifier_set_irq() that should be used
>> to change irq line level from irq notifiers. When irq notifier
>> change irq line level it calls into irq chip code recursively. The
>> function avoids taking a lock recursively.
>>    
>
> This looks really horrible.  I don't have an alternative yet, but I'll  
> think of one.
I agree this is not nice. This is needed only for device assignment
case. That explains why I don't like device assignment :) The problem
is that the only communication channel from guest to assigned device that
goes through the host is interrupt injection/acknowledgement, so we try to
do things (lowering IRQ) on this path that usually are done somewhere else.
 
--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/10] Move irq notifiers lists to its own locking.
  2009-08-09 14:49     ` Gleb Natapov
@ 2009-08-09 14:57       ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 14:57 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 05:49 PM, Gleb Natapov wrote:
> On Sun, Aug 09, 2009 at 05:52:30PM +0300, Avi Kivity wrote:
>    
>> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>>      
>>> Signed-off-by: Gleb Natapov<gleb@redhat.com>
>>> ---
>>>    include/linux/kvm_host.h |    1 +
>>>    virt/kvm/irq_comm.c      |   16 ++++++++--------
>>>    virt/kvm/kvm_main.c      |    1 +
>>>    3 files changed, 10 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index ce28cb7..8791ce8 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -178,6 +178,7 @@ struct kvm {
>>>    	spinlock_t irq_routing_lock;
>>>    	struct hlist_head mask_notifier_list;
>>>    	struct hlist_head irq_ack_notifier_list;
>>> +	spinlock_t irq_notifier_list_lock;
>>>
>>>        
>> Again, why?
>>
>>      
> Because later patches remove irq_lock entirely and lock is need for
> write even with rcu. So instead of reusing irq_lock to protect those
> structures on write I introduce locks with self descriptive names.
>    

I prefer a /* protected by irq_lock */ rather than lock proliferation.  
Locks should be split only if they are contended, and your rcu work 
removes any possible contention.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 14:52     ` Gleb Natapov
@ 2009-08-09 15:01       ` Avi Kivity
  2009-08-09 15:07         ` Gleb Natapov
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 15:01 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 05:52 PM, Gleb Natapov wrote:
> On Sun, Aug 09, 2009 at 05:56:30PM +0300, Avi Kivity wrote:
>    
>> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>>      
>>> Sending of MSI using IRQ routing is an artificial concept and potentially
>>> big number of MSIs (2048 per device) make it also inefficient. This
>>> patch adds an interface to inject MSI messages from userspace to lapic
>>> logic directly. The patch also reduces the maximum number of IRQ routing
>>> entries to 128 since MSIs will no longer go there and 128 entries cover
>>> 5 ioapics and this ought to be enough for anybody.
>>>        
>> In the future many MSIs will be triggered via irqfds, and those
>> require irq routing.
>>
>>      
> Why? My plan is to change irqfd to use the MSI functions.
>
>    

It's still an "install handle, call handle" interface.  Maybe it would 
have been better to start off with your new interface, but having both 
is too much for too little gain.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 15:01       ` Avi Kivity
@ 2009-08-09 15:07         ` Gleb Natapov
  2009-08-09 15:14           ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 15:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, Aug 09, 2009 at 06:01:15PM +0300, Avi Kivity wrote:
> On 08/09/2009 05:52 PM, Gleb Natapov wrote:
>> On Sun, Aug 09, 2009 at 05:56:30PM +0300, Avi Kivity wrote:
>>    
>>> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>>>      
>>>> Sending of MSI using IRQ routing is an artificial concept and potentially
>>>> big number of MSIs (2048 per device) make it also inefficient. This
>>>> patch adds an interface to inject MSI messages from userspace to lapic
>>>> logic directly. The patch also reduces the maximum number of IRQ routing
>>>> entries to 128 since MSIs will no longer go there and 128 entries cover
>>>> 5 ioapics and this ought to be enough for anybody.
>>>>        
>>> In the future many MSIs will be triggered via irqfds, and those
>>> require irq routing.
>>>
>>>      
>> Why? My plan is to change irqfd to use the MSI functions.
>>
>>    
>
> It's still an "install handle, call handle" interface.  Maybe it would  
> have been better to start off with your new interface, but having both  
> is too much for too little gain.
>
Is it not too late to change interface? There was no released kernel with
irqfd yet. And this just adds another level of indirection and one more
point of false cache sharing.

--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/10] Move IO APIC to its own lock.
  2009-08-09 15:10       ` Avi Kivity
@ 2009-08-09 15:09         ` Gleb Natapov
  0 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 15:09 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, Aug 09, 2009 at 06:10:07PM +0300, Avi Kivity wrote:
> On 08/09/2009 05:57 PM, Gleb Natapov wrote:
>> On Sun, Aug 09, 2009 at 05:54:30PM +0300, Avi Kivity wrote:
>>    
>>> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>>>      
>>>> Introduce new function kvm_notifier_set_irq() that should be used
>>>> to change irq line level from irq notifiers. When irq notifier
>>>> change irq line level it calls into irq chip code recursively. The
>>>> function avoids taking a lock recursively.
>>>>
>>>>        
>>> This looks really horrible.  I don't have an alternative yet, but I'll
>>> think of one.
>>>      
>> I agree this is not nice. This is needed only for device assignment
>> case. That explains why I don't like device assignment :)
>
> Well, implementation problems can be fixed.  Other issues with device  
> assignment cannot.
>
>> The problem
>> is that the only communication channel from guest to assigned device that
>> goes through the host is interrupt injection/acknowledgement, so we try to
>> do things (lowering IRQ) on this path that usually are done somewhere else.
>>    
>
> You can queue the injection somehow (work struct? special purpose queue  
> examined after unlock?) and avoid the recursive locking.
>
We can't. Line status should be update here and now. Otherwise interrupt
will be immediately reinjected. 

--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/10] Move IO APIC to its own lock.
  2009-08-09 14:57     ` Gleb Natapov
@ 2009-08-09 15:10       ` Avi Kivity
  2009-08-09 15:09         ` Gleb Natapov
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 15:10 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 05:57 PM, Gleb Natapov wrote:
> On Sun, Aug 09, 2009 at 05:54:30PM +0300, Avi Kivity wrote:
>    
>> On 08/09/2009 03:41 PM, Gleb Natapov wrote:
>>      
>>> Introduce new function kvm_notifier_set_irq() that should be used
>>> to change irq line level from irq notifiers. When irq notifier
>>> change irq line level it calls into irq chip code recursively. The
>>> function avoids taking a lock recursively.
>>>
>>>        
>> This looks really horrible.  I don't have an alternative yet, but I'll
>> think of one.
>>      
> I agree this is not nice. This is needed only for device assignment
> case. That explains why I don't like device assignment :)

Well, implementation problems can be fixed.  Other issues with device 
assignment cannot.

> The problem
> is that the only communication channel from guest to assigned device that
> goes through the host is interrupt injection/acknowledgement, so we try to
> do things (lowering IRQ) on this path that usually are done somewhere else.
>    

You can queue the injection somehow (work struct? special purpose queue 
examined after unlock?) and avoid the recursive locking.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 15:14           ` Avi Kivity
@ 2009-08-09 15:14             ` Gleb Natapov
  2009-08-09 15:22               ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-09 15:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Sun, Aug 09, 2009 at 06:14:32PM +0300, Avi Kivity wrote:
> On 08/09/2009 06:07 PM, Gleb Natapov wrote:
>>> It's still an "install handle, call handle" interface.  Maybe it would
>>> have been better to start off with your new interface, but having both
>>> is too much for too little gain.
>>>
>>>      
>> Is it not too late to change interface? There was no released kernel with
>> irqfd yet. And this just adds another level of indirection and one more
>> point of false cache sharing.
>>    
>
> No irqfd, but we do have irq routing.  The sharing isn't a problem since  
> it's read only.
>
We don't have many users of the old interface now. So what I propose is
to retain old interface for use with old binaries, but limit the number
of GSI entries to small number. New code should use new interface.

Correct sharing is not the problem, but two levels of indirection still
is (one indirection is to find list element another is to call function
by pointer).

--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 15:07         ` Gleb Natapov
@ 2009-08-09 15:14           ` Avi Kivity
  2009-08-09 15:14             ` Gleb Natapov
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 15:14 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 06:07 PM, Gleb Natapov wrote:
>> It's still an "install handle, call handle" interface.  Maybe it would
>> have been better to start off with your new interface, but having both
>> is too much for too little gain.
>>
>>      
> Is it not too late to change interface? There was no released kernel with
> irqfd yet. And this just adds another level of indirection and one more
> point of false cache sharing.
>    

No irqfd, but we do have irq routing.  The sharing isn't a problem since 
it's read only.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing.
  2009-08-09 15:14             ` Gleb Natapov
@ 2009-08-09 15:22               ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-09 15:22 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On 08/09/2009 06:14 PM, Gleb Natapov wrote:
> We don't have many users of the old interface now. So what I propose is
> to retain old interface for use with old binaries, but limit the number
> of GSI entries to small number. New code should use new interface.
>    

Still, we complicate the interface for userspace.

> Correct sharing is not the problem, but two levels of indirection still
> is (one indirection is to find list element another is to call function
> by pointer).
>
>    

irqfd can cache the entries locally if it proves to be a problem.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 06/10] Move irq routing to its own locking.
  2009-07-14 14:30 [PATCH 0/10] [RFC] more fine grained locking for IRQ injection Gleb Natapov
@ 2009-07-14 14:30 ` Gleb Natapov
  0 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-07-14 14:30 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |    1 +
 virt/kvm/irq_comm.c      |    5 ++---
 virt/kvm/kvm_main.c      |    1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b53a5b8..8ca15a0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -167,6 +167,7 @@ struct kvm {
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct kvm_kernel_irq_routing_entry *irq_routing;
+	spinlock_t irq_routing_lock;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ce8fcd3..3dbba41 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -350,11 +350,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		++ue;
 	}
 
-	mutex_lock(&kvm->irq_lock);
+	spin_lock(&kvm->irq_routing_lock);
 	old = kvm->irq_routing;
 	rcu_assign_pointer(kvm->irq_routing, new);
-	mutex_unlock(&kvm->irq_lock);
-
+	spin_unlock(&kvm->irq_routing_lock);
 	synchronize_rcu();
 
 	r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ceaa478..1d70da3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -945,6 +945,7 @@ static struct kvm *kvm_create_vm(void)
 	if (IS_ERR(kvm))
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	spin_lock_init(&kvm->irq_routing_lock);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
-- 
1.6.2.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2009-08-09 15:16 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
2009-08-09 12:41 ` [PATCH 01/10] Change irq routing table to use gsi indexed array Gleb Natapov
2009-08-09 14:25   ` Avi Kivity
2009-08-09 12:41 ` [PATCH 02/10] Move irq routing data structure to rcu locking Gleb Natapov
2009-08-09 12:41 ` [PATCH 03/10] Move irq ack notifier list to arch independent code Gleb Natapov
2009-08-09 12:41 ` [PATCH 04/10] Convert irq notifiers lists to RCU locking Gleb Natapov
2009-08-09 12:41 ` [PATCH 05/10] Protect irq_sources_bitmap by kvm->lock instead of kvm->irq_lock Gleb Natapov
2009-08-09 12:41 ` [PATCH 06/10] Move irq routing to its own locking Gleb Natapov
2009-08-09 14:52   ` Avi Kivity
2009-08-09 12:41 ` [PATCH 07/10] Move irq notifiers lists " Gleb Natapov
2009-08-09 14:52   ` Avi Kivity
2009-08-09 14:49     ` Gleb Natapov
2009-08-09 14:57       ` Avi Kivity
2009-08-09 12:41 ` [PATCH 08/10] Move IO APIC to its own lock Gleb Natapov
2009-08-09 14:54   ` Avi Kivity
2009-08-09 14:57     ` Gleb Natapov
2009-08-09 15:10       ` Avi Kivity
2009-08-09 15:09         ` Gleb Natapov
2009-08-09 12:41 ` [PATCH 09/10] Drop kvm->irq_lock lock Gleb Natapov
2009-08-09 12:41 ` [PATCH 10/10] Introduce MSI message sending interface that bypass IRQ routing Gleb Natapov
2009-08-09 14:56   ` Avi Kivity
2009-08-09 14:52     ` Gleb Natapov
2009-08-09 15:01       ` Avi Kivity
2009-08-09 15:07         ` Gleb Natapov
2009-08-09 15:14           ` Avi Kivity
2009-08-09 15:14             ` Gleb Natapov
2009-08-09 15:22               ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2009-07-14 14:30 [PATCH 0/10] [RFC] more fine grained locking for IRQ injection Gleb Natapov
2009-07-14 14:30 ` [PATCH 06/10] Move irq routing to its own locking Gleb Natapov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.