All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3 v3] MSI-X enabling
@ 2009-02-18  9:44 Sheng Yang
  2009-02-18  9:44 ` [PATCH 1/3] KVM: Ioctls for init MSI-X entry Sheng Yang
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Sheng Yang @ 2009-02-18  9:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm

Updated the patchset followed Marcelo and Avi's comments.

Please also review MSI/MSI-X userspace patch as well.

Thanks!
--
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/3] KVM: Ioctls for init MSI-X entry
  2009-02-18  9:44 [PATCH 0/3 v3] MSI-X enabling Sheng Yang
@ 2009-02-18  9:44 ` Sheng Yang
  2009-02-18 10:32   ` Avi Kivity
  2009-02-18  9:44 ` [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X Sheng Yang
  2009-02-18  9:44 ` [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device Sheng Yang
  2 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-02-18  9:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang

Introduce KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY two ioctls.

This two ioctls are used by userspace to specific guest device MSI-X entry
number and correlate MSI-X entry with GSI during the initialization stage.

MSI-X should be well initialzed before enabling.

Don't support change MSI-X entry number for now.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm.h      |   16 +++++++
 include/linux/kvm_host.h |    3 +
 virt/kvm/kvm_main.c      |  103 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2163b3d..a2dfbe0 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -475,6 +475,8 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
+#define KVM_SET_MSIX_NR _IOR(KVMIO, 0x72, struct kvm_assigned_msix_nr)
+#define KVM_SET_MSIX_ENTRY _IOR(KVMIO, 0x73, struct kvm_assigned_msix_entry)
 
 /*
  * ioctls for vcpu fds
@@ -595,4 +597,18 @@ struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+struct kvm_assigned_msix_nr {
+	__u32 assigned_dev_id;
+	__u16 entry_nr;
+	__u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV		512
+struct kvm_assigned_msix_entry {
+	__u32 assigned_dev_id;
+	__u32 gsi;
+	__u16 entry; /* The index of entry in the MSI-X table */
+	__u16 padding[3];
+};
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c7096d..b105ada 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -326,9 +326,12 @@ struct kvm_assigned_dev_kernel {
 	int assigned_dev_id;
 	int host_busnr;
 	int host_devfn;
+	unsigned int entries_nr;
 	int host_irq;
 	bool host_irq_disabled;
+	struct msix_entry *host_msix_entries;
 	int guest_irq;
+	struct msix_entry *guest_msix_entries;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 266bdaf..bb4aa73 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1593,6 +1593,87 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV)
+			goto msix_nr_out;
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			printk(KERN_ERR "no memory for host msix entries!\n");
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			printk(KERN_ERR "no memory for host msix entries!\n");
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else
+		printk(KERN_WARNING "kvm: not allow recall set msix nr!\n");
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		printk(KERN_ERR "kvm: Too much entries for MSI-X!\n");
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -1917,7 +1998,29 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		break;
 	}
+#ifdef KVM_CAP_DEVICE_MSIX
+	case KVM_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
+#endif /* KVM_CAP_IRQ_ROUTING */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-18  9:44 [PATCH 0/3 v3] MSI-X enabling Sheng Yang
  2009-02-18  9:44 ` [PATCH 1/3] KVM: Ioctls for init MSI-X entry Sheng Yang
@ 2009-02-18  9:44 ` Sheng Yang
  2009-02-18 11:00   ` Avi Kivity
  2009-02-18  9:44 ` [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device Sheng Yang
  2 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-02-18  9:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang

We have to handle more than one interrupt with one handler for MSI-X. So we
need a bitmap to track the triggered interrupts.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |    5 +-
 virt/kvm/kvm_main.c      |  102 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b105ada..6e354af 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -145,6 +145,8 @@ struct kvm {
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
+#define KVM_MAX_IRQ_ROUTES 1024
+	DECLARE_BITMAP(irq_routes_pending_bitmap, KVM_MAX_IRQ_ROUTES);
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
@@ -336,6 +338,7 @@ struct kvm_assigned_dev_kernel {
 #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
 #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
+#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
@@ -503,8 +506,6 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 
-#define KVM_MAX_IRQ_ROUTES 1024
-
 int kvm_setup_default_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bb4aa73..4010802 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,25 +95,113 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
 	return NULL;
 }
 
+static int find_host_irq_from_gsi(struct kvm_assigned_dev_kernel *assigned_dev,
+				  u32 gsi)
+{
+	int i, entry, irq;
+	struct msix_entry *host_msix_entries, *guest_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+	guest_msix_entries = assigned_dev->guest_msix_entries;
+
+	entry = -1;
+	irq = 0;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (gsi == (guest_msix_entries + i)->vector) {
+			entry = (guest_msix_entries + i)->entry;
+			break;
+		}
+	if (entry < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (entry == (host_msix_entries + i)->entry) {
+			irq = (host_msix_entries + i)->vector;
+			break;
+		}
+	if (irq == 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X irq!\n");
+		return 0;
+	}
+
+	return irq;
+}
+
+static int find_gsi_from_host_irq(struct kvm_assigned_dev_kernel *assigned_dev,
+				  int irq)
+{
+	int i, entry, gsi;
+	struct msix_entry *host_msix_entries, *guest_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+	guest_msix_entries = assigned_dev->guest_msix_entries;
+
+	entry = -1;
+	gsi = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == (host_msix_entries + i)->vector) {
+			entry = (host_msix_entries + i)->entry;
+			break;
+		}
+	if (entry < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (entry == (guest_msix_entries + i)->entry) {
+			gsi = (guest_msix_entries + i)->vector;
+			break;
+		}
+	if (gsi < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X gsi!\n");
+		return 0;
+	}
+
+	return gsi;
+}
+
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	u32 gsi;
+	int irq;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
+	kvm = assigned_dev->kvm;
 
 	/* This is taken to safely inject irq inside the guest. When
 	 * the interrupt injection (or the ioapic code) uses a
 	 * finer-grained lock, update this
 	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
+	mutex_lock(&kvm->lock);
+handle_irq:
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
+				     KVM_MAX_IRQ_ROUTES);
+		BUG_ON(gsi >= KVM_MAX_IRQ_ROUTES);
+		clear_bit(gsi, kvm->irq_routes_pending_bitmap);
+	} else
+		gsi = assigned_dev->guest_irq;
+
+	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, gsi, 1);
 
 	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
 		enable_irq(assigned_dev->host_irq);
 		assigned_dev->host_irq_disabled = false;
+	} else if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		irq = find_host_irq_from_gsi(assigned_dev, gsi);
+		if (irq != 0)
+			enable_irq(irq);
+		assigned_dev->host_irq_disabled = false;
+		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
+				KVM_MAX_IRQ_ROUTES);
+		if (gsi < KVM_MAX_IRQ_ROUTES)
+			goto handle_irq;
 	}
+
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
@@ -121,6 +209,14 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
+	struct kvm *kvm = assigned_dev->kvm;
+
+	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+		int gsi = find_gsi_from_host_irq(assigned_dev, irq);
+		if (gsi < 0)
+			return IRQ_HANDLED;
+		set_bit(gsi, kvm->irq_routes_pending_bitmap);
+	}
 
 	schedule_work(&assigned_dev->interrupt_work);
 
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device
  2009-02-18  9:44 [PATCH 0/3 v3] MSI-X enabling Sheng Yang
  2009-02-18  9:44 ` [PATCH 1/3] KVM: Ioctls for init MSI-X entry Sheng Yang
  2009-02-18  9:44 ` [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X Sheng Yang
@ 2009-02-18  9:44 ` Sheng Yang
  2009-02-18 10:45   ` Avi Kivity
  2 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-02-18  9:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang

This patch finally enable MSI-X.

What we need for MSI-X:
1. Intercept one page in MMIO region of device. So that we can get guest desired
MSI-X table and set up the real one. Now this have been done by guest, and
transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY.

2. Information for incoming interrupt. Now one device can have more than one
interrupt, and they are all handled by one workqueue structure. So we need to
identify them. The previous patch enable gsi_msg_pending_bitmap get this done.

3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X
message address/data. We used same entry number for the host and guest here, so
that it's easy to find the correlated guest gsi.

What we lack for now:
1. The PCI spec said nothing can existed with MSI-X table in the same page of
MMIO region, except pending bits. The patch ignore pending bits as the first
step (so they are always 0 - no pending).

2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS
can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch
didn't support this, and Linux also don't work in this way.

3. The patch didn't implement MSI-X mask all and mask single entry. I would
implement the former in driver/pci/msi.c later. And for single entry, userspace
should have reposibility to handle it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm.h |    8 ++++
 virt/kvm/kvm_main.c |  107 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a2dfbe0..78480d0 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -440,6 +440,9 @@ struct kvm_irq_routing {
 };
 
 #endif
+#if defined(CONFIG_X86)
+#define KVM_CAP_DEVICE_MSIX 26
+#endif
 
 /*
  * ioctls for VM fds
@@ -597,6 +600,11 @@ struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION  (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
+					KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX  (1 << 1)
+#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX    (1 << 2)
+
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
 	__u16 entry_nr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4010802..d3acb37 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -280,13 +280,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	disable_irq_nosync(assigned_dev->host_irq);
-	cancel_work_sync(&assigned_dev->interrupt_work);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
+
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
+
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
 
-	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		pci_disable_msi(assigned_dev->dev);
+		if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
 
 	assigned_dev->irq_requested_type = 0;
 }
@@ -415,6 +435,69 @@ static int assigned_device_update_msi(struct kvm *kvm,
 	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
 	return 0;
 }
+
+static int assigned_device_update_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+	int i, r;
+
+	adev->ack_notifier.gsi = -1;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) {
+			printk(KERN_WARNING
+			       "kvm: unsupported mask MSI-X, flags 0x%x!\n",
+			       airq->flags);
+			return 0;
+		}
+
+		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
+			/* Guest disable MSI-X */
+			kvm_free_assigned_irq(kvm, adev);
+			if (msi2intx) {
+				pci_enable_msi(adev->dev);
+				if (adev->dev->msi_enabled)
+					return assigned_device_update_msi(kvm,
+							adev, airq);
+			}
+			return assigned_device_update_intx(kvm, adev, airq);
+		}
+
+		/* host_msix_entries and guest_msix_entries should have been
+		 * initialized */
+
+		if (adev->entries_nr == 0) {
+			printk(KERN_ERR
+			       "kvm: MSI-X entry not initialized!\n");
+			return -EFAULT;
+		}
+
+		kvm_free_assigned_irq(kvm, adev);
+
+		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
+				    adev->entries_nr);
+		if (r) {
+			printk(KERN_ERR "Fail to enable MSI-X feature!\n");
+			return r;
+		}
+
+		for (i = 0; i < adev->entries_nr; i++) {
+			r = request_irq((adev->host_msix_entries + i)->vector,
+					kvm_assigned_dev_intr, 0,
+					"kvm_assigned_msix_device",
+					(void *)adev);
+			if (r)
+				return r;
+		}
+	}
+
+	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+
+	return 0;
+}
+
 #endif
 
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
@@ -461,12 +544,24 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		}
 	}
 
-	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
+		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
+	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
 		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
 		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
 
 	changed_flags = assigned_irq->flags ^ current_flags;
 
+#ifdef CONFIG_X86
+	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
+		r = assigned_device_update_msix(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to execute "
+					"MSI-X action!\n");
+			goto out_release;
+		}
+	} else
+#endif
 	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
 	    (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/3] KVM: Ioctls for init MSI-X entry
  2009-02-18  9:44 ` [PATCH 1/3] KVM: Ioctls for init MSI-X entry Sheng Yang
@ 2009-02-18 10:32   ` Avi Kivity
  0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2009-02-18 10:32 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm

Sheng Yang wrote:
> Introduce KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY two ioctls.
>
> This two ioctls are used by userspace to specific guest device MSI-X entry
> number and correlate MSI-X entry with GSI during the initialization stage.
>
> MSI-X should be well initialzed before enabling.
>
> Don't support change MSI-X entry number for now.
>
>   

Sorry, this has been reviewed quite a bit but I found a few issues:

> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2163b3d..a2dfbe0 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -475,6 +475,8 @@ struct kvm_irq_routing {
>  #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
>  			    struct kvm_assigned_irq)
>  #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
> +#define KVM_SET_MSIX_NR _IOR(KVMIO, 0x72, struct kvm_assigned_msix_nr)
> +#define KVM_SET_MSIX_ENTRY _IOR(KVMIO, 0x73, struct kvm_assigned_msix_entry)
>   

KVM_SET_ASSIGNED_... so it's associated with device assignment, not generic.

Should be _IOW, not _IOR.  Looks like KVM_ASSIGN_IRQ is broken...

>  
> +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
> +				    struct kvm_assigned_msix_nr *entry_nr)
> +{
> +	int r = 0;
> +	struct kvm_assigned_dev_kernel *adev;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> +				      entry_nr->assigned_dev_id);
> +	if (!adev) {
> +		r = -EINVAL;
> +		goto msix_nr_out;
> +	}
> +
> +	if (adev->entries_nr == 0) {
> +		adev->entries_nr = entry_nr->entry_nr;
> +		if (adev->entries_nr == 0 ||
> +		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV)
> +			goto msix_nr_out;
>   

r == 0 here, needs a meaningful error number.

> +
> +		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
> +						entry_nr->entry_nr,
> +						GFP_KERNEL);
> +		if (!adev->host_msix_entries) {
> +			printk(KERN_ERR "no memory for host msix entries!\n");
> +			r = -ENOMEM;
>   

Drop the printk, -ENOMEM is enough.

> +			goto msix_nr_out;
> +		}
> +		adev->guest_msix_entries = kzalloc(sizeof(struct msix_entry) *
> +						entry_nr->entry_nr,
> +						GFP_KERNEL);
> +		if (!adev->guest_msix_entries) {
> +			printk(KERN_ERR "no memory for host msix entries!\n");
>   

Ditto.

> +			kfree(adev->host_msix_entries);
> +			r = -ENOMEM;
> +			goto msix_nr_out;
> +		}
> +	} else
> +		printk(KERN_WARNING "kvm: not allow recall set msix nr!\n");
>   

Drop printk, add error.

> +msix_nr_out:
> +	mutex_unlock(&kvm->lock);
> +	return r;
> +}
> +
> +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
> +				       struct kvm_assigned_msix_entry *entry)
> +{
> +	int r = 0, i;
> +	struct kvm_assigned_dev_kernel *adev;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> +				      entry->assigned_dev_id);
> +
> +	if (!adev) {
> +		r = -EINVAL;
> +		goto msix_entry_out;
> +	}
> +
> +	for (i = 0; i < adev->entries_nr; i++)
> +		if (adev->guest_msix_entries[i].vector == 0 ||
> +		    adev->guest_msix_entries[i].entry == entry->entry) {
> +			adev->guest_msix_entries[i].entry = entry->entry;
> +			adev->guest_msix_entries[i].vector = entry->gsi;
> +			adev->host_msix_entries[i].entry = entry->entry;
> +			break;
> +		}
> +	if (i == adev->entries_nr) {
> +		printk(KERN_ERR "kvm: Too much entries for MSI-X!\n");
>   

Drop.

> +		r = -ENOSPC;
> +		goto msix_entry_out;
> +	}
> +
> +msix_entry_out:
> +	mutex_unlock(&kvm->lock);
> +
> +	return r;
> +}
> +
>  static long kvm_vcpu_ioctl(struct file *filp,
>  			   unsigned int ioctl, unsigned long arg)
>  {
> @@ -1917,7 +1998,29 @@ static long kvm_vm_ioctl(struct file *filp,
>  		vfree(entries);
>  		break;
>  	}
> +#ifdef KVM_CAP_DEVICE_MSIX
> +	case KVM_SET_MSIX_NR: {
> +		struct kvm_assigned_msix_nr entry_nr;
> +		r = -EFAULT;
> +		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
> +			goto out;
> +		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
> +		if (r)
> +			goto out;
> +		break;
> +	}
> +	case KVM_SET_MSIX_ENTRY: {
> +		struct kvm_assigned_msix_entry entry;
> +		r = -EFAULT;
> +		if (copy_from_user(&entry, argp, sizeof entry))
> +			goto out;
> +		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
> +		if (r)
> +			goto out;
> +		break;
> +	}
>  #endif
> +#endif /* KVM_CAP_IRQ_ROUTING */
>  	default:
>  		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>  	}
>   


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device
  2009-02-18  9:44 ` [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device Sheng Yang
@ 2009-02-18 10:45   ` Avi Kivity
  2009-02-18 12:24     ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-02-18 10:45 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm

Sheng Yang wrote:
> This patch finally enable MSI-X.
>
> What we need for MSI-X:
> 1. Intercept one page in MMIO region of device. So that we can get guest desired
> MSI-X table and set up the real one. Now this have been done by guest, and
> transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY.
>
> 2. Information for incoming interrupt. Now one device can have more than one
> interrupt, and they are all handled by one workqueue structure. So we need to
> identify them. The previous patch enable gsi_msg_pending_bitmap get this done.
>
> 3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X
> message address/data. We used same entry number for the host and guest here, so
> that it's easy to find the correlated guest gsi.
>
> What we lack for now:
> 1. The PCI spec said nothing can existed with MSI-X table in the same page of
> MMIO region, except pending bits. The patch ignore pending bits as the first
> step (so they are always 0 - no pending).
>
> 2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS
> can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch
> didn't support this, and Linux also don't work in this way.
>
> 3. The patch didn't implement MSI-X mask all and mask single entry. I would
> implement the former in driver/pci/msi.c later. And for single entry, userspace
> should have reposibility to handle it.
>
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> ---
>  include/linux/kvm.h |    8 ++++
>  virt/kvm/kvm_main.c |  107 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 109 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index a2dfbe0..78480d0 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -440,6 +440,9 @@ struct kvm_irq_routing {
>  };
>  
>  #endif
> +#if defined(CONFIG_X86)
> +#define KVM_CAP_DEVICE_MSIX 26
> +#endif
>   

We switched to a different way of depending on CONFIG_X86, see the other 
KVM_CAP defines.

>  
>  struct kvm_assigned_msix_nr {
>  	__u32 assigned_dev_id;
>  	__u16 entry_nr;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4010802..d3acb37 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -280,13 +280,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
>  	 * now, the kvm state is still legal for probably we also have to wait
>  	 * interrupt_work done.
>  	 */
> -	disable_irq_nosync(assigned_dev->host_irq);
> -	cancel_work_sync(&assigned_dev->interrupt_work);
> +	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
> +		int i;
> +		for (i = 0; i < assigned_dev->entries_nr; i++)
> +			disable_irq_nosync(assigned_dev->
> +					   host_msix_entries[i].vector);
> +
> +		cancel_work_sync(&assigned_dev->interrupt_work);
> +
> +		for (i = 0; i < assigned_dev->entries_nr; i++)
> +			free_irq(assigned_dev->host_msix_entries[i].vector,
> +				 (void *)assigned_dev);
> +
> +		assigned_dev->entries_nr = 0;
> +		kfree(assigned_dev->host_msix_entries);
> +		kfree(assigned_dev->guest_msix_entries);
> +		pci_disable_msix(assigned_dev->dev);
> +	} else {
> +		/* Deal with MSI and INTx */
> +		disable_irq_nosync(assigned_dev->host_irq);
> +		cancel_work_sync(&assigned_dev->interrupt_work);
>   

How about always have an array?  That will also allow us to deal with 
INTx where x=B,C,D.

Currently for MSI and INTx the array will hold just one active element.

>  
> -	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
> +		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
>  
> -	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
> -		pci_disable_msi(assigned_dev->dev);
> +		if (assigned_dev->irq_requested_type &
> +				KVM_ASSIGNED_DEV_HOST_MSI)
> +			pci_disable_msi(assigned_dev->dev);
> +	}
>   

All those flags and bits are worrying me.  Maybe each entry in the array 
can have an ops member, and disabling would work by calling 
->ops->disable().

We can do that later.

>  
>  	assigned_dev->irq_requested_type = 0;
>  }
> @@ -415,6 +435,69 @@ static int assigned_device_update_msi(struct kvm *kvm,
>  	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
>  	return 0;
>  }
> +
> +static int assigned_device_update_msix(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *adev,
> +			struct kvm_assigned_irq *airq)
> +{
> +	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
> +	int i, r;
> +
> +	adev->ack_notifier.gsi = -1;
> +
> +	if (irqchip_in_kernel(kvm)) {
> +		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) {
> +			printk(KERN_WARNING
> +			       "kvm: unsupported mask MSI-X, flags 0x%x!\n",
> +			       airq->flags);
>   

error, not printk

> +			return 0;
> +		}
> +
> +		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
> +			/* Guest disable MSI-X */
> +			kvm_free_assigned_irq(kvm, adev);
> +			if (msi2intx) {
> +				pci_enable_msi(adev->dev);
> +				if (adev->dev->msi_enabled)
> +					return assigned_device_update_msi(kvm,
> +							adev, airq);
> +			}
> +			return assigned_device_update_intx(kvm, adev, airq);
> +		}
> +
> +		/* host_msix_entries and guest_msix_entries should have been
> +		 * initialized */
> +
> +		if (adev->entries_nr == 0) {
> +			printk(KERN_ERR
> +			       "kvm: MSI-X entry not initialized!\n");
> +			return -EFAULT;
>   

-EFAULT is for accesses to unmapped user memory; drop the printk.

> +		}
> +
> +		kvm_free_assigned_irq(kvm, adev);
> +
> +		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
> +				    adev->entries_nr);
> +		if (r) {
> +			printk(KERN_ERR "Fail to enable MSI-X feature!\n");
>   

Drop the printk.

> +			return r;
> +		}
> +
> +		for (i = 0; i < adev->entries_nr; i++) {
> +			r = request_irq((adev->host_msix_entries + i)->vector,
> +					kvm_assigned_dev_intr, 0,
> +					"kvm_assigned_msix_device",
> +					(void *)adev);
> +			if (r)
> +				return r;
> +		}
> +	}
> +
> +	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
> +
> +	return 0;
> +}
> +
>  #endif
>  
>  static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
> @@ -461,12 +544,24 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
>  		}
>  	}
>  
> -	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
> +	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
> +		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
> +	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
>  		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
>  		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
>  
>  	changed_flags = assigned_irq->flags ^ current_flags;
>  
> +#ifdef CONFIG_X86
> +	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
> +		r = assigned_device_update_msix(kvm, match, assigned_irq);
> +		if (r) {
> +			printk(KERN_WARNING "kvm: failed to execute "
> +					"MSI-X action!\n");
> +			goto out_release;
> +		}
> +	} else
> +#endif
>  	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
>  	    (msi2intx && match->dev->msi_enabled)) {
>  #ifdef CONFIG_X86
>   


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-18  9:44 ` [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X Sheng Yang
@ 2009-02-18 11:00   ` Avi Kivity
  2009-02-18 11:13     ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-02-18 11:00 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm

Sheng Yang wrote:
> We have to handle more than one interrupt with one handler for MSI-X. So we
> need a bitmap to track the triggered interrupts.
>   

Can you explain why?

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-18 11:00   ` Avi Kivity
@ 2009-02-18 11:13     ` Sheng Yang
  2009-02-18 11:29       ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-02-18 11:13 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm

On Wednesday 18 February 2009 19:00:53 Avi Kivity wrote:
> Sheng Yang wrote:
> > We have to handle more than one interrupt with one handler for MSI-X. So
> > we need a bitmap to track the triggered interrupts.
>
> Can you explain why?

Or how can we know which interrupt happened? Current we scheduled the work 
later, and no more irq information available at that time.

-- 
regards
Yang, Sheng


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-18 11:13     ` Sheng Yang
@ 2009-02-18 11:29       ` Avi Kivity
  2009-02-18 11:38         ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-02-18 11:29 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm

Sheng Yang wrote:
> On Wednesday 18 February 2009 19:00:53 Avi Kivity wrote:
>   
>> Sheng Yang wrote:
>>     
>>> We have to handle more than one interrupt with one handler for MSI-X. So
>>> we need a bitmap to track the triggered interrupts.
>>>       
>> Can you explain why?
>>     
>
> Or how can we know which interrupt happened? Current we scheduled the work 
> later, and no more irq information available at that time.
>   

We can have a work_struct per interrupt, or we can set a flag in the 
msix array that the interrupt is pending.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-18 11:29       ` Avi Kivity
@ 2009-02-18 11:38         ` Sheng Yang
  0 siblings, 0 replies; 19+ messages in thread
From: Sheng Yang @ 2009-02-18 11:38 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm

On Wednesday 18 February 2009 19:29:28 Avi Kivity wrote:
> Sheng Yang wrote:
> > On Wednesday 18 February 2009 19:00:53 Avi Kivity wrote:
> >> Sheng Yang wrote:
> >>> We have to handle more than one interrupt with one handler for MSI-X.
> >>> So we need a bitmap to track the triggered interrupts.
> >>
> >> Can you explain why?
> >
> > Or how can we know which interrupt happened? Current we scheduled the
> > work later, and no more irq information available at that time.
>
> We can have a work_struct per interrupt, or we can set a flag in the
> msix array that the interrupt is pending.

As I know, work_struct itself don't take any data. And host MSI-X array is a 
type of msix_entry* which is used for pci_enable_msix. But modifying type of 
guest msix entries should be OK. I will try.

-- 
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device
  2009-02-18 10:45   ` Avi Kivity
@ 2009-02-18 12:24     ` Sheng Yang
  2009-02-18 12:36       ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-02-18 12:24 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm

On Wed, Feb 18, 2009 at 10:45:19AM +0000, Avi Kivity wrote:
> Sheng Yang wrote:
>> index a2dfbe0..78480d0 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -440,6 +440,9 @@ struct kvm_irq_routing {
>>  };
>>   #endif
>> +#if defined(CONFIG_X86)
>> +#define KVM_CAP_DEVICE_MSIX 26
>> +#endif
>>   
>
> We switched to a different way of depending on CONFIG_X86, see the other  
> KVM_CAP defines.

Thanks to point it out. :)

>
>>   struct kvm_assigned_msix_nr {
>>  	__u32 assigned_dev_id;
>>  	__u16 entry_nr;
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 4010802..d3acb37 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -280,13 +280,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
>>  	 * now, the kvm state is still legal for probably we also have to wait
>>  	 * interrupt_work done.
>>  	 */
>> -	disable_irq_nosync(assigned_dev->host_irq);
>> -	cancel_work_sync(&assigned_dev->interrupt_work);
>> +	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
>> +		int i;
>> +		for (i = 0; i < assigned_dev->entries_nr; i++)
>> +			disable_irq_nosync(assigned_dev->
>> +					   host_msix_entries[i].vector);
>> +
>> +		cancel_work_sync(&assigned_dev->interrupt_work);
>> +
>> +		for (i = 0; i < assigned_dev->entries_nr; i++)
>> +			free_irq(assigned_dev->host_msix_entries[i].vector,
>> +				 (void *)assigned_dev);
>> +
>> +		assigned_dev->entries_nr = 0;
>> +		kfree(assigned_dev->host_msix_entries);
>> +		kfree(assigned_dev->guest_msix_entries);
>> +		pci_disable_msix(assigned_dev->dev);
>> +	} else {
>> +		/* Deal with MSI and INTx */
>> +		disable_irq_nosync(assigned_dev->host_irq);
>> +		cancel_work_sync(&assigned_dev->interrupt_work);
>>   
>
> How about always have an array?  That will also allow us to deal with  
> INTx where x=B,C,D.
>
> Currently for MSI and INTx the array will hold just one active element.

So array, or bitmap? I remember I changed it to bitmap accounding to your
first comment...

OK. I think array is reasonable, but the length is a problem, as I did before. How long would you like?

-- 
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device
  2009-02-18 12:24     ` Sheng Yang
@ 2009-02-18 12:36       ` Avi Kivity
  2009-02-18 12:52         ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-02-18 12:36 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm

Sheng Yang wrote:
>
>>>   struct kvm_assigned_msix_nr {
>>>  	__u32 assigned_dev_id;
>>>  	__u16 entry_nr;
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index 4010802..d3acb37 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -280,13 +280,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
>>>  	 * now, the kvm state is still legal for probably we also have to wait
>>>  	 * interrupt_work done.
>>>  	 */
>>> -	disable_irq_nosync(assigned_dev->host_irq);
>>> -	cancel_work_sync(&assigned_dev->interrupt_work);
>>> +	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
>>> +		int i;
>>> +		for (i = 0; i < assigned_dev->entries_nr; i++)
>>> +			disable_irq_nosync(assigned_dev->
>>> +					   host_msix_entries[i].vector);
>>> +
>>> +		cancel_work_sync(&assigned_dev->interrupt_work);
>>> +
>>> +		for (i = 0; i < assigned_dev->entries_nr; i++)
>>> +			free_irq(assigned_dev->host_msix_entries[i].vector,
>>> +				 (void *)assigned_dev);
>>> +
>>> +		assigned_dev->entries_nr = 0;
>>> +		kfree(assigned_dev->host_msix_entries);
>>> +		kfree(assigned_dev->guest_msix_entries);
>>> +		pci_disable_msix(assigned_dev->dev);
>>> +	} else {
>>> +		/* Deal with MSI and INTx */
>>> +		disable_irq_nosync(assigned_dev->host_irq);
>>> +		cancel_work_sync(&assigned_dev->interrupt_work);
>>>   
>>>       
>> How about always have an array?  That will also allow us to deal with  
>> INTx where x=B,C,D.
>>
>> Currently for MSI and INTx the array will hold just one active element.
>>     
>
> So array, or bitmap? I remember I changed it to bitmap accounding to your
> first comment...
>   

Which bitmap?  I'm confused.

I'm talking about unifying the existing array 
(assigned_dev->host_msix_entries[]) with ->host_irq.  Also since we need 
an array for INTx when a function uses INT[BCD].

So we'll have assigned_dev->host_irqs[], each entry can be INTx or MSI 
or MSIx.

> OK. I think array is reasonable, but the length is a problem, as I did before. How long would you like?
>   

MAX(4, KVM_MAX_MSIX_ENTRIES), no?

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device
  2009-02-18 12:36       ` Avi Kivity
@ 2009-02-18 12:52         ` Sheng Yang
  0 siblings, 0 replies; 19+ messages in thread
From: Sheng Yang @ 2009-02-18 12:52 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm

On Wednesday 18 February 2009 20:36:10 Avi Kivity wrote:
> Sheng Yang wrote:
> >>>   struct kvm_assigned_msix_nr {
> >>>  	__u32 assigned_dev_id;
> >>>  	__u16 entry_nr;
> >>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>> index 4010802..d3acb37 100644
> >>> --- a/virt/kvm/kvm_main.c
> >>> +++ b/virt/kvm/kvm_main.c
> >>> @@ -280,13 +280,33 @@ static void kvm_free_assigned_irq(struct kvm
> >>> *kvm, * now, the kvm state is still legal for probably we also have to
> >>> wait * interrupt_work done.
> >>>  	 */
> >>> -	disable_irq_nosync(assigned_dev->host_irq);
> >>> -	cancel_work_sync(&assigned_dev->interrupt_work);
> >>> +	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
> >>> +		int i;
> >>> +		for (i = 0; i < assigned_dev->entries_nr; i++)
> >>> +			disable_irq_nosync(assigned_dev->
> >>> +					   host_msix_entries[i].vector);
> >>> +
> >>> +		cancel_work_sync(&assigned_dev->interrupt_work);
> >>> +
> >>> +		for (i = 0; i < assigned_dev->entries_nr; i++)
> >>> +			free_irq(assigned_dev->host_msix_entries[i].vector,
> >>> +				 (void *)assigned_dev);
> >>> +
> >>> +		assigned_dev->entries_nr = 0;
> >>> +		kfree(assigned_dev->host_msix_entries);
> >>> +		kfree(assigned_dev->guest_msix_entries);
> >>> +		pci_disable_msix(assigned_dev->dev);
> >>> +	} else {
> >>> +		/* Deal with MSI and INTx */
> >>> +		disable_irq_nosync(assigned_dev->host_irq);
> >>> +		cancel_work_sync(&assigned_dev->interrupt_work);
> >>
> >> How about always have an array?  That will also allow us to deal with
> >> INTx where x=B,C,D.
> >>
> >> Currently for MSI and INTx the array will hold just one active element.
> >
> > So array, or bitmap? I remember I changed it to bitmap accounding to your
> > first comment...
>
> Which bitmap?  I'm confused.
>
> I'm talking about unifying the existing array
> (assigned_dev->host_msix_entries[]) with ->host_irq.  Also since we need
> an array for INTx when a function uses INT[BCD].
>
> So we'll have assigned_dev->host_irqs[], each entry can be INTx or MSI
> or MSIx.
>
> > OK. I think array is reasonable, but the length is a problem, as I did
> > before. How long would you like?
>
> MAX(4, KVM_MAX_MSIX_ENTRIES), no?

Oh, yeah, I misunderstood it(wrong context)...

Need more adjustment on the type, for host_msix_entries is used with 
pci_enable_msix. So I'd like to put it a bit later.

-- 
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-17 18:09       ` Avi Kivity
@ 2009-02-18  1:33         ` Sheng Yang
  0 siblings, 0 replies; 19+ messages in thread
From: Sheng Yang @ 2009-02-18  1:33 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm

On Wednesday 18 February 2009 02:09:49 Avi Kivity wrote:
> Sheng Yang wrote:
> >>> +	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
> >>> +		u32 gsi;
> >>> +		gsi = find_gsi_from_host_irq(assigned_dev, irq);
> >>> +		if (gsi == 0)
> >>> +			return IRQ_HANDLED;
> >>
> >> So you chose GSI == 0 as invalid because of x86 assumptions? Or is there
> >> any other reason?
> >
> > Yeah, it based on x86 and IA64 IRQ 0 can't be used by MSI-X. And only x86
> > support MSI-X now(and IA64 would follow later).
>
> gsi != irq... as used in kvm it is just a cookie and is never visible to
> the guest.  I'd prefer an illegal value here.
>
>
> Also, please repost the entire patchset so I can be sure I apply the
> latest; too many versions floating around.

OK. And please don't forget there is a patchset named "Optimize and unify 
IOAPIC/MSI delivery" before this one. :)

-- 
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-13  3:37     ` Sheng Yang
  2009-02-13 17:10       ` Marcelo Tosatti
@ 2009-02-17 18:09       ` Avi Kivity
  2009-02-18  1:33         ` Sheng Yang
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-02-17 18:09 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm

Sheng Yang wrote:
>>> +	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
>>> +		u32 gsi;
>>> +		gsi = find_gsi_from_host_irq(assigned_dev, irq);
>>> +		if (gsi == 0)
>>> +			return IRQ_HANDLED;
>>>       
>> So you chose GSI == 0 as invalid because of x86 assumptions? Or is there
>> any other reason?
>>     
>
> Yeah, it based on x86 and IA64 IRQ 0 can't be used by MSI-X. And only x86 
> support MSI-X now(and IA64 would follow later).
>   

gsi != irq... as used in kvm it is just a cookie and is never visible to 
the guest.  I'd prefer an illegal value here.


Also, please repost the entire patchset so I can be sure I apply the 
latest; too many versions floating around.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-13  3:37     ` Sheng Yang
@ 2009-02-13 17:10       ` Marcelo Tosatti
  2009-02-17 18:09       ` Avi Kivity
  1 sibling, 0 replies; 19+ messages in thread
From: Marcelo Tosatti @ 2009-02-13 17:10 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Avi Kivity, kvm

On Fri, Feb 13, 2009 at 11:37:45AM +0800, Sheng Yang wrote:
> > > +#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
> >
> > Can you explain the usage of the two bits?
> 
> Um... Just to keep consistent with formers(one for guest and one for host), at 
> cost of one bit.

OK

> > Can drop the printk's (also from find_gsi_from_host_irq).
> 
> Not that confident. In fact, I often triggered this during debug... IIRC, 
> userspace program shouldn't trigger this if kernel space works well. Maybe it 
> can be changed to WARN_ON() or BUG_ON() later.

OK

> > Check the return value?
> 
> Yeah...
> >
> > > +		enable_irq(irq);
> >
> > Do you guarantee that particular irq you're enable_irq'ing is not bogus?
> > Its has been passed from userspace after all.
> 
> It isn't passed from userspace. This one is filled by pci_enable_msix(), which 
> should be OK. 

Alright.

> > So you chose GSI == 0 as invalid because of x86 assumptions? Or is there
> > any other reason?
> 
> Yeah, it based on x86 and IA64 IRQ 0 can't be used by MSI-X. And only x86 
> support MSI-X now(and IA64 would follow later).

OK.

> >
> > IRQ sharing in the host side is not supported correct?
> 
> Um? Yeah... And seems we won't support it forever...

OK.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-12 19:51   ` Marcelo Tosatti
@ 2009-02-13  3:37     ` Sheng Yang
  2009-02-13 17:10       ` Marcelo Tosatti
  2009-02-17 18:09       ` Avi Kivity
  0 siblings, 2 replies; 19+ messages in thread
From: Sheng Yang @ 2009-02-13  3:37 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, kvm

On Friday 13 February 2009 03:51:18 Marcelo Tosatti wrote:
> Hi Sheng,
>
> On Wed, Feb 11, 2009 at 04:08:50PM +0800, Sheng Yang wrote:
> > We have to handle more than one interrupt with one handler for MSI-X. So
> > we need a bitmap to track the triggered interrupts.
> >
> > Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> > ---
> >  include/linux/kvm_host.h |    5 +-
> >  virt/kvm/kvm_main.c      |  103
> > ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 103
> > insertions(+), 5 deletions(-)
> >
> >
> > @@ -335,6 +337,7 @@ struct kvm_assigned_dev_kernel {
> >  #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
> >  #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
> >  #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
> > +#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
>
> Can you explain the usage of the two bits?

Um... Just to keep consistent with formers(one for guest and one for host), at 
cost of one bit.

> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index ea96690..961603f 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -95,25 +95,113 @@ static struct kvm_assigned_dev_kernel
> > *kvm_find_assigned_dev(struct list_head *h return NULL;
> >  }
> >
> > +static int find_host_irq_from_gsi(struct kvm_assigned_dev_kernel
> > *assigned_dev, +				  u32 gsi)
> > +{
> > +	int i, entry, irq;
> > +	struct msix_entry *host_msix_entries, *guest_msix_entries;
> > +
> > +	host_msix_entries = assigned_dev->host_msix_entries;
> > +	guest_msix_entries = assigned_dev->guest_msix_entries;
> > +
> > +	entry = -1;
> > +	irq = 0;
> > +	for (i = 0; i < assigned_dev->entries_nr; i++)
> > +		if (gsi == (guest_msix_entries + i)->vector) {
> > +			entry = (guest_msix_entries + i)->entry;
> > +			break;
> > +		}
> > +	if (entry < 0) {
> > +		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
> > +		return 0;
> > +	}
> > +	for (i = 0; i < assigned_dev->entries_nr; i++)
> > +		if (entry == (host_msix_entries + i)->entry) {
> > +			irq = (host_msix_entries + i)->vector;
> > +			break;
> > +		}
> > +	if (irq == 0) {
> > +		printk(KERN_WARNING "Fail to find correlated MSI-X irq!\n");
> > +		return 0;
> > +	}
>
> Can drop the printk's (also from find_gsi_from_host_irq).

Not that confident. In fact, I often triggered this during debug... IIRC, 
userspace program shouldn't trigger this if kernel space works well. Maybe it 
can be changed to WARN_ON() or BUG_ON() later.
>
> >  	struct kvm_assigned_dev_kernel *assigned_dev;
> > +	struct kvm *kvm;
> > +	u32 gsi;
> > +	int irq;
> >
> >  	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
> >  				    interrupt_work);
> > +	kvm = assigned_dev->kvm;
> >
> >  	/* This is taken to safely inject irq inside the guest. When
> >  	 * the interrupt injection (or the ioapic code) uses a
> >  	 * finer-grained lock, update this
> >  	 */
> > -	mutex_lock(&assigned_dev->kvm->lock);
> > -	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
> > -		    assigned_dev->guest_irq, 1);
> > +	mutex_lock(&kvm->lock);
> > +handle_irq:
> > +	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
> > +		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
> > +				     KVM_MAX_IRQ_ROUTES);
> > +		BUG_ON(gsi >= KVM_MAX_IRQ_ROUTES);
> > +		clear_bit(gsi, kvm->irq_routes_pending_bitmap);
> > +	} else
> > +		gsi = assigned_dev->guest_irq;
> > +
> > +	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, gsi, 1);
> >
> >  	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
> >  		enable_irq(assigned_dev->host_irq);
> >  		assigned_dev->host_irq_disabled = false;
> > +	} else if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
> > +		irq = find_host_irq_from_gsi(assigned_dev, gsi);
>
> Check the return value?

Yeah...
>
> > +		enable_irq(irq);
>
> Do you guarantee that particular irq you're enable_irq'ing is not bogus?
> Its has been passed from userspace after all.

It isn't passed from userspace. This one is filled by pci_enable_msix(), which 
should be OK. 
>
> In a later patch you can assign KVM_ASSIGNED_DEV_MSIX if the irqchip is
> not in-kernel in assigned_device_update_msix:
>
> +       adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
> +
>
> Just trying to harden the code against bogosity elsewhere.

Yeah, of course. :)
>
> > +		assigned_dev->host_irq_disabled = false;
> > +		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
> > +				KVM_MAX_IRQ_ROUTES);
> > +		if (gsi < KVM_MAX_IRQ_ROUTES)
> > +			goto handle_irq;
> >  	}
> > +
> >  	mutex_unlock(&assigned_dev->kvm->lock);
> >  }
> >
> > @@ -121,6 +209,15 @@ static irqreturn_t kvm_assigned_dev_intr(int irq,
> > void *dev_id) {
> >  	struct kvm_assigned_dev_kernel *assigned_dev =
> >  		(struct kvm_assigned_dev_kernel *) dev_id;
> > +	struct kvm *kvm = assigned_dev->kvm;
> > +
> > +	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
> > +		u32 gsi;
> > +		gsi = find_gsi_from_host_irq(assigned_dev, irq);
> > +		if (gsi == 0)
> > +			return IRQ_HANDLED;
>
> So you chose GSI == 0 as invalid because of x86 assumptions? Or is there
> any other reason?

Yeah, it based on x86 and IA64 IRQ 0 can't be used by MSI-X. And only x86 
support MSI-X now(and IA64 would follow later).
>
> IRQ sharing in the host side is not supported correct?

Um? Yeah... And seems we won't support it forever...

-- 
regards
Yang, Sheng


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-11  8:08 ` [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X Sheng Yang
@ 2009-02-12 19:51   ` Marcelo Tosatti
  2009-02-13  3:37     ` Sheng Yang
  0 siblings, 1 reply; 19+ messages in thread
From: Marcelo Tosatti @ 2009-02-12 19:51 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Avi Kivity, kvm

Hi Sheng,

On Wed, Feb 11, 2009 at 04:08:50PM +0800, Sheng Yang wrote:
> We have to handle more than one interrupt with one handler for MSI-X. So we
> need a bitmap to track the triggered interrupts.
> 
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> ---
>  include/linux/kvm_host.h |    5 +-
>  virt/kvm/kvm_main.c      |  103 ++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 103 insertions(+), 5 deletions(-)
> 

> @@ -335,6 +337,7 @@ struct kvm_assigned_dev_kernel {
>  #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
>  #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
>  #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
> +#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))

Can you explain the usage of the two bits?

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index ea96690..961603f 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -95,25 +95,113 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
>  	return NULL;
>  }
>  
> +static int find_host_irq_from_gsi(struct kvm_assigned_dev_kernel *assigned_dev,
> +				  u32 gsi)
> +{
> +	int i, entry, irq;
> +	struct msix_entry *host_msix_entries, *guest_msix_entries;
> +
> +	host_msix_entries = assigned_dev->host_msix_entries;
> +	guest_msix_entries = assigned_dev->guest_msix_entries;
> +
> +	entry = -1;
> +	irq = 0;
> +	for (i = 0; i < assigned_dev->entries_nr; i++)
> +		if (gsi == (guest_msix_entries + i)->vector) {
> +			entry = (guest_msix_entries + i)->entry;
> +			break;
> +		}
> +	if (entry < 0) {
> +		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
> +		return 0;
> +	}
> +	for (i = 0; i < assigned_dev->entries_nr; i++)
> +		if (entry == (host_msix_entries + i)->entry) {
> +			irq = (host_msix_entries + i)->vector;
> +			break;
> +		}
> +	if (irq == 0) {
> +		printk(KERN_WARNING "Fail to find correlated MSI-X irq!\n");
> +		return 0;
> +	}

Can drop the printk's (also from find_gsi_from_host_irq).

>  	struct kvm_assigned_dev_kernel *assigned_dev;
> +	struct kvm *kvm;
> +	u32 gsi;
> +	int irq;
>  
>  	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
>  				    interrupt_work);
> +	kvm = assigned_dev->kvm;
>  
>  	/* This is taken to safely inject irq inside the guest. When
>  	 * the interrupt injection (or the ioapic code) uses a
>  	 * finer-grained lock, update this
>  	 */
> -	mutex_lock(&assigned_dev->kvm->lock);
> -	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
> -		    assigned_dev->guest_irq, 1);
> +	mutex_lock(&kvm->lock);
> +handle_irq:
> +	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
> +		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
> +				     KVM_MAX_IRQ_ROUTES);
> +		BUG_ON(gsi >= KVM_MAX_IRQ_ROUTES);
> +		clear_bit(gsi, kvm->irq_routes_pending_bitmap);
> +	} else
> +		gsi = assigned_dev->guest_irq;
> +
> +	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, gsi, 1);
>  
>  	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
>  		enable_irq(assigned_dev->host_irq);
>  		assigned_dev->host_irq_disabled = false;
> +	} else if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
> +		irq = find_host_irq_from_gsi(assigned_dev, gsi);

Check the return value?

> +		enable_irq(irq);

Do you guarantee that particular irq you're enable_irq'ing is not bogus?
Its has been passed from userspace after all.

In a later patch you can assign KVM_ASSIGNED_DEV_MSIX if the irqchip is
not in-kernel in assigned_device_update_msix:

+       adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+

Just trying to harden the code against bogosity elsewhere.

> +		assigned_dev->host_irq_disabled = false;
> +		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
> +				KVM_MAX_IRQ_ROUTES);
> +		if (gsi < KVM_MAX_IRQ_ROUTES)
> +			goto handle_irq;
>  	}
> +
>  	mutex_unlock(&assigned_dev->kvm->lock);
>  }
>  
> @@ -121,6 +209,15 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
>  {
>  	struct kvm_assigned_dev_kernel *assigned_dev =
>  		(struct kvm_assigned_dev_kernel *) dev_id;
> +	struct kvm *kvm = assigned_dev->kvm;
> +
> +	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
> +		u32 gsi;
> +		gsi = find_gsi_from_host_irq(assigned_dev, irq);
> +		if (gsi == 0)
> +			return IRQ_HANDLED;

So you chose GSI == 0 as invalid because of x86 assumptions? Or is there
any other reason?

IRQ sharing in the host side is not supported correct?


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X
  2009-02-11  8:08 [PATCH 0/3 v2] MSI-X enabling Sheng Yang
@ 2009-02-11  8:08 ` Sheng Yang
  2009-02-12 19:51   ` Marcelo Tosatti
  0 siblings, 1 reply; 19+ messages in thread
From: Sheng Yang @ 2009-02-11  8:08 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Sheng Yang

We have to handle more than one interrupt with one handler for MSI-X. So we
need a bitmap to track the triggered interrupts.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |    5 +-
 virt/kvm/kvm_main.c      |  103 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a7d6123..c081867 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -144,6 +144,8 @@ struct kvm {
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
+#define KVM_MAX_IRQ_ROUTES 1024
+	DECLARE_BITMAP(irq_routes_pending_bitmap, KVM_MAX_IRQ_ROUTES);
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
@@ -335,6 +337,7 @@ struct kvm_assigned_dev_kernel {
 #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
 #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
+#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
@@ -502,8 +505,6 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 
-#define KVM_MAX_IRQ_ROUTES 1024
-
 int kvm_setup_default_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ea96690..961603f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,25 +95,113 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
 	return NULL;
 }
 
+static int find_host_irq_from_gsi(struct kvm_assigned_dev_kernel *assigned_dev,
+				  u32 gsi)
+{
+	int i, entry, irq;
+	struct msix_entry *host_msix_entries, *guest_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+	guest_msix_entries = assigned_dev->guest_msix_entries;
+
+	entry = -1;
+	irq = 0;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (gsi == (guest_msix_entries + i)->vector) {
+			entry = (guest_msix_entries + i)->entry;
+			break;
+		}
+	if (entry < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (entry == (host_msix_entries + i)->entry) {
+			irq = (host_msix_entries + i)->vector;
+			break;
+		}
+	if (irq == 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X irq!\n");
+		return 0;
+	}
+
+	return irq;
+}
+
+static u32 find_gsi_from_host_irq(struct kvm_assigned_dev_kernel *assigned_dev,
+				  int irq)
+{
+	int i, entry;
+	u32 gsi;
+	struct msix_entry *host_msix_entries, *guest_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+	guest_msix_entries = assigned_dev->guest_msix_entries;
+
+	entry = -1;
+	gsi = 0;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == (host_msix_entries + i)->vector) {
+			entry = (host_msix_entries + i)->entry;
+			break;
+		}
+	if (entry < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (entry == (guest_msix_entries + i)->entry) {
+			gsi = (guest_msix_entries + i)->vector;
+			break;
+		}
+	if (gsi == 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X gsi!\n");
+		return 0;
+	}
+
+	return gsi;
+}
+
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	u32 gsi;
+	int irq;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
+	kvm = assigned_dev->kvm;
 
 	/* This is taken to safely inject irq inside the guest. When
 	 * the interrupt injection (or the ioapic code) uses a
 	 * finer-grained lock, update this
 	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
+	mutex_lock(&kvm->lock);
+handle_irq:
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
+				     KVM_MAX_IRQ_ROUTES);
+		BUG_ON(gsi >= KVM_MAX_IRQ_ROUTES);
+		clear_bit(gsi, kvm->irq_routes_pending_bitmap);
+	} else
+		gsi = assigned_dev->guest_irq;
+
+	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, gsi, 1);
 
 	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
 		enable_irq(assigned_dev->host_irq);
 		assigned_dev->host_irq_disabled = false;
+	} else if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		irq = find_host_irq_from_gsi(assigned_dev, gsi);
+		enable_irq(irq);
+		assigned_dev->host_irq_disabled = false;
+		gsi = find_first_bit(kvm->irq_routes_pending_bitmap,
+				KVM_MAX_IRQ_ROUTES);
+		if (gsi < KVM_MAX_IRQ_ROUTES)
+			goto handle_irq;
 	}
+
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
@@ -121,6 +209,15 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
+	struct kvm *kvm = assigned_dev->kvm;
+
+	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+		u32 gsi;
+		gsi = find_gsi_from_host_irq(assigned_dev, irq);
+		if (gsi == 0)
+			return IRQ_HANDLED;
+		set_bit(gsi, kvm->irq_routes_pending_bitmap);
+	}
 
 	schedule_work(&assigned_dev->interrupt_work);
 
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2009-02-18 12:52 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-18  9:44 [PATCH 0/3 v3] MSI-X enabling Sheng Yang
2009-02-18  9:44 ` [PATCH 1/3] KVM: Ioctls for init MSI-X entry Sheng Yang
2009-02-18 10:32   ` Avi Kivity
2009-02-18  9:44 ` [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X Sheng Yang
2009-02-18 11:00   ` Avi Kivity
2009-02-18 11:13     ` Sheng Yang
2009-02-18 11:29       ` Avi Kivity
2009-02-18 11:38         ` Sheng Yang
2009-02-18  9:44 ` [PATCH 3/3] KVM: Enable MSI-X for KVM assigned device Sheng Yang
2009-02-18 10:45   ` Avi Kivity
2009-02-18 12:24     ` Sheng Yang
2009-02-18 12:36       ` Avi Kivity
2009-02-18 12:52         ` Sheng Yang
  -- strict thread matches above, loose matches on Subject: below --
2009-02-11  8:08 [PATCH 0/3 v2] MSI-X enabling Sheng Yang
2009-02-11  8:08 ` [PATCH 2/3] KVM: Add gsi_msg_pending_bitmap for MSI-X Sheng Yang
2009-02-12 19:51   ` Marcelo Tosatti
2009-02-13  3:37     ` Sheng Yang
2009-02-13 17:10       ` Marcelo Tosatti
2009-02-17 18:09       ` Avi Kivity
2009-02-18  1:33         ` Sheng Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.