All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4 v12] MSI-X MMIO support for KVM
@ 2011-03-02  7:26 Sheng Yang
  2011-03-02  7:26 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Sheng Yang @ 2011-03-02  7:26 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang

Change from v10:
1. Update according to the comments of Michael.
2. Use mmio_needed to exit to userspace according to Marcelo's comments.

Sheng Yang (4):
  KVM: Move struct kvm_io_device to kvm_host.h
  KVM: Add kvm_io_ext_data to IO handler
  KVM: Emulate MSI-X table in kernel
  KVM: Add documents for MSI-X MMIO API

 Documentation/kvm/api.txt |   58 +++++++++
 arch/x86/kvm/Makefile     |    2 +-
 arch/x86/kvm/i8254.c      |    6 +-
 arch/x86/kvm/i8259.c      |    3 +-
 arch/x86/kvm/lapic.c      |    3 +-
 arch/x86/kvm/x86.c        |   42 +++++--
 include/linux/kvm.h       |   28 +++++
 include/linux/kvm_host.h  |   64 ++++++++++-
 virt/kvm/assigned-dev.c   |   41 +++++++
 virt/kvm/coalesced_mmio.c |    3 +-
 virt/kvm/eventfd.c        |    2 +-
 virt/kvm/ioapic.c         |    2 +-
 virt/kvm/iodev.h          |   31 +----
 virt/kvm/kvm_main.c       |   40 ++++++-
 virt/kvm/msix_mmio.c      |  291 +++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/msix_mmio.h      |   26 ++++
 16 files changed, 591 insertions(+), 51 deletions(-)
 create mode 100644 virt/kvm/msix_mmio.c
 create mode 100644 virt/kvm/msix_mmio.h


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h
  2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
@ 2011-03-02  7:26 ` Sheng Yang
  2011-03-02  7:26 ` [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler Sheng Yang
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2011-03-02  7:26 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang

Then it can be used by other struct in kvm_host.h

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |   23 +++++++++++++++++++++++
 virt/kvm/iodev.h         |   25 +------------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db..7d313e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -98,6 +98,29 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     const void *val);
+	void (*destructor)(struct kvm_io_device *this);
+};
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 12fd3ca..d1f5651 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -17,32 +17,9 @@
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
 #include <asm/errno.h>
 
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
-	int (*read)(struct kvm_io_device *this,
-		    gpa_t addr,
-		    int len,
-		    void *val);
-	int (*write)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     const void *val);
-	void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
-	const struct kvm_io_device_ops *ops;
-};
-
 static inline void kvm_iodevice_init(struct kvm_io_device *dev,
 				     const struct kvm_io_device_ops *ops)
 {
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler
  2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
  2011-03-02  7:26 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
@ 2011-03-02  7:26 ` Sheng Yang
  2011-03-02  7:26 ` [PATCH 3/4] KVM: Emulate MSI-X table in kernel Sheng Yang
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2011-03-02  7:26 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang

Add a new parameter to IO writing handler, so that we can transfer information
from IO handler to caller.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 arch/x86/kvm/i8254.c      |    6 ++++--
 arch/x86/kvm/i8259.c      |    3 ++-
 arch/x86/kvm/lapic.c      |    3 ++-
 arch/x86/kvm/x86.c        |   13 ++++++++-----
 include/linux/kvm_host.h  |    9 +++++++--
 virt/kvm/coalesced_mmio.c |    3 ++-
 virt/kvm/eventfd.c        |    2 +-
 virt/kvm/ioapic.c         |    2 +-
 virt/kvm/iodev.h          |    6 ++++--
 virt/kvm/kvm_main.c       |    4 ++--
 10 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..bd8f0c5 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -439,7 +439,8 @@ static inline int pit_in_range(gpa_t addr)
 }
 
 static int pit_ioport_write(struct kvm_io_device *this,
-			    gpa_t addr, int len, const void *data)
+			    gpa_t addr, int len, const void *data,
+			    struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_pit *pit = dev_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
@@ -585,7 +586,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
 }
 
 static int speaker_ioport_write(struct kvm_io_device *this,
-				gpa_t addr, int len, const void *data)
+				gpa_t addr, int len, const void *data,
+				struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 3cece05..96b1070 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -480,7 +480,8 @@ static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
 }
 
 static int picdev_write(struct kvm_io_device *this,
-			 gpa_t addr, int len, const void *val)
+			 gpa_t addr, int len, const void *val,
+			 struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_pic *s = to_pic(this);
 	unsigned char data = *(unsigned char *)val;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 93cf9d0..f413e9c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -836,7 +836,8 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 }
 
 static int apic_mmio_write(struct kvm_io_device *this,
-			    gpa_t address, int len, const void *data)
+			    gpa_t address, int len, const void *data,
+			    struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_lapic *apic = to_lapic(this);
 	unsigned int offset = address - apic->base_address;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fa708c9..21b84e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3571,13 +3571,14 @@ static void kvm_init_msr_list(void)
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
-			   const void *v)
+			   const void *v, struct kvm_io_ext_data *ext_data)
 {
 	if (vcpu->arch.apic &&
-	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
+	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v, ext_data))
 		return 0;
 
-	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS,
+				addr, len, v, ext_data);
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -3807,6 +3808,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	struct kvm_io_ext_data ext_data;
 
 	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3825,7 +3827,7 @@ mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+	if (!vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data))
 		return X86EMUL_CONTINUE;
 
 	vcpu->mmio_needed = 1;
@@ -3940,6 +3942,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
 	/* TODO: String I/O for in kernel device */
 	int r;
+	struct kvm_io_ext_data ext_data;
 
 	if (vcpu->arch.pio.in)
 		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
@@ -3947,7 +3950,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 	else
 		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
 				     vcpu->arch.pio.port, vcpu->arch.pio.size,
-				     pd);
+				     pd, &ext_data);
 	return r;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d313e0..a32c53e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,8 +68,12 @@ enum kvm_bus {
 	KVM_NR_BUSES
 };
 
+struct kvm_io_ext_data {
+	int type;   /* Extended data type */
+};
+
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-		     int len, const void *val);
+		     int len, const void *val, struct kvm_io_ext_data *data);
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -113,7 +117,8 @@ struct kvm_io_device_ops {
 	int (*write)(struct kvm_io_device *this,
 		     gpa_t addr,
 		     int len,
-		     const void *val);
+		     const void *val,
+		     struct kvm_io_ext_data *data);
 	void (*destructor)(struct kvm_io_device *this);
 };
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index fc84875..37b254c 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -59,7 +59,8 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
 }
 
 static int coalesced_mmio_write(struct kvm_io_device *this,
-				gpa_t addr, int len, const void *val)
+				gpa_t addr, int len, const void *val,
+				struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2ca4535..8edd757 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -483,7 +483,7 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
 ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
-		const void *val)
+		const void *val, struct kvm_io_ext_data *ext_data)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 0b9df83..6a027ef 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -321,7 +321,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 }
 
 static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-			     const void *val)
+			     const void *val, struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index d1f5651..340ab79 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -33,9 +33,11 @@ static inline int kvm_iodevice_read(struct kvm_io_device *dev,
 }
 
 static inline int kvm_iodevice_write(struct kvm_io_device *dev,
-				     gpa_t addr, int l, const void *v)
+				     gpa_t addr, int l, const void *v,
+				     struct kvm_io_ext_data *data)
 {
-	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
+	return dev->ops->write ?
+		dev->ops->write(dev, addr, l, v, data) : -EOPNOTSUPP;
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b1b6cbb..a61f90e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2221,14 +2221,14 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-		     int len, const void *val)
+		     int len, const void *val, struct kvm_io_ext_data *ext_data)
 {
 	int i;
 	struct kvm_io_bus *bus;
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
 	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+		if (!kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data))
 			return 0;
 	return -EOPNOTSUPP;
 }
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] KVM: Emulate MSI-X table in kernel
  2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
  2011-03-02  7:26 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
  2011-03-02  7:26 ` [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler Sheng Yang
@ 2011-03-02  7:26 ` Sheng Yang
  2011-03-02  7:26 ` [PATCH 4/4] KVM: Add documents for MSI-X MMIO API Sheng Yang
  2011-03-02  9:23 ` [PATCH 0/4 v12] MSI-X MMIO support for KVM Michael S. Tsirkin
  4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2011-03-02  7:26 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang

Then we can support mask bit operation of assigned devices now.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 arch/x86/kvm/Makefile    |    2 +-
 arch/x86/kvm/x86.c       |   31 ++++-
 include/linux/kvm.h      |   28 +++++
 include/linux/kvm_host.h |   34 ++++++
 virt/kvm/assigned-dev.c  |   41 +++++++
 virt/kvm/kvm_main.c      |   38 ++++++-
 virt/kvm/msix_mmio.c     |  291 ++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/msix_mmio.h     |   26 ++++
 8 files changed, 479 insertions(+), 12 deletions(-)
 create mode 100644 virt/kvm/msix_mmio.c
 create mode 100644 virt/kvm/msix_mmio.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index f15501f..3a0d851 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
-				assigned-dev.o)
+				assigned-dev.o msix_mmio.o)
 kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21b84e2..9bafaca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_MSIX_MMIO:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -3809,6 +3810,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 {
 	gpa_t                 gpa;
 	struct kvm_io_ext_data ext_data;
+	int r;
 
 	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3824,18 +3826,33 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 
 mmio:
 	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+	r = vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data);
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data))
+	if (!r)
 		return X86EMUL_CONTINUE;
 
-	vcpu->mmio_needed = 1;
-	vcpu->run->exit_reason = KVM_EXIT_MMIO;
-	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->run->mmio.len = vcpu->mmio_size = bytes;
-	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
-	memcpy(vcpu->run->mmio.data, val, bytes);
+	if (r == -ENOTSYNC) {
+		vcpu->mmio_needed = 1;
+		vcpu->mmio_is_write = 1;
+		vcpu->run->exit_reason = KVM_EXIT_MSIX_ROUTING_UPDATE;
+		vcpu->run->msix_routing.dev_id =
+			ext_data.msix_routing.dev_id;
+		vcpu->run->msix_routing.type =
+			ext_data.msix_routing.type;
+		vcpu->run->msix_routing.entry_idx =
+			ext_data.msix_routing.entry_idx;
+		vcpu->run->msix_routing.flags =
+			ext_data.msix_routing.flags;
+	} else  {
+		vcpu->mmio_needed = 1;
+		vcpu->run->exit_reason = KVM_EXIT_MMIO;
+		vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
+		vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+		vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
+		memcpy(vcpu->run->mmio.data, val, bytes);
+	}
 
 	return X86EMUL_CONTINUE;
 }
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ea2dc1a..4393e4e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
+#define KVM_EXIT_MSIX_ROUTING_UPDATE 19
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -264,6 +265,13 @@ struct kvm_run {
 		struct {
 			__u64 gprs[32];
 		} osi;
+		/* KVM_EXIT_MSIX_ROUTING_UPDATE*/
+		struct {
+			__u32 dev_id;
+			__u16 type;
+			__u16 entry_idx;
+			__u64 flags;
+		} msix_routing;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -541,6 +549,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
 #define KVM_CAP_ASYNC_PF 59
+#define KVM_CAP_MSIX_MMIO 60
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -672,6 +681,9 @@ struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MMIO */
+#define KVM_REGISTER_MSIX_MMIO    _IOW(KVMIO,  0x7d, struct kvm_msix_mmio_user)
+#define KVM_UNREGISTER_MSIX_MMIO  _IOW(KVMIO,  0x7e, struct kvm_msix_mmio_user)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
@@ -795,4 +807,20 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV	    (1 << 0)
+
+#define KVM_MSIX_MMIO_TYPE_BASE_TABLE	    (1 << 8)
+
+#define KVM_MSIX_MMIO_TYPE_DEV_MASK	    0x00ff
+#define KVM_MSIX_MMIO_TYPE_BASE_MASK	    0xff00
+struct kvm_msix_mmio_user {
+	__u32 dev_id;
+	__u16 type;
+	__u16 max_entries_nr;
+	__u64 base_addr;
+	__u64 base_va;
+	__u64 flags;
+	__u64 reserved[4];
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a32c53e..91488a4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,8 +68,17 @@ enum kvm_bus {
 	KVM_NR_BUSES
 };
 
+#define KVM_IO_EXT_DATA_TYPE_MSIX_ROUTING   1
 struct kvm_io_ext_data {
 	int type;   /* Extended data type */
+	union {
+		struct {
+			u32 dev_id;	/* Device ID */
+			u16 type;	/* Device type */
+			u16 entry_idx;  /* Accessed MSI-X entry index */
+			u64 flags;
+		} msix_routing;
+	};
 };
 
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
@@ -238,6 +247,27 @@ struct kvm_memslots {
 					KVM_PRIVATE_MEM_SLOTS];
 };
 
+#define KVM_MSIX_MMIO_MAX    32
+
+struct kvm_msix_mmio {
+	u32 dev_id;
+	u16 type;
+	u16 max_entries_nr;
+	u64 flags;
+	gpa_t table_base_addr;
+	hva_t table_base_va;
+	gpa_t pba_base_addr;
+	hva_t pba_base_va;
+};
+
+struct kvm_msix_mmio_dev {
+	struct kvm *kvm;
+	struct kvm_io_device table_dev;
+	int mmio_nr;
+	struct kvm_msix_mmio mmio[KVM_MSIX_MMIO_MAX];
+	struct mutex lock;
+};
+
 struct kvm {
 	spinlock_t mmu_lock;
 	raw_spinlock_t requests_lock;
@@ -286,6 +316,7 @@ struct kvm {
 	long mmu_notifier_count;
 #endif
 	long tlbs_dirty;
+	struct kvm_msix_mmio_dev msix_mmio_dev;
 };
 
 /* The guest did something we don't support. */
@@ -558,6 +589,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
+int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
+			int assigned_dev_id, int entry, bool mask);
+
 /* For vcpu->arch.iommu_flags */
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index ae72ae6..cc09f38 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include "irq.h"
+#include "msix_mmio.h"
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
@@ -191,12 +192,22 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
 }
 
+static void assigned_device_free_msix_mmio(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *adev)
+{
+	kvm_free_msix_mmio(kvm, adev->assigned_dev_id,
+			KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+			KVM_MSIX_MMIO_TYPE_BASE_TABLE);
+}
+
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
 				     *assigned_dev)
 {
 	kvm_free_assigned_irq(kvm, assigned_dev);
 
+	assigned_device_free_msix_mmio(kvm, assigned_dev);
+
 	__pci_reset_function(assigned_dev->dev);
 	pci_restore_state(assigned_dev->dev);
 
@@ -785,3 +796,33 @@ out:
 	return r;
 }
 
+/* The caller should hold kvm->lock */
+int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
+				int assigned_dev_id, int entry, bool mask)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *adev;
+	int i;
+
+	BUG_ON(!irqchip_in_kernel(kvm));
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev_id);
+	if (!adev)
+		goto out;
+
+	r = -ENOTSUPP;
+	/* For non-MSIX enabled devices, entries_nr == 0 */
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->host_msix_entries[i].entry == entry) {
+			if (mask)
+				disable_irq_nosync(
+					adev->host_msix_entries[i].vector);
+			else
+				enable_irq(adev->host_msix_entries[i].vector);
+			r = 0;
+			break;
+		}
+out:
+	return r;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a61f90e..f211e49 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -56,6 +56,7 @@
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
+#include "msix_mmio.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -509,6 +510,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
+	kvm_unregister_msix_mmio_dev(kvm);
 	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
@@ -1877,6 +1879,24 @@ static long kvm_vm_ioctl(struct file *filp,
 		mutex_unlock(&kvm->lock);
 		break;
 #endif
+	case KVM_REGISTER_MSIX_MMIO: {
+		struct kvm_msix_mmio_user mmio_user;
+
+		r = -EFAULT;
+		if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
+			goto out;
+		r = kvm_vm_ioctl_register_msix_mmio(kvm, &mmio_user);
+		break;
+	}
+	case KVM_UNREGISTER_MSIX_MMIO: {
+		struct kvm_msix_mmio_user mmio_user;
+
+		r = -EFAULT;
+		if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
+			goto out;
+		r = kvm_vm_ioctl_unregister_msix_mmio(kvm, &mmio_user);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -1988,6 +2008,12 @@ static int kvm_dev_ioctl_create_vm(void)
 		return r;
 	}
 #endif
+	r = kvm_register_msix_mmio_dev(kvm);
+	if (r < 0) {
+		kvm_put_kvm(kvm);
+		return r;
+	}
+
 	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 	if (r < 0)
 		kvm_put_kvm(kvm);
@@ -2223,14 +2249,18 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val, struct kvm_io_ext_data *ext_data)
 {
-	int i;
+	int i, r = -EOPNOTSUPP;
 	struct kvm_io_bus *bus;
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data))
+	for (i = 0; i < bus->dev_count; i++) {
+		r = kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data);
+		if (r == -ENOTSYNC)
+			break;
+		else if (!r)
 			return 0;
-	return -EOPNOTSUPP;
+	}
+	return r;
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
diff --git a/virt/kvm/msix_mmio.c b/virt/kvm/msix_mmio.c
new file mode 100644
index 0000000..472b488
--- /dev/null
+++ b/virt/kvm/msix_mmio.c
@@ -0,0 +1,291 @@
+/*
+ * MSI-X MMIO emulation
+ *
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Author:
+ *   Sheng Yang <sheng.yang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/pci.h>
+
+#include "msix_mmio.h"
+#include "iodev.h"
+
+static int update_msix_mask_bit(struct kvm *kvm, struct kvm_msix_mmio *mmio,
+				int entry, u32 flag)
+{
+	BUG_ON(!(mmio->type & KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV));
+	return kvm_assigned_device_update_msix_mask_bit(kvm,
+			mmio->dev_id, entry, flag);
+}
+
+/* Caller must hold dev->lock */
+static int get_mmio_table_index(struct kvm_msix_mmio_dev *dev,
+				gpa_t addr, int len)
+{
+	gpa_t start, end;
+	int i, r = -EINVAL;
+
+	for (i = 0; i < dev->mmio_nr; i++) {
+		start = dev->mmio[i].table_base_addr;
+		end = dev->mmio[i].table_base_addr + PCI_MSIX_ENTRY_SIZE *
+			dev->mmio[i].max_entries_nr;
+		if (addr >= start && addr + len <= end) {
+			r = i;
+			break;
+		}
+	}
+
+	return r;
+}
+
+static int msix_table_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+				void *val)
+{
+	struct kvm_msix_mmio_dev *mmio_dev =
+		container_of(this, struct kvm_msix_mmio_dev, table_dev);
+	struct kvm_msix_mmio *mmio;
+	int idx, ret = 0, entry, offset, r;
+
+	mutex_lock(&mmio_dev->lock);
+	idx = get_mmio_table_index(mmio_dev, addr, len);
+	if (idx < 0) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((len != 4 && len != 8) || addr & (len - 1))
+		goto out;
+
+	offset = addr % PCI_MSIX_ENTRY_SIZE;
+	mmio = &mmio_dev->mmio[idx];
+	entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
+	r = copy_from_user(val, (void __user *)(mmio->table_base_va +
+			entry * PCI_MSIX_ENTRY_SIZE + offset), len);
+	if (r)
+		goto out;
+out:
+	mutex_unlock(&mmio_dev->lock);
+	return ret;
+}
+
+/* WARNING: Big endian machine untested! */
+static int msix_table_mmio_write(struct kvm_io_device *this, gpa_t addr,
+				int len, const void *val,
+				struct kvm_io_ext_data *ext_data)
+{
+	struct kvm_msix_mmio_dev *mmio_dev =
+		container_of(this, struct kvm_msix_mmio_dev, table_dev);
+	struct kvm_msix_mmio *mmio;
+	int idx, entry, offset, ret = 0, r = 0;
+	void __user *entry_base;
+	__le32 __user *ctrl_pos;
+	__le32 old_ctrl, new_ctrl;
+
+	mutex_lock(&mmio_dev->kvm->lock);
+	mutex_lock(&mmio_dev->lock);
+	idx = get_mmio_table_index(mmio_dev, addr, len);
+	if (idx < 0) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((len != 4 && len != 8) || addr & (len - 1))
+		goto out;
+
+	offset = addr % PCI_MSIX_ENTRY_SIZE;
+
+	mmio = &mmio_dev->mmio[idx];
+	entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
+	entry_base = (void __user *)(mmio->table_base_va +
+			entry * PCI_MSIX_ENTRY_SIZE);
+	ctrl_pos = entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+	if (get_user(old_ctrl, ctrl_pos))
+		goto out;
+
+	/* Don't allow writing to other fields when entry is unmasked */
+	if (!(le32_to_cpu(old_ctrl) & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
+	    offset != PCI_MSIX_ENTRY_VECTOR_CTRL)
+		goto out;
+
+	if (copy_to_user(entry_base + offset, val, len))
+		goto out;
+
+	ext_data->type = KVM_IO_EXT_DATA_TYPE_MSIX_ROUTING;
+	ext_data->msix_routing.dev_id = mmio->dev_id;
+	ext_data->msix_routing.type = mmio->type;
+	ext_data->msix_routing.entry_idx = entry;
+	ext_data->msix_routing.flags = 0;
+
+	if (offset + len < PCI_MSIX_ENTRY_VECTOR_CTRL) {
+		ret = -ENOTSYNC;
+		goto out;
+	}
+
+	if (get_user(new_ctrl, ctrl_pos))
+		goto out;
+
+	if (old_ctrl == new_ctrl) {
+		if (offset == PCI_MSIX_ENTRY_DATA && len == 8)
+			ret = -ENOTSYNC;
+		goto out;
+	}
+	if ((old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) ^
+			(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
+		r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry,
+				!!(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT));
+	if (r)
+		ret = -ENOTSYNC;
+out:
+	mutex_unlock(&mmio_dev->lock);
+	mutex_unlock(&mmio_dev->kvm->lock);
+	return ret;
+}
+
+static const struct kvm_io_device_ops msix_mmio_table_ops = {
+	.read     = msix_table_mmio_read,
+	.write    = msix_table_mmio_write,
+};
+
+int kvm_register_msix_mmio_dev(struct kvm *kvm)
+{
+	int ret;
+
+	kvm_iodevice_init(&kvm->msix_mmio_dev.table_dev, &msix_mmio_table_ops);
+	mutex_init(&kvm->msix_mmio_dev.lock);
+	kvm->msix_mmio_dev.kvm = kvm;
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+				      &kvm->msix_mmio_dev.table_dev);
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
+}
+
+int kvm_unregister_msix_mmio_dev(struct kvm *kvm)
+{
+	int ret;
+
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+				      &kvm->msix_mmio_dev.table_dev);
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
+}
+
+int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
+				    struct kvm_msix_mmio_user *mmio_user)
+{
+	struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
+	struct kvm_msix_mmio *mmio = NULL;
+	int r = 0, i;
+
+	mutex_lock(&mmio_dev->lock);
+	if (mmio_user->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
+		r = -ENOSPC;
+		goto out;
+	}
+	/* All reserved currently */
+	if (mmio_user->flags) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) !=
+			KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV) {
+		r = -EINVAL;
+		goto out;
+	}
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) !=
+			KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if ((unsigned long)mmio_user->base_va != mmio_user->base_va ||
+	    (unsigned long)mmio_user->base_addr != mmio_user->base_addr) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Check alignment and accessibility */
+	if ((mmio_user->base_va % PCI_MSIX_ENTRY_SIZE) ||
+	    !access_ok(VERIFY_WRITE, (void __user *)mmio_user->base_va,
+			mmio_user->max_entries_nr * PCI_MSIX_ENTRY_SIZE)) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	for (i = 0; i < mmio_dev->mmio_nr; i++) {
+		if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id &&
+		    (mmio_dev->mmio[i].type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
+		    (mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK)) {
+			mmio = &mmio_dev->mmio[i];
+			if (mmio->max_entries_nr != mmio_user->max_entries_nr) {
+				r = -EINVAL;
+				goto out;
+			}
+			break;
+		}
+	}
+	if (!mmio) {
+		if (mmio_dev->mmio_nr == KVM_MSIX_MMIO_MAX) {
+			r = -ENOSPC;
+			goto out;
+		}
+		mmio = &mmio_dev->mmio[mmio_dev->mmio_nr];
+		mmio_dev->mmio_nr++;
+	}
+
+	mmio->max_entries_nr = mmio_user->max_entries_nr;
+	mmio->dev_id = mmio_user->dev_id;
+	mmio->flags = mmio_user->flags;
+
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
+			KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
+		mmio->type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) ==
+			KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
+		mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+		mmio->table_base_addr = mmio_user->base_addr;
+		mmio->table_base_va = mmio_user->base_va;
+	}
+out:
+	mutex_unlock(&mmio_dev->lock);
+	return r;
+}
+
+int kvm_free_msix_mmio(struct kvm *kvm, u32 dev_id, u16 type)
+{
+	struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
+	int r = -EINVAL, i, j;
+
+	mutex_lock(&mmio_dev->lock);
+	BUG_ON(mmio_dev->mmio_nr > KVM_MSIX_MMIO_MAX);
+	for (i = 0; i < mmio_dev->mmio_nr; i++) {
+		if (mmio_dev->mmio[i].dev_id == dev_id &&
+		    mmio_dev->mmio[i].type == type) {
+			r = 0;
+			for (j = i; j < mmio_dev->mmio_nr - 1; j++)
+				mmio_dev->mmio[j] = mmio_dev->mmio[j + 1];
+			mmio_dev->mmio[mmio_dev->mmio_nr].max_entries_nr = 0;
+			mmio_dev->mmio[mmio_dev->mmio_nr].dev_id = 0;
+			mmio_dev->mmio[mmio_dev->mmio_nr].type = 0;
+			mmio_dev->mmio_nr--;
+			break;
+		}
+	}
+	mutex_unlock(&mmio_dev->lock);
+	return r;
+}
+
+int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
+				      struct kvm_msix_mmio_user *mmio_user)
+{
+	return kvm_free_msix_mmio(kvm, mmio_user->dev_id, mmio_user->type);
+}
+
diff --git a/virt/kvm/msix_mmio.h b/virt/kvm/msix_mmio.h
new file mode 100644
index 0000000..46346fb
--- /dev/null
+++ b/virt/kvm/msix_mmio.h
@@ -0,0 +1,26 @@
+#ifndef __KVM_MSIX_MMIO_H__
+#define __KVM_MSIX_MMIO_H__
+/*
+ * MSI-X MMIO emulation
+ *
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Author:
+ *   Sheng Yang <sheng.yang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+int kvm_register_msix_mmio_dev(struct kvm *kvm);
+int kvm_unregister_msix_mmio_dev(struct kvm *kvm);
+int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
+				    struct kvm_msix_mmio_user *mmio_user);
+int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
+				      struct kvm_msix_mmio_user *mmio_user);
+int kvm_free_msix_mmio(struct kvm *kvm, u32 dev_id, u16 type);
+
+#endif
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] KVM: Add documents for MSI-X MMIO API
  2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
                   ` (2 preceding siblings ...)
  2011-03-02  7:26 ` [PATCH 3/4] KVM: Emulate MSI-X table in kernel Sheng Yang
@ 2011-03-02  7:26 ` Sheng Yang
  2011-03-02  9:23 ` [PATCH 0/4 v12] MSI-X MMIO support for KVM Michael S. Tsirkin
  4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2011-03-02  7:26 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 Documentation/kvm/api.txt |   58 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index e1a9297..dd10c3b 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1263,6 +1263,53 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+4.54 KVM_REGISTER_MSIX_MMIO
+
+Capability: KVM_CAP_MSIX_MMIO
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msix_mmio_user (in)
+Returns: 0 on success, -1 on error
+
+This API indicates an MSI-X MMIO address of a guest device. Then all MMIO
+operation would be handled by kernel. When necessary(e.g. MSI data/address
+changed), KVM would exit to userspace using KVM_EXIT_MSIX_ROUTING_UPDATE to
+indicate the MMIO modification and require userspace to update IRQ routing
+table.
+
+NOTICE: Writing the MSI-X MMIO page after it was registered with this API may
+be dangerous for userspace program. The writing during VM running may result
+in synchronization issue therefore the assigned device can't work properly.
+The writing is allowed when VM is not running and can be used as save/restore
+mechanism.
+
+struct kvm_msix_mmio_user {
+	__u32 dev_id;
+	__u16 type;		/* Device type and MMIO address type */
+	__u16 max_entries_nr;	/* Maximum entries supported */
+	__u64 base_addr;	/* Guest physical address of MMIO */
+	__u64 base_va;		/* Host virtual address of MMIO mapping */
+	__u64 flags;		/* Reserved for now */
+	__u64 reserved[4];
+};
+
+Current device type can be:
+#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV	    (1 << 0)
+
+Current MMIO type can be:
+#define KVM_MSIX_MMIO_TYPE_BASE_TABLE	    (1 << 8)
+
+4.55 KVM_UNREGISTER_MSIX_MMIO
+
+Capability: KVM_CAP_MSIX_MMIO
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msix_mmio_user (in)
+Returns: 0 on success, -1 on error
+
+This API would unregister the specific MSI-X MMIO, indicated by dev_id and
+type fields of struct kvm_msix_mmio_user.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
@@ -1445,6 +1492,17 @@ Userspace can now handle the hypercall and when it's done modify the gprs as
 necessary. Upon guest entry all guest GPRs will then be replaced by the values
 in this struct.
 
+		/* KVM_EXIT_MSIX_ROUTING_UPDATE*/
+		struct {
+			__u32 dev_id;
+			__u16 type;
+			__u16 entry_idx;
+			__u64 flags;
+		} msix_routing;
+
+KVM_EXIT_MSIX_ROUTING_UPDATE indicates one MSI-X entry has been modified, and
+userspace need to update the correlated routing table.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4 v12] MSI-X MMIO support for KVM
  2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
                   ` (3 preceding siblings ...)
  2011-03-02  7:26 ` [PATCH 4/4] KVM: Add documents for MSI-X MMIO API Sheng Yang
@ 2011-03-02  9:23 ` Michael S. Tsirkin
  2011-03-02 18:51   ` Marcelo Tosatti
  4 siblings, 1 reply; 9+ messages in thread
From: Michael S. Tsirkin @ 2011-03-02  9:23 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Avi Kivity, Marcelo Tosatti, Alex Williamson, kvm

On Wed, Mar 02, 2011 at 03:26:53PM +0800, Sheng Yang wrote:
> Change from v10:
> 1. Update according to the comments of Michael.
> 2. Use mmio_needed to exit to userspace according to Marcelo's comments.

PCI-wise, I don't see anything to complain about.
So ack the PCI bits.
You guys decide on the rest.

Several things I suggested previously that are not
related to the PCI point of view:

1. In msix_table_mmio_write, we fill in ext_data even if
   we are not going to exit to userspace in the end.
   It seems a trivial optimization to only do it if we exit.
2. Instead of filling in ext_data, and then copying to vcpu,
   we could fill the data in vcpu directly.
3. MSIX is not an error. So returning -ENOTSYNC to signal
   it is ugly. It would be cleaner to return negative
   value on error, and positive exit code to trigger exit.
4. Patch 4/4 adds whitespace errors that git complains about.

With changes 2 and 3, arch/x86/kvm/x86.c would not
need to know about msix at all.

As I said these are all suggestions unrelated to pci,
and I don't know what Avi/Marcelo think about 1 and 2.
3 and 4 are easy to fix though.


> Sheng Yang (4):
>   KVM: Move struct kvm_io_device to kvm_host.h
>   KVM: Add kvm_io_ext_data to IO handler
>   KVM: Emulate MSI-X table in kernel
>   KVM: Add documents for MSI-X MMIO API
> 
>  Documentation/kvm/api.txt |   58 +++++++++
>  arch/x86/kvm/Makefile     |    2 +-
>  arch/x86/kvm/i8254.c      |    6 +-
>  arch/x86/kvm/i8259.c      |    3 +-
>  arch/x86/kvm/lapic.c      |    3 +-
>  arch/x86/kvm/x86.c        |   42 +++++--
>  include/linux/kvm.h       |   28 +++++
>  include/linux/kvm_host.h  |   64 ++++++++++-
>  virt/kvm/assigned-dev.c   |   41 +++++++
>  virt/kvm/coalesced_mmio.c |    3 +-
>  virt/kvm/eventfd.c        |    2 +-
>  virt/kvm/ioapic.c         |    2 +-
>  virt/kvm/iodev.h          |   31 +----
>  virt/kvm/kvm_main.c       |   40 ++++++-
>  virt/kvm/msix_mmio.c      |  291 +++++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/msix_mmio.h      |   26 ++++
>  16 files changed, 591 insertions(+), 51 deletions(-)
>  create mode 100644 virt/kvm/msix_mmio.c
>  create mode 100644 virt/kvm/msix_mmio.h

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4 v12] MSI-X MMIO support for KVM
  2011-03-02  9:23 ` [PATCH 0/4 v12] MSI-X MMIO support for KVM Michael S. Tsirkin
@ 2011-03-02 18:51   ` Marcelo Tosatti
  2011-03-02 19:56     ` Michael S. Tsirkin
  2011-03-03 13:31     ` Sheng Yang
  0 siblings, 2 replies; 9+ messages in thread
From: Marcelo Tosatti @ 2011-03-02 18:51 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Sheng Yang, Avi Kivity, Alex Williamson, kvm

On Wed, Mar 02, 2011 at 11:23:14AM +0200, Michael S. Tsirkin wrote:
> On Wed, Mar 02, 2011 at 03:26:53PM +0800, Sheng Yang wrote:
> > Change from v10:
> > 1. Update according to the comments of Michael.
> > 2. Use mmio_needed to exit to userspace according to Marcelo's comments.
> 
> PCI-wise, I don't see anything to complain about.
> So ack the PCI bits.
> You guys decide on the rest.
> 
> Several things I suggested previously that are not
> related to the PCI point of view:
> 
> 1. In msix_table_mmio_write, we fill in ext_data even if
>    we are not going to exit to userspace in the end.
>    It seems a trivial optimization to only do it if we exit.
> 2. Instead of filling in ext_data, and then copying to vcpu,
>    we could fill the data in vcpu directly.
> 3. MSIX is not an error. So returning -ENOTSYNC to signal
>    it is ugly. It would be cleaner to return negative
>    value on error, and positive exit code to trigger exit.
> 4. Patch 4/4 adds whitespace errors that git complains about.
> 
> With changes 2 and 3, arch/x86/kvm/x86.c would not
> need to know about msix at all.
> 
> As I said these are all suggestions unrelated to pci,
> and I don't know what Avi/Marcelo think about 1 and 2.
> 3 and 4 are easy to fix though.

All minor IMO (i prefer ENOTSYNC as its meaningful), whitespace
can be fixed while applying.

Avi, can you please ACK?

Sheng, we will fix any further comments. Thanks!


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4 v12] MSI-X MMIO support for KVM
  2011-03-02 18:51   ` Marcelo Tosatti
@ 2011-03-02 19:56     ` Michael S. Tsirkin
  2011-03-03 13:31     ` Sheng Yang
  1 sibling, 0 replies; 9+ messages in thread
From: Michael S. Tsirkin @ 2011-03-02 19:56 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Sheng Yang, Avi Kivity, Alex Williamson, kvm

On Wed, Mar 02, 2011 at 03:51:20PM -0300, Marcelo Tosatti wrote:
> On Wed, Mar 02, 2011 at 11:23:14AM +0200, Michael S. Tsirkin wrote:
> > On Wed, Mar 02, 2011 at 03:26:53PM +0800, Sheng Yang wrote:
> > > Change from v10:
> > > 1. Update according to the comments of Michael.
> > > 2. Use mmio_needed to exit to userspace according to Marcelo's comments.
> > 
> > PCI-wise, I don't see anything to complain about.
> > So ack the PCI bits.
> > You guys decide on the rest.
> > 
> > Several things I suggested previously that are not
> > related to the PCI point of view:
> > 
> > 1. In msix_table_mmio_write, we fill in ext_data even if
> >    we are not going to exit to userspace in the end.
> >    It seems a trivial optimization to only do it if we exit.
> > 2. Instead of filling in ext_data, and then copying to vcpu,
> >    we could fill the data in vcpu directly.
> > 3. MSIX is not an error. So returning -ENOTSYNC to signal
> >    it is ugly. It would be cleaner to return negative
> >    value on error, and positive exit code to trigger exit.
> > 4. Patch 4/4 adds whitespace errors that git complains about.
> > 
> > With changes 2 and 3, arch/x86/kvm/x86.c would not
> > need to know about msix at all.
> > 
> > As I said these are all suggestions unrelated to pci,
> > and I don't know what Avi/Marcelo think about 1 and 2.
> > 3 and 4 are easy to fix though.
> 
> All minor IMO

Yes.

> (i prefer ENOTSYNC as its meaningful),

When this is merged I'll post a patch on top and
we can discuss.

> whitespace
> can be fixed while applying.
> 
> Avi, can you please ACK?
> 
> Sheng, we will fix any further comments. Thanks!

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4 v12] MSI-X MMIO support for KVM
  2011-03-02 18:51   ` Marcelo Tosatti
  2011-03-02 19:56     ` Michael S. Tsirkin
@ 2011-03-03 13:31     ` Sheng Yang
  1 sibling, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2011-03-03 13:31 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Michael S. Tsirkin, Avi Kivity, Alex Williamson, kvm

On Wed, Mar 02, 2011 at 03:51:20PM -0300, Marcelo Tosatti wrote:
> On Wed, Mar 02, 2011 at 11:23:14AM +0200, Michael S. Tsirkin wrote:
> > On Wed, Mar 02, 2011 at 03:26:53PM +0800, Sheng Yang wrote:
> > > Change from v10:
> > > 1. Update according to the comments of Michael.
> > > 2. Use mmio_needed to exit to userspace according to Marcelo's comments.
> > 
> > PCI-wise, I don't see anything to complain about.
> > So ack the PCI bits.
> > You guys decide on the rest.
> > 
> > Several things I suggested previously that are not
> > related to the PCI point of view:
> > 
> > 1. In msix_table_mmio_write, we fill in ext_data even if
> >    we are not going to exit to userspace in the end.
> >    It seems a trivial optimization to only do it if we exit.
> > 2. Instead of filling in ext_data, and then copying to vcpu,
> >    we could fill the data in vcpu directly.
> > 3. MSIX is not an error. So returning -ENOTSYNC to signal
> >    it is ugly. It would be cleaner to return negative
> >    value on error, and positive exit code to trigger exit.
> > 4. Patch 4/4 adds whitespace errors that git complains about.
> > 
> > With changes 2 and 3, arch/x86/kvm/x86.c would not
> > need to know about msix at all.
> > 
> > As I said these are all suggestions unrelated to pci,
> > and I don't know what Avi/Marcelo think about 1 and 2.
> > 3 and 4 are easy to fix though.
> 
> All minor IMO (i prefer ENOTSYNC as its meaningful), whitespace
> can be fixed while applying.
> 
> Avi, can you please ACK?
> 
> Sheng, we will fix any further comments. Thanks!

Thanks! It's my pleasure to work with you guys. Hope we can work together in
the future. :)

-- 
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-03-03 13:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
2011-03-02  7:26 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2011-03-02  7:26 ` [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler Sheng Yang
2011-03-02  7:26 ` [PATCH 3/4] KVM: Emulate MSI-X table in kernel Sheng Yang
2011-03-02  7:26 ` [PATCH 4/4] KVM: Add documents for MSI-X MMIO API Sheng Yang
2011-03-02  9:23 ` [PATCH 0/4 v12] MSI-X MMIO support for KVM Michael S. Tsirkin
2011-03-02 18:51   ` Marcelo Tosatti
2011-03-02 19:56     ` Michael S. Tsirkin
2011-03-03 13:31     ` Sheng Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.