* [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-10 9:05 ` Zha Bin
0 siblings, 0 replies; 104+ messages in thread
From: Zha Bin @ 2020-02-10 9:05 UTC (permalink / raw)
To: linux-kernel
Cc: virtio-dev, zhabin, slp, mst, jasowang, jing2.liu, qemu-devel,
chao.p.peng, gerry
From: Liu Jiang <gerry@linux.alibaba.com>
Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
virtio over mmio devices as a lightweight machine model for modern
cloud. The standard virtio over MMIO transport layer only supports one
legacy interrupt, which is much heavier than virtio over PCI transport
layer using MSI. Legacy interrupts have a long work path and cause specific
VMExits in the following cases, which can considerably slow down
performance:
1) read interrupt status register
2) update interrupt status register
3) write IOAPIC EOI register
We proposed to add MSI support for virtio over MMIO via new feature
bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
uses msi_sharing[1] to indicate the event and vector mapping.
Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
Bit 1 is 1: device uses sharing mode and dynamic mapping.
[1] https://lkml.org/lkml/2020/1/21/31
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Co-developed-by: Zha Bin <zhabin@linux.alibaba.com>
Signed-off-by: Zha Bin <zhabin@linux.alibaba.com>
Co-developed-by: Jing Liu <jing2.liu@linux.intel.com>
Signed-off-by: Jing Liu <jing2.liu@linux.intel.com>
Co-developed-by: Chao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
drivers/virtio/virtio_mmio.c | 299 ++++++++++++++++++++++++++++++++++--
drivers/virtio/virtio_mmio_common.h | 8 +
drivers/virtio/virtio_mmio_msi.h | 82 ++++++++++
include/uapi/linux/virtio_config.h | 7 +-
include/uapi/linux/virtio_mmio.h | 31 ++++
5 files changed, 411 insertions(+), 16 deletions(-)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 41e1c93..b24ce21 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -67,8 +67,7 @@
#include <uapi/linux/virtio_mmio.h>
#include <linux/virtio_ring.h>
#include "virtio_mmio_common.h"
-
-
+#include "virtio_mmio_msi.h"
/* The alignment to use between consumer and producer parts of vring.
* Currently hardcoded to the page size. */
@@ -85,9 +84,13 @@ struct virtio_mmio_vq_info {
/* Notify Address*/
unsigned int notify_addr;
-};
+ /* MSI vector (or none) */
+ unsigned int msi_vector;
+};
+static void vm_free_msi_irqs(struct virtio_device *vdev);
+static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs);
/* Configuration interface */
@@ -110,6 +113,9 @@ static void vm_transport_features(struct virtio_device *vdev, u64 features)
{
if (features & BIT_ULL(VIRTIO_F_MMIO_NOTIFICATION))
__virtio_set_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION);
+
+ if (features & BIT_ULL(VIRTIO_F_MMIO_MSI))
+ __virtio_set_bit(vdev, VIRTIO_F_MMIO_MSI);
}
static int vm_finalize_features(struct virtio_device *vdev)
@@ -307,7 +313,33 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
return ret;
}
+static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
+{
+ struct virtio_mmio_device *vm_dev = opaque;
+ struct virtio_mmio_vq_info *info;
+ irqreturn_t ret = IRQ_NONE;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vm_dev->lock, flags);
+ list_for_each_entry(info, &vm_dev->virtqueues, node) {
+ if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ }
+ spin_unlock_irqrestore(&vm_dev->lock, flags);
+
+ return ret;
+}
+
+
+/* Handle a configuration change */
+static irqreturn_t vm_config_changed(int irq, void *opaque)
+{
+ struct virtio_mmio_device *vm_dev = opaque;
+
+ virtio_config_changed(&vm_dev->vdev);
+ return IRQ_HANDLED;
+}
static void vm_del_vq(struct virtqueue *vq)
{
@@ -316,6 +348,15 @@ static void vm_del_vq(struct virtqueue *vq)
unsigned long flags;
unsigned int index = vq->index;
+ if (vm_dev->msi_enabled && !vm_dev->msi_share) {
+ if (info->msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR) {
+ int irq = mmio_msi_irq_vector(&vq->vdev->dev,
+ info->msi_vector);
+
+ free_irq(irq, vq);
+ }
+ }
+
spin_lock_irqsave(&vm_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vm_dev->lock, flags);
@@ -334,20 +375,56 @@ static void vm_del_vq(struct virtqueue *vq)
kfree(info);
}
-static void vm_del_vqs(struct virtio_device *vdev)
+static void vm_free_irqs(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+ if (vm_dev->msi_enabled)
+ vm_free_msi_irqs(vdev);
+ else
+ free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
+}
+
+static void vm_del_vqs(struct virtio_device *vdev)
+{
struct virtqueue *vq, *n;
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
vm_del_vq(vq);
- free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
+ vm_free_irqs(vdev);
+}
+
+static inline void mmio_msi_set_enable(struct virtio_mmio_device *vm_dev,
+ int enable)
+{
+ u32 state;
+
+ state = readl(vm_dev->base + VIRTIO_MMIO_MSI_STATE);
+ if (enable && (state & VIRTIO_MMIO_MSI_ENABLED_MASK))
+ return;
+
+ writel(VIRTIO_MMIO_MSI_CMD_ENABLE,
+ vm_dev->base + VIRTIO_MMIO_MSI_COMMAND);
+}
+
+static void mmio_msi_config_vector(struct virtio_mmio_device *vm_dev, u32 vec)
+{
+ writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
+ writel(VIRTIO_MMIO_MSI_CMD_MAP_CONFIG, vm_dev->base +
+ VIRTIO_MMIO_MSI_COMMAND);
+}
+
+static void mmio_msi_queue_vector(struct virtio_mmio_device *vm_dev, u32 vec)
+{
+ writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
+ writel(VIRTIO_MMIO_MSI_CMD_MAP_QUEUE, vm_dev->base +
+ VIRTIO_MMIO_MSI_COMMAND);
}
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
- const char *name, bool ctx)
+ const char *name, bool ctx, u32 msi_vector)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info;
@@ -440,6 +517,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
else
info->notify_addr = VIRTIO_MMIO_QUEUE_NOTIFY;
+ info->msi_vector = msi_vector;
+ /* Set queue event and vector mapping for MSI share mode. */
+ if (vm_dev->msi_share && msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR)
+ mmio_msi_queue_vector(vm_dev, msi_vector);
+
spin_lock_irqsave(&vm_dev->lock, flags);
list_add(&info->node, &vm_dev->virtqueues);
spin_unlock_irqrestore(&vm_dev->lock, flags);
@@ -461,12 +543,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
return ERR_PTR(err);
}
-static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[],
- vq_callback_t *callbacks[],
- const char * const names[],
- const bool *ctx,
- struct irq_affinity *desc)
+static int vm_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[],
+ const bool *ctx)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
int irq = platform_get_irq(vm_dev->pdev, 0);
@@ -487,8 +568,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
err = request_irq(irq, vm_interrupt, IRQF_SHARED,
dev_name(&vdev->dev), vm_dev);
- if (err)
- return err;
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
@@ -497,14 +576,202 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
}
vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
- ctx ? ctx[i] : false);
+ ctx ? ctx[i] : false,
+ VIRTIO_MMIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
vm_del_vqs(vdev);
return PTR_ERR(vqs[i]);
}
}
+ return err;
+}
+
+static int vm_find_vqs_msi(struct virtio_device *vdev, unsigned int nvqs,
+ struct virtqueue *vqs[], vq_callback_t *callbacks[],
+ const char * const names[], bool per_vq_vectors,
+ const bool *ctx, struct irq_affinity *desc)
+{
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+ int i, err, allocated_vectors, nvectors;
+ u32 msi_vec;
+ u32 max_vec_num = readl(vm_dev->base + VIRTIO_MMIO_MSI_VEC_NUM);
+
+ /* For MSI non-sharing, the max vector number MUST greater than nvqs.
+ * Otherwise, go back to legacy interrupt.
+ */
+ if (per_vq_vectors && max_vec_num < (nvqs + 1))
+ return -EINVAL;
+
+ if (per_vq_vectors) {
+ nvectors = 1;
+ for (i = 0; i < nvqs; ++i)
+ if (callbacks[i])
+ ++nvectors;
+ } else {
+ nvectors = 2;
+ }
+
+ vm_dev->msi_share = !per_vq_vectors;
+
+ /* Allocate nvqs irqs for queues and one irq for configuration */
+ err = vm_request_msi_vectors(vdev, nvectors);
+ if (err != 0)
+ return err;
+
+ allocated_vectors = vm_dev->msi_used_vectors;
+ for (i = 0; i < nvqs; i++) {
+ if (!names[i]) {
+ vqs[i] = NULL;
+ continue;
+ }
+ if (!callbacks[i])
+ msi_vec = VIRTIO_MMIO_MSI_NO_VECTOR;
+ else if (per_vq_vectors)
+ msi_vec = allocated_vectors++;
+ else
+ msi_vec = 1;
+ vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false, msi_vec);
+ if (IS_ERR(vqs[i])) {
+ err = PTR_ERR(vqs[i]);
+ goto error_find;
+ }
+
+ if (!per_vq_vectors ||
+ msi_vec == VIRTIO_MMIO_MSI_NO_VECTOR)
+ continue;
+
+ /* allocate per-vq irq if available and necessary */
+ snprintf(vm_dev->vm_vq_names[msi_vec],
+ sizeof(*vm_dev->vm_vq_names),
+ "%s-%s",
+ dev_name(&vm_dev->vdev.dev), names[i]);
+ err = request_irq(mmio_msi_irq_vector(&vqs[i]->vdev->dev,
+ msi_vec),
+ vring_interrupt, 0,
+ vm_dev->vm_vq_names[msi_vec], vqs[i]);
+
+ if (err)
+ goto error_find;
+ }
+
return 0;
+
+error_find:
+ vm_del_vqs(vdev);
+ return err;
+}
+
+static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[],
+ const bool *ctx,
+ struct irq_affinity *desc)
+{
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+ int err;
+
+ if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION)) {
+ unsigned int notify = readl(vm_dev->base +
+ VIRTIO_MMIO_QUEUE_NOTIFY);
+
+ vm_dev->notify_base = notify & 0xffff;
+ vm_dev->notify_multiplier = (notify >> 16) & 0xffff;
+ }
+
+ if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_MSI)) {
+ bool dyn_mapping = !!(readl(vm_dev->base +
+ VIRTIO_MMIO_MSI_STATE) &
+ VIRTIO_MMIO_MSI_SHARING_MASK);
+ if (!dyn_mapping)
+ err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
+ names, true, ctx, desc);
+ else
+ err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
+ names, false, ctx, desc);
+ if (!err)
+ return 0;
+ }
+
+ return vm_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
+}
+
+static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs)
+{
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+ unsigned int v;
+ int irq, err;
+
+ if (vm_dev->msi_enabled)
+ return -EINVAL;
+
+ vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
+ GFP_KERNEL);
+ if (!vm_dev->vm_vq_names)
+ return -ENOMEM;
+
+ mmio_get_msi_domain(vdev);
+ err = mmio_msi_domain_alloc_irqs(&vdev->dev, nirqs);
+ if (err) {
+ kfree(vm_dev->vm_vq_names);
+ vm_dev->vm_vq_names = NULL;
+ return err;
+ }
+
+ mmio_msi_set_enable(vm_dev, 1);
+ vm_dev->msi_enabled = true;
+
+ v = vm_dev->msi_used_vectors;
+ /* The first MSI vector is used for configuration change event. */
+ snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
+ "%s-config", dev_name(&vdev->dev));
+ irq = mmio_msi_irq_vector(&vdev->dev, v);
+ err = request_irq(irq, vm_config_changed, 0, vm_dev->vm_vq_names[v],
+ vm_dev);
+ if (err)
+ goto error_request_irq;
+
+ /* Set the configuration event mapping. */
+ if (vm_dev->msi_share)
+ mmio_msi_config_vector(vm_dev, v);
+
+ ++vm_dev->msi_used_vectors;
+
+ if (vm_dev->msi_share) {
+ v = vm_dev->msi_used_vectors;
+ snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
+ "%s-virtqueues", dev_name(&vm_dev->vdev.dev));
+ err = request_irq(mmio_msi_irq_vector(&vdev->dev, v),
+ vm_vring_interrupt, 0, vm_dev->vm_vq_names[v],
+ vm_dev);
+ if (err)
+ goto error_request_irq;
+ ++vm_dev->msi_used_vectors;
+ }
+
+ return 0;
+
+error_request_irq:
+ vm_free_msi_irqs(vdev);
+
+ return err;
+}
+
+static void vm_free_msi_irqs(struct virtio_device *vdev)
+{
+ int i;
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+ mmio_msi_set_enable(vm_dev, 0);
+ for (i = 0; i < vm_dev->msi_used_vectors; i++)
+ free_irq(mmio_msi_irq_vector(&vdev->dev, i), vm_dev);
+ mmio_msi_domain_free_irqs(&vdev->dev);
+ kfree(vm_dev->vm_vq_names);
+ vm_dev->vm_vq_names = NULL;
+ vm_dev->msi_enabled = false;
+ vm_dev->msi_used_vectors = 0;
}
static const char *vm_bus_name(struct virtio_device *vdev)
@@ -609,6 +876,8 @@ static int virtio_mmio_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, vm_dev);
+ mmio_msi_create_irq_domain();
+
rc = register_virtio_device(&vm_dev->vdev);
if (rc)
put_device(&vm_dev->vdev.dev);
diff --git a/drivers/virtio/virtio_mmio_common.h b/drivers/virtio/virtio_mmio_common.h
index 90cb304..ccf6320 100644
--- a/drivers/virtio/virtio_mmio_common.h
+++ b/drivers/virtio/virtio_mmio_common.h
@@ -26,6 +26,14 @@ struct virtio_mmio_device {
unsigned short notify_base;
unsigned short notify_multiplier;
+
+ /* Name strings for interrupts. This size should be enough. */
+ char (*vm_vq_names)[256];
+
+ /* used vectors */
+ unsigned int msi_used_vectors;
+ bool msi_share;
+ bool msi_enabled;
};
#endif
diff --git a/drivers/virtio/virtio_mmio_msi.h b/drivers/virtio/virtio_mmio_msi.h
index 27cb2af..10038cb 100644
--- a/drivers/virtio/virtio_mmio_msi.h
+++ b/drivers/virtio/virtio_mmio_msi.h
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/platform_device.h>
+#include "virtio_mmio_common.h"
static irq_hw_number_t mmio_msi_hwirq;
static struct irq_domain *mmio_msi_domain;
@@ -21,12 +22,41 @@ void __weak irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
}
+static void __iomem *vm_dev_base(struct msi_desc *desc)
+{
+ if (desc) {
+ struct device *dev = desc->dev;
+ struct virtio_device *vdev = dev_to_virtio(dev);
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+ return vm_dev->base;
+ }
+
+ return NULL;
+}
+
+static void mmio_msi_set_mask_bit(struct irq_data *data, u32 flag)
+{
+ struct msi_desc *desc = irq_data_get_msi_desc(data);
+ void __iomem *base = vm_dev_base(desc);
+ unsigned int offset = data->irq - desc->irq;
+
+ if (base) {
+ u32 op = flag ? VIRTIO_MMIO_MSI_CMD_MASK :
+ VIRTIO_MMIO_MSI_CMD_UNMASK;
+ writel(offset, base + VIRTIO_MMIO_MSI_VEC_SEL);
+ writel(op, base + VIRTIO_MMIO_MSI_COMMAND);
+ }
+}
+
static void mmio_msi_mask_irq(struct irq_data *data)
{
+ mmio_msi_set_mask_bit(data, 1);
}
static void mmio_msi_unmask_irq(struct irq_data *data)
{
+ mmio_msi_set_mask_bit(data, 0);
}
static struct irq_chip mmio_msi_controller = {
@@ -86,8 +116,60 @@ static inline void mmio_msi_create_irq_domain(void)
irq_domain_free_fwnode(fn);
}
}
+
+static void mmio_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ void __iomem *base = vm_dev_base(desc);
+
+ if (base) {
+ writel(desc->platform.msi_index, base +
+ VIRTIO_MMIO_MSI_VEC_SEL);
+ writel(msg->address_lo, base + VIRTIO_MMIO_MSI_ADDRESS_LOW);
+ writel(msg->address_hi, base + VIRTIO_MMIO_MSI_ADDRESS_HIGH);
+ writel(msg->data, base + VIRTIO_MMIO_MSI_DATA);
+ writel(VIRTIO_MMIO_MSI_CMD_CONFIGURE,
+ base + VIRTIO_MMIO_MSI_COMMAND);
+ }
+}
+
+static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
+ unsigned int nvec)
+{
+ return platform_msi_domain_alloc_irqs(dev, nvec,
+ mmio_write_msi_msg);
+}
+
+static inline void mmio_msi_domain_free_irqs(struct device *dev)
+{
+ return platform_msi_domain_free_irqs(dev);
+}
+
+static inline void mmio_get_msi_domain(struct virtio_device *vdev)
+{
+ if (!vdev->dev.msi_domain)
+ vdev->dev.msi_domain = mmio_msi_domain;
+}
+
+static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
+{
+ struct msi_desc *entry = first_msi_entry(dev);
+
+ return entry->irq + nr;
+}
+
#else
static inline void mmio_msi_create_irq_domain(void) {}
+static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
+{
+ return -EINVAL;
+}
+static inline void mmio_get_msi_domain(struct virtio_device *vdev) {}
+static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
+ unsigned int nvec)
+{
+ return -EINVAL;
+}
+static inline void mmio_msi_domain_free_irqs(struct device *dev) {}
#endif
#endif
diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 5d93c01..5eb3900 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -52,7 +52,7 @@
* rest are per-device feature bits.
*/
#define VIRTIO_TRANSPORT_F_START 28
-#define VIRTIO_TRANSPORT_F_END 40
+#define VIRTIO_TRANSPORT_F_END 41
#ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've
@@ -94,4 +94,9 @@
* layer.
*/
#define VIRTIO_F_MMIO_NOTIFICATION 39
+
+/*
+ * This feature indicates the MSI support on MMIO layer.
+ */
+#define VIRTIO_F_MMIO_MSI 40
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h
index c4b0968..777cb0e 100644
--- a/include/uapi/linux/virtio_mmio.h
+++ b/include/uapi/linux/virtio_mmio.h
@@ -122,6 +122,21 @@
#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0
#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4
+/* MSI max vector number that device supports - Read Only */
+#define VIRTIO_MMIO_MSI_VEC_NUM 0x0c0
+/* MSI state register - Read Only */
+#define VIRTIO_MMIO_MSI_STATE 0x0c4
+/* MSI command register - Write Only */
+#define VIRTIO_MMIO_MSI_COMMAND 0x0c8
+/* MSI vector selector - Write Only */
+#define VIRTIO_MMIO_MSI_VEC_SEL 0x0d0
+/* MSI low 32 bit address, 64 bits in two halves */
+#define VIRTIO_MMIO_MSI_ADDRESS_LOW 0x0d4
+/* MSI high 32 bit address, 64 bits in two halves */
+#define VIRTIO_MMIO_MSI_ADDRESS_HIGH 0x0d8
+/* MSI 32 bit data */
+#define VIRTIO_MMIO_MSI_DATA 0x0dc
+
/* Configuration atomicity value */
#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc
@@ -130,6 +145,22 @@
#define VIRTIO_MMIO_CONFIG 0x100
+/* MSI commands */
+#define VIRTIO_MMIO_MSI_CMD_ENABLE 0x1
+#define VIRTIO_MMIO_MSI_CMD_DISABLE 0x2
+#define VIRTIO_MMIO_MSI_CMD_CONFIGURE 0x3
+#define VIRTIO_MMIO_MSI_CMD_MASK 0x4
+#define VIRTIO_MMIO_MSI_CMD_UNMASK 0x5
+#define VIRTIO_MMIO_MSI_CMD_MAP_CONFIG 0x6
+#define VIRTIO_MMIO_MSI_CMD_MAP_QUEUE 0x7
+
+/* MSI NO_VECTOR */
+#define VIRTIO_MMIO_MSI_NO_VECTOR 0xffffffff
+
+/* MSI state enabled state mask */
+#define VIRTIO_MMIO_MSI_ENABLED_MASK (1 << 31)
+/* MSI state MSI sharing mask */
+#define VIRTIO_MMIO_MSI_SHARING_MASK (1 << 30)
/*
* Interrupt flags (re: interrupt status & acknowledge registers)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 104+ messages in thread
* Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-10 9:05 ` Zha Bin
@ 2020-02-11 3:17 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 3:17 UTC (permalink / raw)
To: Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, jing2.liu, qemu-devel, chao.p.peng, gerry
On 2020/2/10 下午5:05, Zha Bin wrote:
> From: Liu Jiang<gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> virtio over mmio devices as a lightweight machine model for modern
> cloud. The standard virtio over MMIO transport layer only supports one
> legacy interrupt, which is much heavier than virtio over PCI transport
> layer using MSI. Legacy interrupt has long work path and causes specific
> VMExits in following cases, which would considerably slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic mapping.
I believe dynamic mapping should cover the case of fixed vector?
Thanks
^ permalink raw reply [flat|nested] 104+ messages in thread
* [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 3:17 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 3:17 UTC (permalink / raw)
To: Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, jing2.liu, qemu-devel, chao.p.peng, gerry
On 2020/2/10 下午5:05, Zha Bin wrote:
> From: Liu Jiang<gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> virtio over mmio devices as a lightweight machine model for modern
> cloud. The standard virtio over MMIO transport layer only supports one
> legacy interrupt, which is much heavier than virtio over PCI transport
> layer using MSI. Legacy interrupt has long work path and causes specific
> VMExits in following cases, which would considerably slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic mapping.
I believe dynamic mapping should cover the case of fixed vector?
Thanks
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 3:17 ` [virtio-dev] " Jason Wang
@ 2020-02-11 3:35 ` Liu, Jing2
-1 siblings, 0 replies; 104+ messages in thread
From: Liu, Jing2 @ 2020-02-11 3:35 UTC (permalink / raw)
To: Jason Wang, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2/11/2020 11:17 AM, Jason Wang wrote:
>
> On 2020/2/10 下午5:05, Zha Bin wrote:
>> From: Liu Jiang<gerry@linux.alibaba.com>
>>
>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
>> virtio over mmio devices as a lightweight machine model for modern
>> cloud. The standard virtio over MMIO transport layer only supports one
>> legacy interrupt, which is much heavier than virtio over PCI transport
>> layer using MSI. Legacy interrupt has long work path and causes specific
>> VMExits in following cases, which would considerably slow down the
>> performance:
>>
>> 1) read interrupt status register
>> 2) update interrupt status register
>> 3) write IOAPIC EOI register
>>
>> We proposed to add MSI support for virtio over MMIO via new feature
>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>
>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>> uses msi_sharing[1] to indicate the event and vector mapping.
>> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>
>
> I believe dynamic mapping should cover the case of fixed vector?
>
Actually this bit *aims* to distinguish MSI sharing from MSI non-sharing.
It means that when the msi sharing bit is 1, the device doesn't want a vector
per queue (it wants MSI vector sharing, as the name suggests) and doesn't want
a high interrupt rate.
So the driver turns to !per_vq_vectors and has to do dynamic mapping.
So they are opposites, not a superset.
Thanks!
Jing
> Thanks
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 3:35 ` Liu, Jing2
0 siblings, 0 replies; 104+ messages in thread
From: Liu, Jing2 @ 2020-02-11 3:35 UTC (permalink / raw)
To: Jason Wang, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2/11/2020 11:17 AM, Jason Wang wrote:
>
> On 2020/2/10 下午5:05, Zha Bin wrote:
>> From: Liu Jiang<gerry@linux.alibaba.com>
>>
>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
>> virtio over mmio devices as a lightweight machine model for modern
>> cloud. The standard virtio over MMIO transport layer only supports one
>> legacy interrupt, which is much heavier than virtio over PCI transport
>> layer using MSI. Legacy interrupt has long work path and causes specific
>> VMExits in following cases, which would considerably slow down the
>> performance:
>>
>> 1) read interrupt status register
>> 2) update interrupt status register
>> 3) write IOAPIC EOI register
>>
>> We proposed to add MSI support for virtio over MMIO via new feature
>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>
>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>> uses msi_sharing[1] to indicate the event and vector mapping.
>> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>
>
> I believe dynamic mapping should cover the case of fixed vector?
>
Actually this bit *aims* to distinguish MSI sharing from MSI non-sharing.
It means that when the msi sharing bit is 1, the device doesn't want a vector
per queue (it wants MSI vector sharing, as the name suggests) and doesn't want
a high interrupt rate.
So the driver turns to !per_vq_vectors and has to do dynamic mapping.
So they are opposites, not a superset.
Thanks!
Jing
> Thanks
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 3:35 ` Liu, Jing2
@ 2020-02-11 4:02 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 4:02 UTC (permalink / raw)
To: Liu, Jing2, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>
>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>
>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
>>> virtio over mmio devices as a lightweight machine model for modern
>>> cloud. The standard virtio over MMIO transport layer only supports one
>>> legacy interrupt, which is much heavier than virtio over PCI transport
>>> layer using MSI. Legacy interrupt has long work path and causes
>>> specific
>>> VMExits in following cases, which would considerably slow down the
>>> performance:
>>>
>>> 1) read interrupt status register
>>> 2) update interrupt status register
>>> 3) write IOAPIC EOI register
>>>
>>> We proposed to add MSI support for virtio over MMIO via new feature
>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>
>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>
>>
>> I believe dynamic mapping should cover the case of fixed vector?
>>
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector per queue
>
> (it wants msi vector sharing as name) and doesn't want a high
> interrupt rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
I think you need to add more comments on the command.
E.g if I want to map vector 0 to queue 1, how do I need to do?
write(1, queue_sel);
write(0, vector_sel);
?
Thanks
>
>
>> Thanks
>>
>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 4:02 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 4:02 UTC (permalink / raw)
To: Liu, Jing2, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>
>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>
>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
>>> virtio over mmio devices as a lightweight machine model for modern
>>> cloud. The standard virtio over MMIO transport layer only supports one
>>> legacy interrupt, which is much heavier than virtio over PCI transport
>>> layer using MSI. Legacy interrupt has long work path and causes
>>> specific
>>> VMExits in following cases, which would considerably slow down the
>>> performance:
>>>
>>> 1) read interrupt status register
>>> 2) update interrupt status register
>>> 3) write IOAPIC EOI register
>>>
>>> We proposed to add MSI support for virtio over MMIO via new feature
>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>
>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>
>>
>> I believe dynamic mapping should cover the case of fixed vector?
>>
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector per queue
>
> (it wants msi vector sharing as name) and doesn't want a high
> interrupt rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
I think you need to add more comments on the command.
E.g if I want to map vector 0 to queue 1, how do I need to do?
write(1, queue_sel);
write(0, vector_sel);
?
Thanks
>
>
>> Thanks
>>
>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 4:02 ` Jason Wang
@ 2020-02-11 6:02 ` Liu, Jing2
-1 siblings, 0 replies; 104+ messages in thread
From: Liu, Jing2 @ 2020-02-11 6:02 UTC (permalink / raw)
To: Jason Wang, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
[-- Attachment #1: Type: text/plain, Size: 2997 bytes --]
On 2/11/2020 12:02 PM, Jason Wang wrote:
>
> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>
>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>
>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>
>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of
>>>> using
>>>> virtio over mmio devices as a lightweight machine model for modern
>>>> cloud. The standard virtio over MMIO transport layer only supports one
>>>> legacy interrupt, which is much heavier than virtio over PCI transport
>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>> specific
>>>> VMExits in following cases, which would considerably slow down the
>>>> performance:
>>>>
>>>> 1) read interrupt status register
>>>> 2) update interrupt status register
>>>> 3) write IOAPIC EOI register
>>>>
>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>
>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>> mapping.
>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>
>>>
>>> I believe dynamic mapping should cover the case of fixed vector?
>>>
>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>
>> It means, when msi sharing bit is 1, device doesn't want vector per
>> queue
>>
>> (it wants msi vector sharing as name) and doesn't want a high
>> interrupt rate.
>>
>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>
>> So they are opposite not superset.
>>
>> Thanks!
>>
>> Jing
>
>
> I think you need add more comments on the command.
>
> E.g if I want to map vector 0 to queue 1, how do I need to do?
>
> write(1, queue_sel);
> write(0, vector_sel);
That's true. Besides, two commands are used for msi sharing mode,
VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
"To set up the event and vector mapping for MSI sharing mode, driver
SHOULD write a valid MsiVecSel followed by
VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
map the configuration change/selected queue events respectively. " (See
spec patch 5/5)
So if the driver detects MSI sharing mode, then when it sets up a vq it
writes the queue_sel (this already exists in vq setup), the vector sel,
and then the MAP_QUEUE command to do the queue event mapping.
For MSI non-sharing mode, no special action is needed because we make
per_vq_vector the rule, with a fixed vector-to-queue relationship.
Correct me if this is not that clear for spec/code comments.
Thanks!
Jing
>
> ?
>
> Thanks
>
>
>>
>>
>>> Thanks
>>>
>>>
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>
>>
>
[-- Attachment #2: Type: text/html, Size: 5954 bytes --]
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 6:02 ` Liu, Jing2
0 siblings, 0 replies; 104+ messages in thread
From: Liu, Jing2 @ 2020-02-11 6:02 UTC (permalink / raw)
To: Jason Wang, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
[-- Attachment #1: Type: text/plain, Size: 3099 bytes --]
On 2/11/2020 12:02 PM, Jason Wang wrote:
>
> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>
>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>
>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>
>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of
>>>> using
>>>> virtio over mmio devices as a lightweight machine model for modern
>>>> cloud. The standard virtio over MMIO transport layer only supports one
>>>> legacy interrupt, which is much heavier than virtio over PCI transport
>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>> specific
>>>> VMExits in following cases, which would considerably slow down the
>>>> performance:
>>>>
>>>> 1) read interrupt status register
>>>> 2) update interrupt status register
>>>> 3) write IOAPIC EOI register
>>>>
>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>
>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>> mapping.
>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>
>>>
>>> I believe dynamic mapping should cover the case of fixed vector?
>>>
>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>
>> It means, when msi sharing bit is 1, device doesn't want vector per
>> queue
>>
>> (it wants msi vector sharing as name) and doesn't want a high
>> interrupt rate.
>>
>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>
>> So they are opposite not superset.
>>
>> Thanks!
>>
>> Jing
>
>
> I think you need add more comments on the command.
>
> E.g if I want to map vector 0 to queue 1, how do I need to do?
>
> write(1, queue_sel);
> write(0, vector_sel);
That's true. Besides, two commands are used for msi sharing mode,
VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
"To set up the event and vector mapping for MSI sharing mode, driver
SHOULD write a valid MsiVecSel followed by
VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
map the configuration change/selected queue events respectively. " (See
spec patch 5/5)
So if the driver detects MSI sharing mode, then when it sets up a vq it
writes the queue_sel (this already exists in vq setup), the vector sel,
and then the MAP_QUEUE command to do the queue event mapping.
For MSI non-sharing mode, no special action is needed because we make
per_vq_vector the rule, with a fixed vector-to-queue relationship.
Correct me if this is not that clear for spec/code comments.
Thanks!
Jing
>
> ?
>
> Thanks
>
>
>>
>>
>>> Thanks
>>>
>>>
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>
>>
>
[-- Attachment #2: Type: text/html, Size: 6134 bytes --]
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 6:02 ` Liu, Jing2
@ 2020-02-11 7:40 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 7:40 UTC (permalink / raw)
To: Liu, Jing2, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>
>
> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>
>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>
>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>
>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>
>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of
>>>>> using
>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>> cloud. The standard virtio over MMIO transport layer only supports
>>>>> one
>>>>> legacy interrupt, which is much heavier than virtio over PCI
>>>>> transport
>>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>>> specific
>>>>> VMExits in following cases, which would considerably slow down the
>>>>> performance:
>>>>>
>>>>> 1) read interrupt status register
>>>>> 2) update interrupt status register
>>>>> 3) write IOAPIC EOI register
>>>>>
>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>
>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>>> mapping.
>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>
>>>>
>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>
>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>
>>> It means, when msi sharing bit is 1, device doesn't want vector per
>>> queue
>>>
>>> (it wants msi vector sharing as name) and doesn't want a high
>>> interrupt rate.
>>>
>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>
>>> So they are opposite not superset.
>>>
>>> Thanks!
>>>
>>> Jing
>>
>>
>> I think you need add more comments on the command.
>>
>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>
>> write(1, queue_sel);
>> write(0, vector_sel);
>
> That's true. Besides, two commands are used for msi sharing mode,
>
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>
> "To set up the event and vector mapping for MSI sharing mode, driver
> SHOULD write a valid MsiVecSel followed by
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command
> to map the configuration change/selected queue events respectively. "
> (See spec patch 5/5)
>
> So if driver detects the msi sharing mode, when it does setup vq,
> writes the queue_sel (this already exists in setup vq), vector sel and
> then MAP_QUEUE command to do the queue event mapping.
>
So actually the per-vq MSI-X could be done through this. I don't get why
you need to introduce MSI_SHARING_MASK, which is the responsibility of the
driver instead of the device. The interrupt rate should have no direct
relationship with whether the vector has been shared or not.
Btw, you introduce mask/unmask without pending; how do you deal with
interrupts lost during masking, then?
> For msi non-sharing mode, no special action is needed because we make
> the rule of per_vq_vector and fixed relationship.
>
> Correct me if this is not that clear for spec/code comments.
>
The ABI is not as straightforward as PCI did. Why not just reuse the PCI
layout?
E.g having
queue_sel
queue_msix_vector
msix_config
for configuring map between msi vector and queues/config
Then
vector_sel
address
data
pending
mask
unmask
for configuring msi table?
Thanks
> Thanks!
>
> Jing
>
>
>>
>> ?
>>
>> Thanks
>>
>>
>>>
>>>
>>>> Thanks
>>>>
>>>>
>>>>
>>>> ---------------------------------------------------------------------
>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>>
>>>
>>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 7:40 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 7:40 UTC (permalink / raw)
To: Liu, Jing2, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>
>
> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>
>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>
>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>
>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>
>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of
>>>>> using
>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>> cloud. The standard virtio over MMIO transport layer only supports
>>>>> one
>>>>> legacy interrupt, which is much heavier than virtio over PCI
>>>>> transport
>>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>>> specific
>>>>> VMExits in following cases, which would considerably slow down the
>>>>> performance:
>>>>>
>>>>> 1) read interrupt status register
>>>>> 2) update interrupt status register
>>>>> 3) write IOAPIC EOI register
>>>>>
>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>
>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>>> mapping.
>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>
>>>>
>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>
>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>
>>> It means, when msi sharing bit is 1, device doesn't want vector per
>>> queue
>>>
>>> (it wants msi vector sharing as name) and doesn't want a high
>>> interrupt rate.
>>>
>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>
>>> So they are opposite not superset.
>>>
>>> Thanks!
>>>
>>> Jing
>>
>>
>> I think you need add more comments on the command.
>>
>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>
>> write(1, queue_sel);
>> write(0, vector_sel);
>
> That's true. Besides, two commands are used for msi sharing mode,
>
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>
> "To set up the event and vector mapping for MSI sharing mode, driver
> SHOULD write a valid MsiVecSel followed by
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command
> to map the configuration change/selected queue events respectively. "
> (See spec patch 5/5)
>
> So if driver detects the msi sharing mode, when it does setup vq,
> writes the queue_sel (this already exists in setup vq), vector sel and
> then MAP_QUEUE command to do the queue event mapping.
>
So actually the per-vq MSI-X could be done through this. I don't get why
you need to introduce MSI_SHARING_MASK, which is the responsibility of the
driver instead of the device. The interrupt rate should have no direct
relationship with whether the vector has been shared or not.
Btw, you introduce mask/unmask without pending; how do you deal with
interrupts lost during masking, then?
> For msi non-sharing mode, no special action is needed because we make
> the rule of per_vq_vector and fixed relationship.
>
> Correct me if this is not that clear for spec/code comments.
>
The ABI is not as straightforward as PCI did. Why not just reuse the PCI
layout?
E.g having
queue_sel
queue_msix_vector
msix_config
for configuring map between msi vector and queues/config
Then
vector_sel
address
data
pending
mask
unmask
for configuring msi table?
Thanks
> Thanks!
>
> Jing
>
>
>>
>> ?
>>
>> Thanks
>>
>>
>>>
>>>
>>>> Thanks
>>>>
>>>>
>>>>
>>>> ---------------------------------------------------------------------
>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>>
>>>
>>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 7:40 ` Jason Wang
(?)
@ 2020-02-11 11:58 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:58 UTC (permalink / raw)
To: Jason Wang
Cc: Liu, Jing2, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> >
> >
> > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > >
> > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > >
> > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > >
> > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > >
> > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > advantage of using
> > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > only supports one
> > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > PCI transport
> > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > causes specific
> > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > performance:
> > > > > >
> > > > > > 1) read interrupt status register
> > > > > > 2) update interrupt status register
> > > > > > 3) write IOAPIC EOI register
> > > > > >
> > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > >
> > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > event mapping.
> > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > >
> > > > >
> > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > >
> > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > >
> > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > per queue
> > > >
> > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > interrupt rate.
> > > >
> > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > >
> > > > So they are opposite not superset.
> > > >
> > > > Thanks!
> > > >
> > > > Jing
> > >
> > >
> > > I think you need add more comments on the command.
> > >
> > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > >
> > > write(1, queue_sel);
> > > write(0, vector_sel);
> >
> > That's true. Besides, two commands are used for msi sharing mode,
> >
> > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> >
> > "To set up the event and vector mapping for MSI sharing mode, driver
> > SHOULD write a valid MsiVecSel followed by
> > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > map the configuration change/selected queue events respectively. " (See
> > spec patch 5/5)
> >
> > So if driver detects the msi sharing mode, when it does setup vq, writes
> > the queue_sel (this already exists in setup vq), vector sel and then
> > MAP_QUEUE command to do the queue event mapping.
> >
>
> So actually the per vq msix could be done through this. I don't get why you
> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> device. The interrupt rate should have no direct relationship with whether
> it has been shared or not.
>
> Btw, you introduce mask/unmask without pending, how to deal with the lost
> interrupt during the masking then?
Pending can be an internal device register. As long as the device
does not lose interrupts while masked, all's well.
There's value in being able to say "this queue sends no
interrupts, do not bother checking the used notification area",
so we need a way to say that. So I guess an enable-interrupts
register might have some value...
But besides that, it's enough to have mask/unmask/address/data
per vq.
>
> > For msi non-sharing mode, no special action is needed because we make
> > the rule of per_vq_vector and fixed relationship.
> >
> > Correct me if this is not that clear for spec/code comments.
> >
>
> The ABI is not as straightforward as PCI did. Why not just reuse the PCI
> layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
>
> Thanks
>
>
> > Thanks!
> >
> > Jing
> >
> >
> > >
> > > ?
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > >
> > > > > ---------------------------------------------------------------------
> > > > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> > > > >
> > > >
> > >
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 11:58 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:58 UTC (permalink / raw)
To: Jason Wang
Cc: Liu, Jing2, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> >
> >
> > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > >
> > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > >
> > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > >
> > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > >
> > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > advantage of using
> > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > only supports one
> > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > PCI transport
> > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > causes specific
> > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > performance:
> > > > > >
> > > > > > 1) read interrupt status register
> > > > > > 2) update interrupt status register
> > > > > > 3) write IOAPIC EOI register
> > > > > >
> > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > >
> > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > event mapping.
> > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > >
> > > > >
> > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > >
> > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > >
> > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > per queue
> > > >
> > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > interrupt rate.
> > > >
> > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > >
> > > > So they are opposite not superset.
> > > >
> > > > Thanks!
> > > >
> > > > Jing
> > >
> > >
> > > I think you need add more comments on the command.
> > >
> > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > >
> > > write(1, queue_sel);
> > > write(0, vector_sel);
> >
> > That's true. Besides, two commands are used for msi sharing mode,
> >
> > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> >
> > "To set up the event and vector mapping for MSI sharing mode, driver
> > SHOULD write a valid MsiVecSel followed by
> > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > map the configuration change/selected queue events respectively. " (See
> > spec patch 5/5)
> >
> > So if driver detects the msi sharing mode, when it does setup vq, writes
> > the queue_sel (this already exists in setup vq), vector sel and then
> > MAP_QUEUE command to do the queue event mapping.
> >
>
> So actually the per vq msix could be done through this. I don't get why you
> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> device. The interrupt rate should have no direct relationship with whether
> it has been shared or not.
>
> Btw, you introduce mask/unmask without pending, how to deal with the lost
> interrupt during the masking then?
Pending can be an internal device register. As long as the device
does not lose interrupts while masked, all's well.
There's value in being able to say "this queue sends no
interrupts, do not bother checking the used notification area",
so we need a way to say that. So I guess an enable-interrupts
register might have some value...
But besides that, it's enough to have mask/unmask/address/data
per vq.
>
> > For msi non-sharing mode, no special action is needed because we make
> > the rule of per_vq_vector and fixed relationship.
> >
> > Correct me if this is not that clear for spec/code comments.
> >
>
> The ABI is not as straightforward as PCI did. Why not just reuse the PCI
> layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
>
> Thanks
>
>
> > Thanks!
> >
> > Jing
> >
> >
> > >
> > > ?
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > >
> > > > > ---------------------------------------------------------------------
> > > > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> > > > >
> > > >
> > >
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 11:58 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:58 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> >
> >
> > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > >
> > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > >
> > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > >
> > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > >
> > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > advantage of using
> > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > only supports one
> > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > PCI transport
> > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > causes specific
> > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > performance:
> > > > > >
> > > > > > 1) read interrupt status register
> > > > > > 2) update interrupt status register
> > > > > > 3) write IOAPIC EOI register
> > > > > >
> > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > >
> > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > event mapping.
> > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > >
> > > > >
> > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > >
> > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > >
> > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > per queue
> > > >
> > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > interrupt rate.
> > > >
> > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > >
> > > > So they are opposite not superset.
> > > >
> > > > Thanks!
> > > >
> > > > Jing
> > >
> > >
> > > I think you need add more comments on the command.
> > >
> > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > >
> > > write(1, queue_sel);
> > > write(0, vector_sel);
> >
> > That's true. Besides, two commands are used for msi sharing mode,
> >
> > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> >
> > "To set up the event and vector mapping for MSI sharing mode, driver
> > SHOULD write a valid MsiVecSel followed by
> > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > map the configuration change/selected queue events respectively. " (See
> > spec patch 5/5)
> >
> > So if driver detects the msi sharing mode, when it does setup vq, writes
> > the queue_sel (this already exists in setup vq), vector sel and then
> > MAP_QUEUE command to do the queue event mapping.
> >
>
> So actually the per vq msix could be done through this. I don't get why you
> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> device. The interrupt rate should have no direct relationship with whether
> it has been shared or not.
>
> Btw, you introduce mask/unmask without pending, how to deal with the lost
> interrupt during the masking then?
Pending can be an internal device register. As long as the device
does not lose interrupts while masked, all's well.
There's value in being able to say "this queue sends no
interrupts, do not bother checking the used notification area",
so we need a way to say that. So I guess an enable-interrupts
register might have some value...
But besides that, it's enough to have mask/unmask/address/data
per vq.
>
> > For msi non-sharing mode, no special action is needed because we make
> > the rule of per_vq_vector and fixed relationship.
> >
> > Correct me if this is not that clear for spec/code comments.
> >
>
> The ABI is not as straightforward as PCI did. Why not just reuse the PCI
> layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
>
> Thanks
>
>
> > Thanks!
> >
> > Jing
> >
> >
> > >
> > > ?
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > >
> > > > > ---------------------------------------------------------------------
> > > > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> > > > >
> > > >
> > >
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 11:58 ` Michael S. Tsirkin
@ 2020-02-11 12:04 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 12:04 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
> On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>>
>>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
>>>>>>> advantage of using
>>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>>> cloud. The standard virtio over MMIO transport layer
>>>>>>> only supports one
>>>>>>> legacy interrupt, which is much heavier than virtio over
>>>>>>> PCI transport
>>>>>>> layer using MSI. Legacy interrupt has long work path and
>>>>>>> causes specific
>>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>>> performance:
>>>>>>>
>>>>>>> 1) read interrupt status register
>>>>>>> 2) update interrupt status register
>>>>>>> 3) write IOAPIC EOI register
>>>>>>>
>>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>>
>>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per
>>>>>>> event mapping.
>>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>>
>>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>>
>>>>> It means, when msi sharing bit is 1, device doesn't want vector
>>>>> per queue
>>>>>
>>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>>> interrupt rate.
>>>>>
>>>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>>>
>>>>> So they are opposite not superset.
>>>>>
>>>>> Thanks!
>>>>>
>>>>> Jing
>>>> I think you need add more comments on the command.
>>>>
>>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>>
>>>> write(1, queue_sel);
>>>> write(0, vector_sel);
>>> That's true. Besides, two commands are used for msi sharing mode,
>>>
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>>
>>> "To set up the event and vector mapping for MSI sharing mode, driver
>>> SHOULD write a valid MsiVecSel followed by
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
>>> map the configuration change/selected queue events respectively. " (See
>>> spec patch 5/5)
>>>
>>> So if driver detects the msi sharing mode, when it does setup vq, writes
>>> the queue_sel (this already exists in setup vq), vector sel and then
>>> MAP_QUEUE command to do the queue event mapping.
>>>
>> So actually the per vq msix could be done through this. I don't get why you
>> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
>> device. The interrupt rate should have no direct relationship with whether
>> it has been shared or not.
>>
>> Btw, you introduce mask/unmask without pending, how to deal with the lost
>> interrupt during the masking then?
> pending can be an internal device register. as long as device
> does not lose interrupts while masked, all's well.
You meant raise the interrupt during unmask automatically?
>
> There's value in being able to say "this queue sends no
> interrupts, do not bother checking the used notification area".
> So we need a way to say that. So I guess an enable-interrupts
> register might have some value...
> But besides that, it's enough to have mask/unmask/address/data
> per vq.
Just to check, do you mean "per vector" here?
Thanks
>
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 12:04 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 12:04 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
> On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>>
>>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
>>>>>>> advantage of using
>>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>>> cloud. The standard virtio over MMIO transport layer
>>>>>>> only supports one
>>>>>>> legacy interrupt, which is much heavier than virtio over
>>>>>>> PCI transport
>>>>>>> layer using MSI. Legacy interrupt has long work path and
>>>>>>> causes specific
>>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>>> performance:
>>>>>>>
>>>>>>> 1) read interrupt status register
>>>>>>> 2) update interrupt status register
>>>>>>> 3) write IOAPIC EOI register
>>>>>>>
>>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>>
>>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per
>>>>>>> event mapping.
>>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>>
>>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>>
>>>>> It means, when msi sharing bit is 1, device doesn't want vector
>>>>> per queue
>>>>>
>>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>>> interrupt rate.
>>>>>
>>>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>>>
>>>>> So they are opposite not superset.
>>>>>
>>>>> Thanks!
>>>>>
>>>>> Jing
>>>> I think you need add more comments on the command.
>>>>
>>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>>
>>>> write(1, queue_sel);
>>>> write(0, vector_sel);
>>> That's true. Besides, two commands are used for msi sharing mode,
>>>
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>>
>>> "To set up the event and vector mapping for MSI sharing mode, driver
>>> SHOULD write a valid MsiVecSel followed by
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
>>> map the configuration change/selected queue events respectively. " (See
>>> spec patch 5/5)
>>>
>>> So if driver detects the msi sharing mode, when it does setup vq, writes
>>> the queue_sel (this already exists in setup vq), vector sel and then
>>> MAP_QUEUE command to do the queue event mapping.
>>>
>> So actually the per vq msix could be done through this. I don't get why you
>> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
>> device. The interrupt rate should have no direct relationship with whether
>> it has been shared or not.
>>
>> Btw, you introduce mask/unmask without pending, how to deal with the lost
>> interrupt during the masking then?
> pending can be an internal device register. as long as device
> does not lose interrupts while masked, all's well.
You meant raise the interrupt during unmask automatically?
>
> There's value in being able to say "this queue sends no
> interrupts, do not bother checking the used notification area".
> So we need a way to say that. So I guess an enable-interrupts
> register might have some value...
> But besides that, it's enough to have mask/unmask/address/data
> per vq.
Just to check, do you mean "per vector" here?
Thanks
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 12:04 ` Jason Wang
@ 2020-02-11 12:08 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 12:08 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 08:04:24PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
> > On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
> > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > >
> > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > > > advantage of using
> > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > > > only supports one
> > > > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > > > PCI transport
> > > > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > > > causes specific
> > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > performance:
> > > > > > > >
> > > > > > > > 1) read interrupt status register
> > > > > > > > 2) update interrupt status register
> > > > > > > > 3) write IOAPIC EOI register
> > > > > > > >
> > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > >
> > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > > > event mapping.
> > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > >
> > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > >
> > > > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > > > per queue
> > > > > >
> > > > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > > > interrupt rate.
> > > > > >
> > > > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > > > >
> > > > > > So they are opposite not superset.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > Jing
> > > > > I think you need add more comments on the command.
> > > > >
> > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > >
> > > > > write(1, queue_sel);
> > > > > write(0, vector_sel);
> > > > That's true. Besides, two commands are used for msi sharing mode,
> > > >
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > >
> > > > "To set up the event and vector mapping for MSI sharing mode, driver
> > > > SHOULD write a valid MsiVecSel followed by
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > > > map the configuration change/selected queue events respectively. " (See
> > > > spec patch 5/5)
> > > >
> > > > So if driver detects the msi sharing mode, when it does setup vq, writes
> > > > the queue_sel (this already exists in setup vq), vector sel and then
> > > > MAP_QUEUE command to do the queue event mapping.
> > > >
> > > So actually the per vq msix could be done through this. I don't get why you
> > > need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> > > device. The interrupt rate should have no direct relationship with whether
> > > it has been shared or not.
> > >
> > > Btw, you introduce mask/unmask without pending, how to deal with the lost
> > > interrupt during the masking then?
> > pending can be an internal device register. as long as device
> > does not lose interrupts while masked, all's well.
>
>
> You meant raise the interrupt during unmask automatically?
>
yes - that's also what pci does.
the guest visible pending bit is partially implemented in qemu
as per pci spec but it's unused.
> >
> > There's value in being able to say "this queue sends no
> > interrupts, do not bother checking the used notification area".
> > So we need a way to say that. So I guess an enable-interrupts
> > register might have some value...
> > But besides that, it's enough to have mask/unmask/address/data
> > per vq.
>
>
> Just to check, do you mean "per vector" here?
>
> Thanks
>
No, per VQ. An indirection VQ -> vector -> address/data isn't
necessary for PCI either, but that ship has sailed.
--
MST
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 12:08 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 12:08 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 08:04:24PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
> > On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
> > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > >
> > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > > > advantage of using
> > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > > > only supports one
> > > > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > > > PCI transport
> > > > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > > > causes specific
> > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > performance:
> > > > > > > >
> > > > > > > > 1) read interrupt status register
> > > > > > > > 2) update interrupt status register
> > > > > > > > 3) write IOAPIC EOI register
> > > > > > > >
> > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > >
> > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > > > event mapping.
> > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > >
> > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > >
> > > > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > > > per queue
> > > > > >
> > > > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > > > interrupt rate.
> > > > > >
> > > > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > > > >
> > > > > > So they are opposite not superset.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > Jing
> > > > > I think you need add more comments on the command.
> > > > >
> > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > >
> > > > > write(1, queue_sel);
> > > > > write(0, vector_sel);
> > > > That's true. Besides, two commands are used for msi sharing mode,
> > > >
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > >
> > > > "To set up the event and vector mapping for MSI sharing mode, driver
> > > > SHOULD write a valid MsiVecSel followed by
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > > > map the configuration change/selected queue events respectively. " (See
> > > > spec patch 5/5)
> > > >
> > > > So if driver detects the msi sharing mode, when it does setup vq, writes
> > > > the queue_sel (this already exists in setup vq), vector sel and then
> > > > MAP_QUEUE command to do the queue event mapping.
> > > >
> > > So actually the per vq msix could be done through this. I don't get why you
> > > need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> > > device. The interrupt rate should have no direct relationship with whether
> > > it has been shared or not.
> > >
> > > Btw, you introduce mask/unmask without pending, how to deal with the lost
> > > interrupt during the masking then?
> > pending can be an internal device register. as long as device
> > does not lose interrupts while masked, all's well.
>
>
> You meant raise the interrupt during unmask automatically?
>
yes - that's also what pci does.
the guest visible pending bit is partially implemented in qemu
as per pci spec but it's unused.
> >
> > There's value in being able to say "this queue sends no
> > interrupts, do not bother checking the used notification area".
> > So we need a way to say that. So I guess an enable-interrupts
> > register might have some value...
> > But besides that, it's enough to have mask/unmask/address/data
> > per vq.
>
>
> Just to check, do you mean "per vector" here?
>
> Thanks
>
No, per VQ. An indirection VQ -> vector -> address/data isn't
necessary for PCI either, but that ship has sailed.
--
MST
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 12:08 ` Michael S. Tsirkin
@ 2020-02-11 12:18 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 12:18 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/11 下午8:08, Michael S. Tsirkin wrote:
> On Tue, Feb 11, 2020 at 08:04:24PM +0800, Jason Wang wrote:
>> On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
>>> On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>>>> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>>>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>>>>
>>>>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
>>>>>>>>> advantage of using
>>>>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>>>>> cloud. The standard virtio over MMIO transport layer
>>>>>>>>> only supports one
>>>>>>>>> legacy interrupt, which is much heavier than virtio over
>>>>>>>>> PCI transport
>>>>>>>>> layer using MSI. Legacy interrupt has long work path and
>>>>>>>>> causes specific
>>>>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>>>>> performance:
>>>>>>>>>
>>>>>>>>> 1) read interrupt status register
>>>>>>>>> 2) update interrupt status register
>>>>>>>>> 3) write IOAPIC EOI register
>>>>>>>>>
>>>>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>>>>
>>>>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per
>>>>>>>>> event mapping.
>>>>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>>>>
>>>>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>>>>
>>>>>>> It means, when msi sharing bit is 1, device doesn't want vector
>>>>>>> per queue
>>>>>>>
>>>>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>>>>> interrupt rate.
>>>>>>>
>>>>>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>>>>>
>>>>>>> So they are opposite not superset.
>>>>>>>
>>>>>>> Thanks!
>>>>>>>
>>>>>>> Jing
>>>>>> I think you need add more comments on the command.
>>>>>>
>>>>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>>>>
>>>>>> write(1, queue_sel);
>>>>>> write(0, vector_sel);
>>>>> That's true. Besides, two commands are used for msi sharing mode,
>>>>>
>>>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>>>>
>>>>> "To set up the event and vector mapping for MSI sharing mode, driver
>>>>> SHOULD write a valid MsiVecSel followed by
>>>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
>>>>> map the configuration change/selected queue events respectively. " (See
>>>>> spec patch 5/5)
>>>>>
>>>>> So if driver detects the msi sharing mode, when it does setup vq, writes
>>>>> the queue_sel (this already exists in setup vq), vector sel and then
>>>>> MAP_QUEUE command to do the queue event mapping.
>>>>>
>>>> So actually the per vq msix could be done through this. I don't get why you
>>>> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
>>>> device. The interrupt rate should have no direct relationship with whether
>>>> it has been shared or not.
>>>>
>>>> Btw, you introduce mask/unmask without pending, how to deal with the lost
>>>> interrupt during the masking then?
>>> pending can be an internal device register. as long as device
>>> does not lose interrupts while masked, all's well.
>>
>> You meant raise the interrupt during unmask automatically?
>>
>
> yes - that's also what pci does.
>
> the guest visible pending bit is partially implemented in qemu
> as per pci spec but it's unused.
Ok.
>
>>> There's value in being able to say "this queue sends no
>>> interrupts, do not bother checking the used notification area".
>>> So we need a way to say that. So I guess an enable-interrupts
>>> register might have some value...
>>> But besides that, it's enough to have mask/unmask/address/data
>>> per vq.
>>
>> Just to check, do you mean "per vector" here?
>>
>> Thanks
>>
> No, per VQ. An indirection VQ -> vector -> address/data isn't
> necessary for PCI either, but that ship has sailed.
Yes, it can work but it may bring extra effort when you want to mask a
vector which was shared by a lot of queues.
Thanks
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 12:18 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-11 12:18 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/11 下午8:08, Michael S. Tsirkin wrote:
> On Tue, Feb 11, 2020 at 08:04:24PM +0800, Jason Wang wrote:
>> On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
>>> On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
>>>> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>>>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>>>>
>>>>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
>>>>>>>>> advantage of using
>>>>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>>>>> cloud. The standard virtio over MMIO transport layer
>>>>>>>>> only supports one
>>>>>>>>> legacy interrupt, which is much heavier than virtio over
>>>>>>>>> PCI transport
>>>>>>>>> layer using MSI. Legacy interrupt has long work path and
>>>>>>>>> causes specific
>>>>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>>>>> performance:
>>>>>>>>>
>>>>>>>>> 1) read interrupt status register
>>>>>>>>> 2) update interrupt status register
>>>>>>>>> 3) write IOAPIC EOI register
>>>>>>>>>
>>>>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>>>>
>>>>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
>>>>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per
>>>>>>>>> event mapping.
>>>>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>>>>
>>>>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>>>>
>>>>>>> It means, when msi sharing bit is 1, device doesn't want vector
>>>>>>> per queue
>>>>>>>
>>>>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>>>>> interrupt rate.
>>>>>>>
>>>>>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>>>>>
>>>>>>> So they are opposite not superset.
>>>>>>>
>>>>>>> Thanks!
>>>>>>>
>>>>>>> Jing
>>>>>> I think you need add more comments on the command.
>>>>>>
>>>>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>>>>
>>>>>> write(1, queue_sel);
>>>>>> write(0, vector_sel);
>>>>> That's true. Besides, two commands are used for msi sharing mode,
>>>>>
>>>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>>>>
>>>>> "To set up the event and vector mapping for MSI sharing mode, driver
>>>>> SHOULD write a valid MsiVecSel followed by
>>>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
>>>>> map the configuration change/selected queue events respectively. " (See
>>>>> spec patch 5/5)
>>>>>
>>>>> So if driver detects the msi sharing mode, when it does setup vq, writes
>>>>> the queue_sel (this already exists in setup vq), vector sel and then
>>>>> MAP_QUEUE command to do the queue event mapping.
>>>>>
>>>> So actually the per vq msix could be done through this. I don't get why you
>>>> need to introduce MSI_SHARING_MASK which is the charge of driver instead of
>>>> device. The interrupt rate should have no direct relationship with whether
>>>> it has been shared or not.
>>>>
>>>> Btw, you introduce mask/unmask without pending, how to deal with the lost
>>>> interrupt during the masking then?
>>> pending can be an internal device register. as long as device
>>> does not lose interrupts while masked, all's well.
>>
>> You meant raise the interrupt during unmask automatically?
>>
>
> yes - that's also what pci does.
>
> the guest visible pending bit is partially implemented in qemu
> as per pci spec but it's unused.
Ok.
>
>>> There's value in being able to say "this queue sends no
>>> interrupts, do not bother checking the used notification area".
>>> So we need a way to say that. So I guess an enable-interrupts
>>> register might have some value...
>>> But besides that, it's enough to have mask/unmask/address/data
>>> per vq.
>>
>> Just to check, do you mean "per vector" here?
>>
>> Thanks
>>
> No, per VQ. An indirection VQ -> vector -> address/data isn't
> necessary for PCI either, but that ship has sailed.
Yes, it can work but it may bring extra effort when you want to mask a
vector which was shared by a lot of queues.
Thanks
>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 12:18 ` Jason Wang
@ 2020-02-11 14:00 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 14:00 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 08:18:54PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午8:08, Michael S. Tsirkin wrote:
> > On Tue, Feb 11, 2020 at 08:04:24PM +0800, Jason Wang wrote:
> > > On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
> > > > On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
> > > > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > > > >
> > > > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > > > > > advantage of using
> > > > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > > > > > only supports one
> > > > > > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > > > > > PCI transport
> > > > > > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > > > > > causes specific
> > > > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > > > performance:
> > > > > > > > > >
> > > > > > > > > > 1) read interrupt status register
> > > > > > > > > > 2) update interrupt status register
> > > > > > > > > > 3) write IOAPIC EOI register
> > > > > > > > > >
> > > > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > > > >
> > > > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > > > > > event mapping.
> > > > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > > > >
> > > > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > > > >
> > > > > > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > > > > > per queue
> > > > > > > >
> > > > > > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > > > > > interrupt rate.
> > > > > > > >
> > > > > > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > > > > > >
> > > > > > > > So they are opposite not superset.
> > > > > > > >
> > > > > > > > Thanks!
> > > > > > > >
> > > > > > > > Jing
> > > > > > > I think you need add more comments on the command.
> > > > > > >
> > > > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > > > >
> > > > > > > write(1, queue_sel);
> > > > > > > write(0, vector_sel);
> > > > > > That's true. Besides, two commands are used for msi sharing mode,
> > > > > >
> > > > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > > > >
> > > > > > "To set up the event and vector mapping for MSI sharing mode, driver
> > > > > > SHOULD write a valid MsiVecSel followed by
> > > > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > > > > > map the configuration change/selected queue events respectively. " (See
> > > > > > spec patch 5/5)
> > > > > >
> > > > > > So if driver detects the msi sharing mode, when it does setup vq, writes
> > > > > > the queue_sel (this already exists in setup vq), vector sel and then
> > > > > > MAP_QUEUE command to do the queue event mapping.
> > > > > >
> > > > > So actually the per vq msix could be done through this. I don't get why you
> > > > > need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> > > > > device. The interrupt rate should have no direct relationship with whether
> > > > > it has been shared or not.
> > > > >
> > > > > Btw, you introduce mask/unmask without pending, how to deal with the lost
> > > > > interrupt during the masking then?
> > > > pending can be an internal device register. as long as device
> > > > does not lose interrupts while masked, all's well.
> > >
> > > You meant raise the interrupt during unmask automatically?
> > >
> >
> > yes - that's also what pci does.
> >
> > the guest visible pending bit is partially implemented in qemu
> > as per pci spec but it's unused.
>
>
> Ok.
>
>
> >
> > > > There's value in being able to say "this queue sends no
> > > > interrupts, do not bother checking the used notification area".
> > > > So we need a way to say that. So I guess an enable-interrupts
> > > > register might have some value...
> > > > But besides that, it's enough to have mask/unmask/address/data
> > > > per vq.
> > >
> > > Just to check, do you mean "per vector" here?
> > >
> > > Thanks
> > >
> > No, per VQ. An indirection VQ -> vector -> address/data isn't
> > necessary for PCI either, but that ship has sailed.
>
>
> Yes, it can work but it may bring extra effort when you want to mask a
> vector which was shared by a lot of queues.
>
> Thanks
>
masking should be per vq too.
--
MST
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 14:00 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 14:00 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 08:18:54PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午8:08, Michael S. Tsirkin wrote:
> > On Tue, Feb 11, 2020 at 08:04:24PM +0800, Jason Wang wrote:
> > > On 2020/2/11 下午7:58, Michael S. Tsirkin wrote:
> > > > On Tue, Feb 11, 2020 at 03:40:23PM +0800, Jason Wang wrote:
> > > > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > > > >
> > > > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> > > > > > > > > > advantage of using
> > > > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > > > cloud. The standard virtio over MMIO transport layer
> > > > > > > > > > only supports one
> > > > > > > > > > legacy interrupt, which is much heavier than virtio over
> > > > > > > > > > PCI transport
> > > > > > > > > > layer using MSI. Legacy interrupt has long work path and
> > > > > > > > > > causes specific
> > > > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > > > performance:
> > > > > > > > > >
> > > > > > > > > > 1) read interrupt status register
> > > > > > > > > > 2) update interrupt status register
> > > > > > > > > > 3) write IOAPIC EOI register
> > > > > > > > > >
> > > > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > > > >
> > > > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > > > Bit 1 is 0: device uses non-sharing and fixed vector per
> > > > > > > > > > event mapping.
> > > > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > > > >
> > > > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > > > >
> > > > > > > > It means, when msi sharing bit is 1, device doesn't want vector
> > > > > > > > per queue
> > > > > > > >
> > > > > > > > (it wants msi vector sharing as name) and doesn't want a high
> > > > > > > > interrupt rate.
> > > > > > > >
> > > > > > > > So driver turns to !per_vq_vectors and has to do dynamic mapping.
> > > > > > > >
> > > > > > > > So they are opposite not superset.
> > > > > > > >
> > > > > > > > Thanks!
> > > > > > > >
> > > > > > > > Jing
> > > > > > > I think you need add more comments on the command.
> > > > > > >
> > > > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > > > >
> > > > > > > write(1, queue_sel);
> > > > > > > write(0, vector_sel);
> > > > > > That's true. Besides, two commands are used for msi sharing mode,
> > > > > >
> > > > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > > > >
> > > > > > "To set up the event and vector mapping for MSI sharing mode, driver
> > > > > > SHOULD write a valid MsiVecSel followed by
> > > > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> > > > > > map the configuration change/selected queue events respectively. " (See
> > > > > > spec patch 5/5)
> > > > > >
> > > > > > So if driver detects the msi sharing mode, when it does setup vq, writes
> > > > > > the queue_sel (this already exists in setup vq), vector sel and then
> > > > > > MAP_QUEUE command to do the queue event mapping.
> > > > > >
> > > > > So actually the per vq msix could be done through this. I don't get why you
> > > > > need to introduce MSI_SHARING_MASK which is the charge of driver instead of
> > > > > device. The interrupt rate should have no direct relationship with whether
> > > > > it has been shared or not.
> > > > >
> > > > > Btw, you introduce mask/unmask without pending, how to deal with the lost
> > > > > interrupt during the masking then?
> > > > pending can be an internal device register. as long as device
> > > > does not lose interrupts while masked, all's well.
> > >
> > > You meant raise the interrupt during unmask automatically?
> > >
> >
> > yes - that's also what pci does.
> >
> > the guest visible pending bit is partially implemented in qemu
> > as per pci spec but it's unused.
>
>
> Ok.
>
>
> >
> > > > There's value in being able to say "this queue sends no
> > > > interrupts do not bother checking used notification area".
> > > > so we need way to say that. So I guess an enable interrupts
> > > > register might have some value...
> > > > But besides that, it's enough to have mask/unmask/address/data
> > > > per vq.
> > >
> > > Just to check, do you mean "per vector" here?
> > >
> > > Thanks
> > >
> > No, per VQ. An indirection VQ -> vector -> address/data isn't
> > necessary for PCI either, but that ship has sailed.
>
>
> Yes, it can work but it may bring extra effort when you want to mask a
> vector which is shared by a lot of queues.
>
> Thanks
>
masking should be per vq too.
--
MST
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 14:00 ` Michael S. Tsirkin
@ 2020-02-12 9:03 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-12 9:03 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/11 下午10:00, Michael S. Tsirkin wrote:
>> Yes, it can work but it may bring extra effort when you want to mask a
>> vector which is shared by a lot of queues.
>>
>> Thanks
>>
> masking should be per vq too.
>
> --
Yes, but if the vector is shared by e.g. 16 queues, then all those
virtqueues need to be masked, which is expensive.
Thanks
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 9:03 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-12 9:03 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/11 下午10:00, Michael S. Tsirkin wrote:
>> Yes, it can work but it may bring extra effort when you want to mask a
>> vector which is shared by a lot of queues.
>>
>> Thanks
>>
> masking should be per vq too.
>
> --
Yes, but if the vector is shared by e.g. 16 queues, then all those
virtqueues need to be masked, which is expensive.
Thanks
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-12 9:03 ` Jason Wang
@ 2020-02-12 9:15 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 9:15 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 05:03:00PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午10:00, Michael S. Tsirkin wrote:
> > > Yes, it can work but it may bring extra effort when you want to mask a
> > > vector which is shared by a lot of queues.
> > >
> > > Thanks
> > >
> > masking should be per vq too.
> >
> > --
>
>
> Yes, but if the vector is shared by e.g. 16 queues, then all those virtqueues
> need to be masked, which is expensive.
>
> Thanks
I think that's ok - masking is rare. in fact if vectors can be
changed atomically I'm no longer sure they are needed,
maybe a destructive "disable" which can lose interrupts
is enough.
--
MST
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 9:15 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 9:15 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 05:03:00PM +0800, Jason Wang wrote:
>
> On 2020/2/11 下午10:00, Michael S. Tsirkin wrote:
> > > Yes, it can work but it may bring extra effort when you want to mask a
> > > vector which is shared by a lot of queues.
> > >
> > > Thanks
> > >
> > masking should be per vq too.
> >
> > --
>
>
> Yes, but if the vector is shared by e.g. 16 queues, then all those virtqueues
> need to be masked, which is expensive.
>
> Thanks
I think that's ok - masking is rare. in fact if vectors can be
changed atomically I'm no longer sure they are needed,
maybe a destructive "disable" which can lose interrupts
is enough.
--
MST
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 7:40 ` Jason Wang
@ 2020-02-12 3:54 ` Liu, Jing2
-1 siblings, 0 replies; 104+ messages in thread
From: Liu, Jing2 @ 2020-02-12 3:54 UTC (permalink / raw)
To: Jason Wang, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
[-- Attachment #1: Type: text/plain, Size: 4949 bytes --]
On 2/11/2020 3:40 PM, Jason Wang wrote:
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>
>>
>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>
>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>
>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>
>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>
>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of
>>>>>> using
>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>> cloud. The standard virtio over MMIO transport layer only
>>>>>> supports one
>>>>>> legacy interrupt, which is much heavier than virtio over PCI
>>>>>> transport
>>>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>>>> specific
>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>> performance:
>>>>>>
>>>>>> 1) read interrupt status register
>>>>>> 2) update interrupt status register
>>>>>> 3) write IOAPIC EOI register
>>>>>>
>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>
>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio
>>>>>> MSI
>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>>>> mapping.
>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>
>>>>>
>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>
>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>
>>>> It means, when msi sharing bit is 1, device doesn't want vector per
>>>> queue
>>>>
>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>> interrupt rate.
>>>>
>>>> So driver turns to !per_vq_vectors and has to do dynamic mapping.
>>>>
>>>> So they are opposite not superset.
>>>>
>>>> Thanks!
>>>>
>>>> Jing
>>>
>>>
>>> I think you need add more comments on the command.
>>>
>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>
>>> write(1, queue_sel);
>>> write(0, vector_sel);
>>
>> That's true. Besides, two commands are used for msi sharing mode,
>>
>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>
>> "To set up the event and vector mapping for MSI sharing mode, driver
>> SHOULD write a valid MsiVecSel followed by
>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command
>> to map the configuration change/selected queue events respectively.
>> " (See spec patch 5/5)
>>
>> So if driver detects the msi sharing mode, when it does setup vq,
>> writes the queue_sel (this already exists in setup vq), vector sel
>> and then MAP_QUEUE command to do the queue event mapping.
>>
>
> So actually the per vq msix could be done through this.
Right, per vq msix can also be mapped by the 2 commands if we want.
The current design benefits for those devices requesting per vq msi that
driver
doesn't have to map during setup each queue,
since we define the relationship by default.
> I don't get why you need to introduce MSI_SHARING_MASK which is the
> charge of driver instead of device.
MSI_SHARING_MASK is just for identifying the msi_sharing bit in
readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
MsiState register: R
le32 {
msi_enabled : 1;
msi_sharing: 1;
reserved : 30;
};
> The interrupt rate should have no direct relationship with whether it
> has been shared or not.
>
> Btw, you introduce mask/unmask without pending, how to deal with the
> lost interrupt during the masking then?
>
>
>> For msi non-sharing mode, no special action is needed because we make
>> the rule of per_vq_vector and fixed relationship.
>>
>> Correct me if this is not that clear for spec/code comments.
>>
>
> The ABI is not as straightforward as PCI did. Why not just reuse the
> PCI layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
Thanks for the advice. :)
Actually when looking into pci, the queue_msix_vector/msix_config is the
msi vector index, which is the same as the mmio register MsiVecSel (0x0d0).
So we don't introduce two extra registers for mapping even in sharing mode.
What do you think?
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
PCI-like msix table is not introduced to device and instead simply use
commands to tell the mask/configure/enable.
Thanks!
Jing
>
> Thanks
>
>
>> Thanks!
>>
>> Jing
>>
>>
>>>
>>> ?
>>>
>>> Thanks
>>>
>>>
>>>>
>>>>
>>>>> Thanks
>>>>>
>>>>>
>>>>>
>>>>> ---------------------------------------------------------------------
>>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>>>
>>>>
>>>
>
[-- Attachment #2: Type: text/html, Size: 9754 bytes --]
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 3:54 ` Liu, Jing2
0 siblings, 0 replies; 104+ messages in thread
From: Liu, Jing2 @ 2020-02-12 3:54 UTC (permalink / raw)
To: Jason Wang, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
[-- Attachment #1: Type: text/plain, Size: 5140 bytes --]
On 2/11/2020 3:40 PM, Jason Wang wrote:
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>
>>
>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>
>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>
>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>
>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>
>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of
>>>>>> using
>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>> cloud. The standard virtio over MMIO transport layer only
>>>>>> supports one
>>>>>> legacy interrupt, which is much heavier than virtio over PCI
>>>>>> transport
>>>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>>>> specific
>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>> performance:
>>>>>>
>>>>>> 1) read interrupt status register
>>>>>> 2) update interrupt status register
>>>>>> 3) write IOAPIC EOI register
>>>>>>
>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>
>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio
>>>>>> MSI
>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>>>> mapping.
>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>
>>>>>
>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>
>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>
>>>> It means, when msi sharing bit is 1, device doesn't want vector per
>>>> queue
>>>>
>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>> interrupt rate.
>>>>
> >>>> So driver turns to !per_vq_vectors and has to do dynamic mapping.
>>>>
>>>> So they are opposite not superset.
>>>>
>>>> Thanks!
>>>>
>>>> Jing
>>>
>>>
>>> I think you need add more comments on the command.
>>>
>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>
>>> write(1, queue_sel);
>>> write(0, vector_sel);
>>
>> That's true. Besides, two commands are used for msi sharing mode,
>>
>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>
>> "To set up the event and vector mapping for MSI sharing mode, driver
>> SHOULD write a valid MsiVecSel followed by
>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command
>> to map the configuration change/selected queue events respectively.
>> " (See spec patch 5/5)
>>
>> So if driver detects the msi sharing mode, when it does setup vq,
>> writes the queue_sel (this already exists in setup vq), vector sel
>> and then MAP_QUEUE command to do the queue event mapping.
>>
>
> So actually the per vq msix could be done through this.
Right, per vq msix can also be mapped by the 2 commands if we want.
The current design benefits for those devices requesting per vq msi that
driver
doesn't have to map during setup each queue,
since we define the relationship by default.
> I don't get why you need to introduce MSI_SHARING_MASK which is the
> charge of driver instead of device.
MSI_SHARING_MASK is just for identifying the msi_sharing bit in
readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
MsiState register: R
le32 {
msi_enabled : 1;
msi_sharing: 1;
reserved : 30;
};
> The interrupt rate should have no direct relationship with whether it
> has been shared or not.
>
> Btw, you introduce mask/unmask without pending, how to deal with the
> lost interrupt during the masking then?
>
>
>> For msi non-sharing mode, no special action is needed because we make
>> the rule of per_vq_vector and fixed relationship.
>>
>> Correct me if this is not that clear for spec/code comments.
>>
>
> The ABI is not as straightforward as PCI did. Why not just reuse the
> PCI layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
Thanks for the advice. :)
Actually when looking into pci, the queue_msix_vector/msix_config is the
msi vector index, which is the same as the mmio register MsiVecSel (0x0d0).
So we don't introduce two extra registers for mapping even in sharing mode.
What do you think?
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
PCI-like msix table is not introduced to device and instead simply use
commands to tell the mask/configure/enable.
Thanks!
Jing
>
> Thanks
>
>
>> Thanks!
>>
>> Jing
>>
>>
>>>
>>> ?
>>>
>>> Thanks
>>>
>>>
>>>>
>>>>
>>>>> Thanks
>>>>>
>>>>>
>>>>>
>>>>> ---------------------------------------------------------------------
>>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>>>
>>>>
>>>
>
[-- Attachment #2: Type: text/html, Size: 10067 bytes --]
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-12 3:54 ` Liu, Jing2
(?)
@ 2020-02-12 7:33 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 7:33 UTC (permalink / raw)
To: Liu, Jing2
Cc: Jason Wang, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 11:54:53AM +0800, Liu, Jing2 wrote:
>
> On 2/11/2020 3:40 PM, Jason Wang wrote:
>
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>
>
>
> On 2/11/2020 12:02 PM, Jason Wang wrote:
>
>
> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
>
>
> On 2020/2/10 下午5:05, Zha Bin wrote:
>
> From: Liu Jiang<gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> advantage of using
> virtio over mmio devices as a lightweight machine model
> for modern
> cloud. The standard virtio over MMIO transport layer
> only supports one
> legacy interrupt, which is much heavier than virtio
> over PCI transport
> layer using MSI. Legacy interrupt has long work path
> and causes specific
> VMExits in following cases, which would considerably
> slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via
> new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt
> performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the
> virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector
> mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector
> per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic
> mapping.
>
>
>
> I believe dynamic mapping should cover the case of fixed
> vector?
>
>
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector
> per queue
>
> (it wants msi vector sharing as name) and doesn't want a high
> interrupt rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical
> mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
>
>
>
> I think you need add more comments on the command.
>
> E.g if I want to map vector 0 to queue 1, how do I need to do?
>
> write(1, queue_sel);
> write(0, vector_sel);
>
>
> That's true. Besides, two commands are used for msi sharing mode,
>
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>
> "To set up the event and vector mapping for MSI sharing mode, driver
> SHOULD write a valid MsiVecSel followed by
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> map the configuration change/selected queue events respectively. "
> (See spec patch 5/5)
>
> So if driver detects the msi sharing mode, when it does setup vq,
> writes the queue_sel (this already exists in setup vq), vector sel and
> then MAP_QUEUE command to do the queue event mapping.
>
>
>
> So actually the per vq msix could be done through this.
>
> Right, per vq msix can also be mapped by the 2 commands if we want.
>
> The current design benefits for those devices requesting per vq msi that driver
>
> doesn't have to map during setup each queue,
>
> since we define the relationship by default.
Point being to save some exits for configuration? How much does it
save? IMHO we need to find a way to drastically simplify this interface,
to cut down the new LOC to at most 100-200, proportionally to the
performance gain it gives.
>
> I don't get why you need to introduce MSI_SHARING_MASK which is the charge
> of driver instead of device.
>
> MSI_SHARING_MASK is just for identifying the msi_sharing bit in readl(MsiState)
> (0x0c4). The device tells whether it wants msi_sharing.
>
> MsiState register: R
>
> le32 {
> msi_enabled : 1;
> msi_sharing: 1;
> reserved : 30;
> };
>
> The interrupt rate should have no direct relationship with whether it has
> been shared or not.
>
>
>
> Btw, you introduce mask/unmask without pending, how to deal with the lost
> interrupt during the masking then?
>
>
>
> For msi non-sharing mode, no special action is needed because we make
> the rule of per_vq_vector and fixed relationship.
>
> Correct me if this is not that clear for spec/code comments.
>
>
>
> The ABI is not as straightforward as PCI did. Why not just reuse the PCI
> layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
>
> Thanks for the advice. :)
>
> Actually when looking into pci, the queue_msix_vector/msix_config is the msi
> vector index, which is the same as the mmio register MsiVecSel (0x0d0).
>
> So we don't introduce two extra registers for mapping even in sharing mode.
>
> What do you think?
>
>
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
>
> PCI-like msix table is not introduced to device and instead simply use commands
> to tell the mask/configure/enable.
>
> Thanks!
>
> Jing
>
>
> Thanks
>
>
>
> Thanks!
>
> Jing
>
>
>
>
> ?
>
> Thanks
>
>
>
>
>
>
> Thanks
>
>
>
> ---------------------------------------------------------------------
>
> To unsubscribe, e-mail:
> virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail:
> virtio-dev-help@lists.oasis-open.org
>
>
>
>
>
>
>
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 7:33 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 7:33 UTC (permalink / raw)
To: Liu, Jing2
Cc: Jason Wang, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 11:54:53AM +0800, Liu, Jing2 wrote:
>
> On 2/11/2020 3:40 PM, Jason Wang wrote:
>
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>
>
>
> On 2/11/2020 12:02 PM, Jason Wang wrote:
>
>
> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
>
>
> On 2020/2/10 下午5:05, Zha Bin wrote:
>
> From: Liu Jiang<gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> advantage of using
> virtio over mmio devices as a lightweight machine model
> for modern
> cloud. The standard virtio over MMIO transport layer
> only supports one
> legacy interrupt, which is much heavier than virtio
> over PCI transport
> layer using MSI. Legacy interrupt has long work path
> and causes specific
> VMExits in following cases, which would considerably
> slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via
> new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt
> performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the
> virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector
> mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector
> per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic
> mapping.
>
>
>
> I believe dynamic mapping should cover the case of fixed
> vector?
>
>
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector
> per queue
>
> (it wants msi vector sharing as name) and doesn't want a high
> interrupt rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical
> mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
>
>
>
> I think you need add more comments on the command.
>
> E.g if I want to map vector 0 to queue 1, how do I need to do?
>
> write(1, queue_sel);
> write(0, vector_sel);
>
>
> That's true. Besides, two commands are used for msi sharing mode,
>
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>
> "To set up the event and vector mapping for MSI sharing mode, driver
> SHOULD write a valid MsiVecSel followed by
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> map the configuration change/selected queue events respectively. "
> (See spec patch 5/5)
>
> So if driver detects the msi sharing mode, when it does setup vq,
> writes the queue_sel (this already exists in setup vq), vector sel and
> then MAP_QUEUE command to do the queue event mapping.
>
>
>
> So actually the per vq msix could be done through this.
>
> Right, per vq msix can also be mapped by the 2 commands if we want.
>
> The current design benefits for those devices requesting per vq msi that driver
>
> doesn't have to map during setup each queue,
>
> since we define the relationship by default.
Point being to save some exits for configuration? How much does it
save? IMHO we need to find a way to drastically simplify this interface,
to cut down the new LOC to at most 100-200, proportionally to the
performance gain it gives.
>
> I don't get why you need to introduce MSI_SHARING_MASK which is the charge
> of driver instead of device.
>
> MSI_SHARING_MASK is just for identifying the msi_sharing bit in readl(MsiState)
> (0x0c4). The device tells whether it wants msi_sharing.
>
> MsiState register: R
>
> le32 {
> msi_enabled : 1;
> msi_sharing: 1;
> reserved : 30;
> };
>
> The interrupt rate should have no direct relationship with whether it has
> been shared or not.
>
>
>
> Btw, you introduce mask/unmask without pending, how to deal with the lost
> interrupt during the masking then?
>
>
>
> For msi non-sharing mode, no special action is needed because we make
> the rule of per_vq_vector and fixed relationship.
>
> Correct me if this is not that clear for spec/code comments.
>
>
>
> The ABI is not as straightforward as PCI did. Why not just reuse the PCI
> layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
>
> Thanks for the advice. :)
>
> Actually when looking into pci, the queue_msix_vector/msix_config is the msi
> vector index, which is the same as the mmio register MsiVecSel (0x0d0).
>
> So we don't introduce two extra registers for mapping even in sharing mode.
>
> What do you think?
>
>
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
>
> PCI-like msix table is not introduced to device and instead simply use commands
> to tell the mask/configure/enable.
>
> Thanks!
>
> Jing
>
>
> Thanks
>
>
>
> Thanks!
>
> Jing
>
>
>
>
> ?
>
> Thanks
>
>
>
>
>
>
> Thanks
>
>
>
> ---------------------------------------------------------------------
>
> To unsubscribe, e-mail:
> virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail:
> virtio-dev-help@lists.oasis-open.org
>
>
>
>
>
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 7:33 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 7:33 UTC (permalink / raw)
To: Liu, Jing2
Cc: virtio-dev, Zha Bin, slp, Jason Wang, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 11:54:53AM +0800, Liu, Jing2 wrote:
>
> On 2/11/2020 3:40 PM, Jason Wang wrote:
>
>
> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>
>
>
> On 2/11/2020 12:02 PM, Jason Wang wrote:
>
>
> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
>
>
> On 2020/2/10 下午5:05, Zha Bin wrote:
>
> From: Liu Jiang<gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take
> advantage of using
> virtio over mmio devices as a lightweight machine model
> for modern
> cloud. The standard virtio over MMIO transport layer
> only supports one
> legacy interrupt, which is much heavier than virtio
> over PCI transport
> layer using MSI. Legacy interrupt has long work path
> and causes specific
> VMExits in following cases, which would considerably
> slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via
> new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt
> performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the
> virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector
> mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector
> per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic
> mapping.
>
>
>
> I believe dynamic mapping should cover the case of fixed
> vector?
>
>
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector
> per queue
>
> (it wants msi vector sharing as name) and doesn't want a high
> interrupt rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical
> mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
>
>
>
> I think you need add more comments on the command.
>
> E.g if I want to map vector 0 to queue 1, how do I need to do?
>
> write(1, queue_sel);
> write(0, vector_sel);
>
>
> That's true. Besides, two commands are used for msi sharing mode,
>
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>
> "To set up the event and vector mapping for MSI sharing mode, driver
> SHOULD write a valid MsiVecSel followed by
> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command to
> map the configuration change/selected queue events respectively. "
> (See spec patch 5/5)
>
> So if driver detects the msi sharing mode, when it does setup vq,
> writes the queue_sel (this already exists in setup vq), vector sel and
> then MAP_QUEUE command to do the queue event mapping.
>
>
>
> So actually the per vq msix could be done through this.
>
> Right, per vq msix can also be mapped by the 2 commands if we want.
>
> The current design benefits for those devices requesting per vq msi that driver
>
> doesn't have to map during setup each queue,
>
> since we define the relationship by default.
Point being to save some exits for configuration? How much does it
save? IMHO we need to find a way to drastically simplify this interface,
to cut down the new LOC to at most 100-200, proportionally to the
performance gain it gives.
>
> I don't get why you need to introduce MSI_SHARING_MASK which is the charge
> of driver instead of device.
>
> MSI_SHARING_MASK is just for identifying the msi_sharing bit in readl(MsiState)
> (0x0c4). The device tells whether it wants msi_sharing.
>
> MsiState register: R
>
> le32 {
> msi_enabled : 1;
> msi_sharing: 1;
> reserved : 30;
> };
>
> The interrupt rate should have no direct relationship with whether it has
> been shared or not.
>
>
>
> Btw, you introduce mask/unmask without pending, how to deal with the lost
> interrupt during the masking then?
>
>
>
> For msi non-sharing mode, no special action is needed because we make
> the rule of per_vq_vector and fixed relationship.
>
> Correct me if this is not that clear for spec/code comments.
>
>
>
> The ABI is not as straightforward as PCI did. Why not just reuse the PCI
> layout?
>
> E.g having
>
> queue_sel
> queue_msix_vector
> msix_config
>
> for configuring map between msi vector and queues/config
>
> Thanks for the advice. :)
>
> Actually when looking into pci, the queue_msix_vector/msix_config is the msi
> vector index, which is the same as the mmio register MsiVecSel (0x0d0).
>
> So we don't introduce two extra registers for mapping even in sharing mode.
>
> What do you think?
>
>
>
> Then
>
> vector_sel
> address
> data
> pending
> mask
> unmask
>
> for configuring msi table?
>
> PCI-like msix table is not introduced to device and instead simply use commands
> to tell the mask/configure/enable.
>
> Thanks!
>
> Jing
>
>
> Thanks
>
>
>
> Thanks!
>
> Jing
>
>
>
>
> ?
>
> Thanks
>
>
>
>
>
>
> Thanks
>
>
>
> ---------------------------------------------------------------------
>
> To unsubscribe, e-mail:
> virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail:
> virtio-dev-help@lists.oasis-open.org
>
>
>
>
>
>
>
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-12 3:54 ` Liu, Jing2
@ 2020-02-12 9:06 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-12 9:06 UTC (permalink / raw)
To: Liu, Jing2, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2020/2/12 上午11:54, Liu, Jing2 wrote:
>
>
> On 2/11/2020 3:40 PM, Jason Wang wrote:
>>
>> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>>
>>>
>>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>>
>>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>>
>>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>>
>>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>>
>>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage
>>>>>>> of using
>>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>>> cloud. The standard virtio over MMIO transport layer only
>>>>>>> supports one
>>>>>>> legacy interrupt, which is much heavier than virtio over PCI
>>>>>>> transport
>>>>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>>>>> specific
>>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>>> performance:
>>>>>>>
>>>>>>> 1) read interrupt status register
>>>>>>> 2) update interrupt status register
>>>>>>> 3) write IOAPIC EOI register
>>>>>>>
>>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>>
>>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the
>>>>>>> virtio-mmio MSI
>>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>>>>> mapping.
>>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>>
>>>>>>
>>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>>
>>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>>
>>>>> It means, when msi sharing bit is 1, device doesn't want vector
>>>>> per queue
>>>>>
>>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>>> interrupt rate.
>>>>>
>>>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>>>
>>>>> So they are opposite not superset.
>>>>>
>>>>> Thanks!
>>>>>
>>>>> Jing
>>>>
>>>>
>>>> I think you need add more comments on the command.
>>>>
>>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>>
>>>> write(1, queue_sel);
>>>> write(0, vector_sel);
>>>
>>> That's true. Besides, two commands are used for msi sharing mode,
>>>
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>>
>>> "To set up the event and vector mapping for MSI sharing mode, driver
>>> SHOULD write a valid MsiVecSel followed by
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command
>>> to map the configuration change/selected queue events respectively.
>>> " (See spec patch 5/5)
>>>
>>> So if driver detects the msi sharing mode, when it does setup vq,
>>> writes the queue_sel (this already exists in setup vq), vector sel
>>> and then MAP_QUEUE command to do the queue event mapping.
>>>
>>
>> So actually the per vq msix could be done through this.
>
> Right, per vq msix can also be mapped by the 2 commands if we want.
>
> The current design benefits for those devices requesting per vq msi
> that driver
>
> doesn't have to map during setup each queue,
>
> since we define the relationship by default.
>
Well, since you've defined the dynamic mapping, having some "default"
mapping won't reduce the complexity but will increase it.
>
>> I don't get why you need to introduce MSI_SHARING_MASK which is the
>> charge of driver instead of device.
>
> MSI_SHARING_MASK is just for identifying the msi_sharing bit in
> readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
>
> MsiState register: R
>
> le32 {
> msi_enabled : 1;
> msi_sharing: 1;
> reserved : 30;
> };
>
The question is why the device wants such information.
>
>> The interrupt rate should have no direct relationship with whether it
>> has been shared or not.
>
>>
>> Btw, you introduce mask/unmask without pending, how to deal with the
>> lost interrupt during the masking then?
>>
>>
>>> For msi non-sharing mode, no special action is needed because we
>>> make the rule of per_vq_vector and fixed relationship.
>>>
>>> Correct me if this is not that clear for spec/code comments.
>>>
>>
>> The ABI is not as straightforward as PCI did. Why not just reuse the
>> PCI layout?
>>
>> E.g having
>>
>> queue_sel
>> queue_msix_vector
>> msix_config
>>
>> for configuring map between msi vector and queues/config
>
> Thanks for the advice. :)
>
> Actually when looking into pci, the queue_msix_vector/msix_config is
> the msi vector index, which is the same as the mmio register MsiVecSel
> (0x0d0).
>
> So we don't introduce two extra registers for mapping even in sharing
> mode.
>
> What do you think?
>
I'm not sure I get the point, but I still prefer the separate vector_sel
from queue_msix_vector.
Btw, Michael proposed per-vq registers, which could also work.
Thanks
>
>>
>> Then
>>
>> vector_sel
>> address
>> data
>> pending
>> mask
>> unmask
>>
>> for configuring msi table?
>
> PCI-like msix table is not introduced to device and instead simply use
> commands to tell the mask/configure/enable.
>
> Thanks!
>
> Jing
>
>>
>> Thanks
>>
>>
>>> Thanks!
>>>
>>> Jing
>>>
>>>
>>>>
>>>> ?
>>>>
>>>> Thanks
>>>>
>>>>
>>>>>
>>>>>
>>>>>> Thanks
>>>>>>
>>>>>>
>>>>>>
>>>>>> ---------------------------------------------------------------------
>>>>>>
>>>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>>>> For additional commands, e-mail:
>>>>>> virtio-dev-help@lists.oasis-open.org
>>>>>>
>>>>>
>>>>
>>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 9:06 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-12 9:06 UTC (permalink / raw)
To: Liu, Jing2, Zha Bin, linux-kernel
Cc: virtio-dev, slp, mst, qemu-devel, chao.p.peng, gerry
On 2020/2/12 上午11:54, Liu, Jing2 wrote:
>
>
> On 2/11/2020 3:40 PM, Jason Wang wrote:
>>
>> On 2020/2/11 下午2:02, Liu, Jing2 wrote:
>>>
>>>
>>> On 2/11/2020 12:02 PM, Jason Wang wrote:
>>>>
>>>> On 2020/2/11 上午11:35, Liu, Jing2 wrote:
>>>>>
>>>>> On 2/11/2020 11:17 AM, Jason Wang wrote:
>>>>>>
>>>>>> On 2020/2/10 下午5:05, Zha Bin wrote:
>>>>>>> From: Liu Jiang<gerry@linux.alibaba.com>
>>>>>>>
>>>>>>> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage
>>>>>>> of using
>>>>>>> virtio over mmio devices as a lightweight machine model for modern
>>>>>>> cloud. The standard virtio over MMIO transport layer only
>>>>>>> supports one
>>>>>>> legacy interrupt, which is much heavier than virtio over PCI
>>>>>>> transport
>>>>>>> layer using MSI. Legacy interrupt has long work path and causes
>>>>>>> specific
>>>>>>> VMExits in following cases, which would considerably slow down the
>>>>>>> performance:
>>>>>>>
>>>>>>> 1) read interrupt status register
>>>>>>> 2) update interrupt status register
>>>>>>> 3) write IOAPIC EOI register
>>>>>>>
>>>>>>> We proposed to add MSI support for virtio over MMIO via new feature
>>>>>>> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>>>>>>>
>>>>>>> With the VIRTIO_F_MMIO_MSI feature bit supported, the
>>>>>>> virtio-mmio MSI
>>>>>>> uses msi_sharing[1] to indicate the event and vector mapping.
>>>>>>> Bit 1 is 0: device uses non-sharing and fixed vector per event
>>>>>>> mapping.
>>>>>>> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>>>>>>
>>>>>>
>>>>>> I believe dynamic mapping should cover the case of fixed vector?
>>>>>>
>>>>> Actually this bit *aims* for msi sharing or msi non-sharing.
>>>>>
>>>>> It means, when msi sharing bit is 1, device doesn't want vector
>>>>> per queue
>>>>>
>>>>> (it wants msi vector sharing as name) and doesn't want a high
>>>>> interrupt rate.
>>>>>
>>>>> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>>>>>
>>>>> So they are opposite not superset.
>>>>>
>>>>> Thanks!
>>>>>
>>>>> Jing
>>>>
>>>>
>>>> I think you need add more comments on the command.
>>>>
>>>> E.g if I want to map vector 0 to queue 1, how do I need to do?
>>>>
>>>> write(1, queue_sel);
>>>> write(0, vector_sel);
>>>
>>> That's true. Besides, two commands are used for msi sharing mode,
>>>
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
>>>
>>> "To set up the event and vector mapping for MSI sharing mode, driver
>>> SHOULD write a valid MsiVecSel followed by
>>> VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE command
>>> to map the configuration change/selected queue events respectively.
>>> " (See spec patch 5/5)
>>>
>>> So if driver detects the msi sharing mode, when it does setup vq,
>>> writes the queue_sel (this already exists in setup vq), vector sel
>>> and then MAP_QUEUE command to do the queue event mapping.
>>>
>>
>> So actually the per vq msix could be done through this.
>
> Right, per vq msix can also be mapped by the 2 commands if we want.
>
> The current design benefits for those devices requesting per vq msi
> that driver
>
> doesn't have to map during setup each queue,
>
> since we define the relationship by default.
>
Well, since you've defined the dynamic mapping, having some "default"
mapping won't reduce the complexity but will increase it.
>
>> I don't get why you need to introduce MSI_SHARING_MASK which is the
>> charge of driver instead of device.
>
> MSI_SHARING_MASK is just for identifying the msi_sharing bit in
> readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
>
> MsiState register: R
>
> le32 {
> msi_enabled : 1;
> msi_sharing: 1;
> reserved : 30;
> };
>
The question is why the device wants such information.
>
>> The interrupt rate should have no direct relationship with whether it
>> has been shared or not.
>
>>
>> Btw, you introduce mask/unmask without pending, how to deal with the
>> lost interrupt during the masking then?
>>
>>
>>> For msi non-sharing mode, no special action is needed because we
>>> make the rule of per_vq_vector and fixed relationship.
>>>
>>> Correct me if this is not that clear for spec/code comments.
>>>
>>
>> The ABI is not as straightforward as PCI did. Why not just reuse the
>> PCI layout?
>>
>> E.g having
>>
>> queue_sel
>> queue_msix_vector
>> msix_config
>>
>> for configuring map between msi vector and queues/config
>
> Thanks for the advice. :)
>
> Actually when looking into pci, the queue_msix_vector/msix_config is
> the msi vector index, which is the same as the mmio register MsiVecSel
> (0x0d0).
>
> So we don't introduce two extra registers for mapping even in sharing
> mode.
>
> What do you think?
>
I'm not sure I get the point, but I still prefer the separate vector_sel
from queue_msix_vector.
Btw, Michael proposed per-vq registers, which could also work.
Thanks
>
>>
>> Then
>>
>> vector_sel
>> address
>> data
>> pending
>> mask
>> unmask
>>
>> for configuring msi table?
>
> PCI-like msix table is not introduced to device and instead simply use
> commands to tell the mask/configure/enable.
>
> Thanks!
>
> Jing
>
>>
>> Thanks
>>
>>
>>> Thanks!
>>>
>>> Jing
>>>
>>>
>>>>
>>>> ?
>>>>
>>>> Thanks
>>>>
>>>>
>>>>>
>>>>>
>>>>>> Thanks
>>>>>>
>>>>>>
>>>>>>
>>>>>> ---------------------------------------------------------------------
>>>>>>
>>>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>>>> For additional commands, e-mail:
>>>>>> virtio-dev-help@lists.oasis-open.org
>>>>>>
>>>>>
>>>>
>>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-12 9:06 ` Jason Wang
(?)
@ 2020-02-12 9:16 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 9:16 UTC (permalink / raw)
To: Jason Wang
Cc: Liu, Jing2, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 05:06:52PM +0800, Jason Wang wrote:
>
> On 2020/2/12 上午11:54, Liu, Jing2 wrote:
> >
> >
> > On 2/11/2020 3:40 PM, Jason Wang wrote:
> > >
> > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > >
> > > >
> > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > >
> > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > >
> > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > >
> > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > >
> > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker)
> > > > > > > > take advantage of using
> > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > cloud. The standard virtio over MMIO transport
> > > > > > > > layer only supports one
> > > > > > > > legacy interrupt, which is much heavier than
> > > > > > > > virtio over PCI transport
> > > > > > > > layer using MSI. Legacy interrupt has long work
> > > > > > > > path and causes specific
> > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > performance:
> > > > > > > >
> > > > > > > > 1) read interrupt status register
> > > > > > > > 2) update interrupt status register
> > > > > > > > 3) write IOAPIC EOI register
> > > > > > > >
> > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > >
> > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit
> > > > > > > > supported, the virtio-mmio MSI
> > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > Bit 1 is 0: device uses non-sharing and fixed
> > > > > > > > vector per event mapping.
> > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > >
> > > > > > >
> > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > >
> > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > >
> > > > > > It means, when msi sharing bit is 1, device doesn't want
> > > > > > vector per queue
> > > > > >
> > > > > > (it wants msi vector sharing as name) and doesn't want a
> > > > > > high interrupt rate.
> > > > > >
> > > > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > > > >
> > > > > > So they are opposite not superset.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > Jing
> > > > >
> > > > >
> > > > > I think you need add more comments on the command.
> > > > >
> > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > >
> > > > > write(1, queue_sel);
> > > > > write(0, vector_sel);
> > > >
> > > > That's true. Besides, two commands are used for msi sharing mode,
> > > >
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > >
> > > > "To set up the event and vector mapping for MSI sharing mode,
> > > > driver SHOULD write a valid MsiVecSel followed by
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE
> > > > command to map the configuration change/selected queue events
> > > > respectively. " (See spec patch 5/5)
> > > >
> > > > So if driver detects the msi sharing mode, when it does setup
> > > > vq, writes the queue_sel (this already exists in setup vq),
> > > > vector sel and then MAP_QUEUE command to do the queue event
> > > > mapping.
> > > >
> > >
> > > So actually the per vq msix could be done through this.
> >
> > Right, per vq msix can also be mapped by the 2 commands if we want.
> >
> > The current design benefits for those devices requesting per vq msi that
> > driver
> >
> > doesn't have to map during setup each queue,
> >
> > since we define the relationship by default.
> >
>
> Well since you've defined the dynamic mapping, having some "default" mapping
> won't help to reduce the complexity but increase it.
>
>
> >
> > > I don't get why you need to introduce MSI_SHARING_MASK which is the
> > > charge of driver instead of device.
> >
> > MSI_SHARING_MASK is just for identifying the msi_sharing bit in
> > readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
> >
> > MsiState register: R
> >
> > le32 {
> > msi_enabled : 1;
> > msi_sharing: 1;
> > reserved : 30;
> > };
> >
>
> The question is why device want such information.
>
>
> >
> > > The interrupt rate should have no direct relationship with whether
> > > it has been shared or not.
> >
> > >
> > > Btw, you introduce mask/unmask without pending, how to deal with the
> > > lost interrupt during the masking then?
> > >
> > >
> > > > For msi non-sharing mode, no special action is needed because we
> > > > make the rule of per_vq_vector and fixed relationship.
> > > >
> > > > Correct me if this is not that clear for spec/code comments.
> > > >
> > >
> > > The ABI is not as straightforward as PCI did. Why not just reuse the
> > > PCI layout?
> > >
> > > E.g having
> > >
> > > queue_sel
> > > queue_msix_vector
> > > msix_config
> > >
> > > for configuring map between msi vector and queues/config
> >
> > Thanks for the advice. :)
> >
> > Actually when looking into pci, the queue_msix_vector/msix_config is the
> > msi vector index, which is the same as the mmio register MsiVecSel
> > (0x0d0).
> >
> > So we don't introduce two extra registers for mapping even in sharing
> > mode.
> >
> > What do you think?
> >
>
> I'm not sure I get the point, but I still prefer the separate vector_sel
> from queue_msix_vector.
>
> Btw, Michael propose per vq registers which could also work.
>
> Thanks
>
Right and I'd even ask a question: do we need shared MSI at all?
Is it somehow better than legacy interrupt? And why?
Performance numbers please.
> >
> > >
> > > Then
> > >
> > > vector_sel
> > > address
> > > data
> > > pending
> > > mask
> > > unmask
> > >
> > > for configuring msi table?
> >
> > PCI-like msix table is not introduced to device and instead simply use
> > commands to tell the mask/configure/enable.
> >
> > Thanks!
> >
> > Jing
> >
> > >
> > > Thanks
> > >
> > >
> > > > Thanks!
> > > >
> > > > Jing
> > > >
> > > >
> > > > >
> > > > > ?
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > ---------------------------------------------------------------------
> > > > > > >
> > > > > > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > > > For additional commands, e-mail:
> > > > > > > virtio-dev-help@lists.oasis-open.org
> > > > > > >
> > > > > >
> > > > >
> > >
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 9:16 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 9:16 UTC (permalink / raw)
To: Jason Wang
Cc: Liu, Jing2, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 05:06:52PM +0800, Jason Wang wrote:
>
> On 2020/2/12 上午11:54, Liu, Jing2 wrote:
> >
> >
> > On 2/11/2020 3:40 PM, Jason Wang wrote:
> > >
> > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > >
> > > >
> > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > >
> > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > >
> > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > >
> > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > >
> > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker)
> > > > > > > > take advantage of using
> > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > cloud. The standard virtio over MMIO transport
> > > > > > > > layer only supports one
> > > > > > > > legacy interrupt, which is much heavier than
> > > > > > > > virtio over PCI transport
> > > > > > > > layer using MSI. Legacy interrupt has long work
> > > > > > > > path and causes specific
> > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > performance:
> > > > > > > >
> > > > > > > > 1) read interrupt status register
> > > > > > > > 2) update interrupt status register
> > > > > > > > 3) write IOAPIC EOI register
> > > > > > > >
> > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > >
> > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit
> > > > > > > > supported, the virtio-mmio MSI
> > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > Bit 1 is 0: device uses non-sharing and fixed
> > > > > > > > vector per event mapping.
> > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > >
> > > > > > >
> > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > >
> > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > >
> > > > > > It means, when msi sharing bit is 1, device doesn't want
> > > > > > vector per queue
> > > > > >
> > > > > > (it wants msi vector sharing as name) and doesn't want a
> > > > > > high interrupt rate.
> > > > > >
> > > > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > > > >
> > > > > > So they are opposite not superset.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > Jing
> > > > >
> > > > >
> > > > > I think you need add more comments on the command.
> > > > >
> > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > >
> > > > > write(1, queue_sel);
> > > > > write(0, vector_sel);
> > > >
> > > > That's true. Besides, two commands are used for msi sharing mode,
> > > >
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > >
> > > > "To set up the event and vector mapping for MSI sharing mode,
> > > > driver SHOULD write a valid MsiVecSel followed by
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE
> > > > command to map the configuration change/selected queue events
> > > > respectively. " (See spec patch 5/5)
> > > >
> > > > So if driver detects the msi sharing mode, when it does setup
> > > > vq, writes the queue_sel (this already exists in setup vq),
> > > > vector sel and then MAP_QUEUE command to do the queue event
> > > > mapping.
> > > >
> > >
> > > So actually the per vq msix could be done through this.
> >
> > Right, per vq msix can also be mapped by the 2 commands if we want.
> >
> > The current design benefits for those devices requesting per vq msi that
> > driver
> >
> > doesn't have to map during setup each queue,
> >
> > since we define the relationship by default.
> >
>
> Well since you've defined the dynamic mapping, having some "default" mapping
> won't help to reduce the complexity but increase it.
>
>
> >
> > > I don't get why you need to introduce MSI_SHARING_MASK which is the
> > > charge of driver instead of device.
> >
> > MSI_SHARING_MASK is just for identifying the msi_sharing bit in
> > readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
> >
> > MsiState register: R
> >
> > le32 {
> > msi_enabled : 1;
> > msi_sharing: 1;
> > reserved : 30;
> > };
> >
>
> The question is why device want such information.
>
>
> >
> > > The interrupt rate should have no direct relationship with whether
> > > it has been shared or not.
> >
> > >
> > > Btw, you introduce mask/unmask without pending, how to deal with the
> > > lost interrupt during the masking then?
> > >
> > >
> > > > For msi non-sharing mode, no special action is needed because we
> > > > make the rule of per_vq_vector and fixed relationship.
> > > >
> > > > Correct me if this is not that clear for spec/code comments.
> > > >
> > >
> > > The ABI is not as straightforward as PCI did. Why not just reuse the
> > > PCI layout?
> > >
> > > E.g having
> > >
> > > queue_sel
> > > queue_msix_vector
> > > msix_config
> > >
> > > for configuring map between msi vector and queues/config
> >
> > Thanks for the advice. :)
> >
> > Actually when looking into pci, the queue_msix_vector/msix_config is the
> > msi vector index, which is the same as the mmio register MsiVecSel
> > (0x0d0).
> >
> > So we don't introduce two extra registers for mapping even in sharing
> > mode.
> >
> > What do you think?
> >
>
> I'm not sure I get the point, but I still prefer the separate vector_sel
> from queue_msix_vector.
>
> Btw, Michael propose per vq registers which could also work.
>
> Thanks
>
Right and I'd even ask a question: do we need shared MSI at all?
Is it somehow better than legacy interrupt? And why?
Performance numbers please.
> >
> > >
> > > Then
> > >
> > > vector_sel
> > > address
> > > data
> > > pending
> > > mask
> > > unmask
> > >
> > > for configuring msi table?
> >
> > PCI-like msix table is not introduced to device and instead simply use
> > commands to tell the mask/configure/enable.
> >
> > Thanks!
> >
> > Jing
> >
> > >
> > > Thanks
> > >
> > >
> > > > Thanks!
> > > >
> > > > Jing
> > > >
> > > >
> > > > >
> > > > > ?
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > ---------------------------------------------------------------------
> > > > > > >
> > > > > > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > > > For additional commands, e-mail:
> > > > > > > virtio-dev-help@lists.oasis-open.org
> > > > > > >
> > > > > >
> > > > >
> > >
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-12 9:16 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-12 9:16 UTC (permalink / raw)
To: Jason Wang
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Wed, Feb 12, 2020 at 05:06:52PM +0800, Jason Wang wrote:
>
> On 2020/2/12 上午11:54, Liu, Jing2 wrote:
> >
> >
> > On 2/11/2020 3:40 PM, Jason Wang wrote:
> > >
> > > On 2020/2/11 下午2:02, Liu, Jing2 wrote:
> > > >
> > > >
> > > > On 2/11/2020 12:02 PM, Jason Wang wrote:
> > > > >
> > > > > On 2020/2/11 上午11:35, Liu, Jing2 wrote:
> > > > > >
> > > > > > On 2/11/2020 11:17 AM, Jason Wang wrote:
> > > > > > >
> > > > > > > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > > > > > > From: Liu Jiang<gerry@linux.alibaba.com>
> > > > > > > >
> > > > > > > > Userspace VMMs (e.g. Qemu microvm, Firecracker)
> > > > > > > > take advantage of using
> > > > > > > > virtio over mmio devices as a lightweight machine model for modern
> > > > > > > > cloud. The standard virtio over MMIO transport
> > > > > > > > layer only supports one
> > > > > > > > legacy interrupt, which is much heavier than
> > > > > > > > virtio over PCI transport
> > > > > > > > layer using MSI. Legacy interrupt has long work
> > > > > > > > path and causes specific
> > > > > > > > VMExits in following cases, which would considerably slow down the
> > > > > > > > performance:
> > > > > > > >
> > > > > > > > 1) read interrupt status register
> > > > > > > > 2) update interrupt status register
> > > > > > > > 3) write IOAPIC EOI register
> > > > > > > >
> > > > > > > > We proposed to add MSI support for virtio over MMIO via new feature
> > > > > > > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > > > > > > >
> > > > > > > > With the VIRTIO_F_MMIO_MSI feature bit
> > > > > > > > supported, the virtio-mmio MSI
> > > > > > > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > > > > > > Bit 1 is 0: device uses non-sharing and fixed
> > > > > > > > vector per event mapping.
> > > > > > > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> > > > > > >
> > > > > > >
> > > > > > > I believe dynamic mapping should cover the case of fixed vector?
> > > > > > >
> > > > > > Actually this bit *aims* for msi sharing or msi non-sharing.
> > > > > >
> > > > > > It means, when msi sharing bit is 1, device doesn't want
> > > > > > vector per queue
> > > > > >
> > > > > > (it wants msi vector sharing as name) and doesn't want a
> > > > > > high interrupt rate.
> > > > > >
> > > > > > So driver turns to !per_vq_vectors and has to do dynamical mapping.
> > > > > >
> > > > > > So they are opposite not superset.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > Jing
> > > > >
> > > > >
> > > > > I think you need add more comments on the command.
> > > > >
> > > > > E.g if I want to map vector 0 to queue 1, how do I need to do?
> > > > >
> > > > > write(1, queue_sel);
> > > > > write(0, vector_sel);
> > > >
> > > > That's true. Besides, two commands are used for msi sharing mode,
> > > >
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG and VIRTIO_MMIO_MSI_CMD_MAP_QUEUE.
> > > >
> > > > "To set up the event and vector mapping for MSI sharing mode,
> > > > driver SHOULD write a valid MsiVecSel followed by
> > > > VIRTIO_MMIO_MSI_CMD_MAP_CONFIG/VIRTIO_MMIO_MSI_CMD_MAP_QUEUE
> > > > command to map the configuration change/selected queue events
> > > > respectively. " (See spec patch 5/5)
> > > >
> > > > So if driver detects the msi sharing mode, when it does setup
> > > > vq, writes the queue_sel (this already exists in setup vq),
> > > > vector sel and then MAP_QUEUE command to do the queue event
> > > > mapping.
> > > >
> > >
> > > So actually the per vq msix could be done through this.
> >
> > Right, per vq msix can also be mapped by the 2 commands if we want.
> >
> > The current design benefits for those devices requesting per vq msi that
> > driver
> >
> > doesn't have to map during setup each queue,
> >
> > since we define the relationship by default.
> >
>
> Well since you've defined the dynamic mapping, having some "default" mapping
> won't help to reduce the complexity but increase it.
>
>
> >
> > > I don't get why you need to introduce MSI_SHARING_MASK which is the
> > > charge of driver instead of device.
> >
> > MSI_SHARING_MASK is just for identifying the msi_sharing bit in
> > readl(MsiState) (0x0c4). The device tells whether it wants msi_sharing.
> >
> > MsiState register: R
> >
> > le32 {
> > msi_enabled : 1;
> > msi_sharing: 1;
> > reserved : 30;
> > };
> >
>
> The question is why device want such information.
>
>
> >
> > > The interrupt rate should have no direct relationship with whether
> > > it has been shared or not.
> >
> > >
> > > Btw, you introduce mask/unmask without pending, how to deal with the
> > > lost interrupt during the masking then?
> > >
> > >
> > > > For msi non-sharing mode, no special action is needed because we
> > > > make the rule of per_vq_vector and fixed relationship.
> > > >
> > > > Correct me if this is not that clear for spec/code comments.
> > > >
> > >
> > > The ABI is not as straightforward as PCI did. Why not just reuse the
> > > PCI layout?
> > >
> > > E.g having
> > >
> > > queue_sel
> > > queue_msix_vector
> > > msix_config
> > >
> > > for configuring map between msi vector and queues/config
> >
> > Thanks for the advice. :)
> >
> > Actually when looking into pci, the queue_msix_vector/msix_config is the
> > msi vector index, which is the same as the mmio register MsiVecSel
> > (0x0d0).
> >
> > So we don't introduce two extra registers for mapping even in sharing
> > mode.
> >
> > What do you think?
> >
>
> I'm not sure I get the point, but I still prefer the separate vector_sel
> from queue_msix_vector.
>
> Btw, Michael propose per vq registers which could also work.
>
> Thanks
>
Right and I'd even ask a question: do we need shared MSI at all?
Is it somehow better than legacy interrupt? And why?
Performance numbers please.
> >
> > >
> > > Then
> > >
> > > vector_sel
> > > address
> > > data
> > > pending
> > > mask
> > > unmask
> > >
> > > for configuring msi table?
> >
> > PCI-like msix table is not introduced to device and instead simply use
> > commands to tell the mask/configure/enable.
> >
> > Thanks!
> >
> > Jing
> >
> > >
> > > Thanks
> > >
> > >
> > > > Thanks!
> > > >
> > > > Jing
> > > >
> > > >
> > > > >
> > > > > ?
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > ---------------------------------------------------------------------
> > > > > > >
> > > > > > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > > > For additional commands, e-mail:
> > > > > > > virtio-dev-help@lists.oasis-open.org
> > > > > > >
> > > > > >
> > > > >
> > >
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-12 9:16 ` Michael S. Tsirkin
(?)
@ 2020-02-13 3:40 ` Jason Wang
-1 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-13 3:40 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Liu, Jing2, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On 2020/2/12 下午5:16, Michael S. Tsirkin wrote:
>>> Thanks for the advice.:)
>>>
>>> Actually when looking into pci, the queue_msix_vector/msix_config is the
>>> msi vector index, which is the same as the mmio register MsiVecSel
>>> (0x0d0).
>>>
>>> So we don't introduce two extra registers for mapping even in sharing
>>> mode.
>>>
>>> What do you think?
>>>
>> I'm not sure I get the point, but I still prefer the separate vector_sel
>> from queue_msix_vector.
>>
>> Btw, Michael propose per vq registers which could also work.
>>
>> Thanks
>>
> Right and I'd even ask a question: do we need shared MSI at all?
I guess it is still needed at least for the current virtio code.
Technically we may have thousands queues.
Thanks
> Is it somehow better than legacy interrupt? And why?
> Performance numbers please.
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-13 3:40 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-13 3:40 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Liu, Jing2, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On 2020/2/12 下午5:16, Michael S. Tsirkin wrote:
>>> Thanks for the advice.:)
>>>
>>> Actually when looking into pci, the queue_msix_vector/msix_config is the
>>> msi vector index, which is the same as the mmio register MsiVecSel
>>> (0x0d0).
>>>
>>> So we don't introduce two extra registers for mapping even in sharing
>>> mode.
>>>
>>> What do you think?
>>>
>> I'm not sure I get the point, but I still prefer the separate vector_sel
>> from queue_msix_vector.
>>
>> Btw, Michael propose per vq registers which could also work.
>>
>> Thanks
>>
> Right and I'd even ask a question: do we need shared MSI at all?
I guess it is still needed at least for the current virtio code.
Technically we may have thousands queues.
Thanks
> Is it somehow better than legacy interrupt? And why?
> Performance numbers please.
>
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-13 3:40 ` Jason Wang
0 siblings, 0 replies; 104+ messages in thread
From: Jason Wang @ 2020-02-13 3:40 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Zha Bin, slp, Liu, Jing2, linux-kernel, qemu-devel,
chao.p.peng, gerry
On 2020/2/12 下午5:16, Michael S. Tsirkin wrote:
>>> Thanks for the advice.:)
>>>
>>> Actually when looking into pci, the queue_msix_vector/msix_config is the
>>> msi vector index, which is the same as the mmio register MsiVecSel
>>> (0x0d0).
>>>
>>> So we don't introduce two extra registers for mapping even in sharing
>>> mode.
>>>
>>> What do you think?
>>>
>> I'm not sure I get the point, but I still prefer the separate vector_sel
>> from queue_msix_vector.
>>
>> Btw, Michael propose per vq registers which could also work.
>>
>> Thanks
>>
> Right and I'd even ask a question: do we need shared MSI at all?
I guess it is still needed at least for the current virtio code.
Technically we may have thousands queues.
Thanks
> Is it somehow better than legacy interrupt? And why?
> Performance numbers please.
>
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-11 3:35 ` Liu, Jing2
(?)
@ 2020-02-11 11:21 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:21 UTC (permalink / raw)
To: Liu, Jing2
Cc: Jason Wang, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 11:35:43AM +0800, Liu, Jing2 wrote:
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
> >
> > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > From: Liu Jiang<gerry@linux.alibaba.com>
> > >
> > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> > > virtio over mmio devices as a lightweight machine model for modern
> > > cloud. The standard virtio over MMIO transport layer only supports one
> > > legacy interrupt, which is much heavier than virtio over PCI transport
> > > layer using MSI. Legacy interrupt has long work path and causes specific
> > > VMExits in following cases, which would considerably slow down the
> > > performance:
> > >
> > > 1) read interrupt status register
> > > 2) update interrupt status register
> > > 3) write IOAPIC EOI register
> > >
> > > We proposed to add MSI support for virtio over MMIO via new feature
> > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > >
> > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> >
> >
> > I believe dynamic mapping should cover the case of fixed vector?
> >
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector per queue
>
> (it wants msi vector sharing as name) and doesn't want a high interrupt
> rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
what's the point of all this flexibility? the cover letter
complains about the code size of pci, then you go back
and add a ton of options with no justification.
>
> > Thanks
> >
> >
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 11:21 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:21 UTC (permalink / raw)
To: Liu, Jing2
Cc: Jason Wang, Zha Bin, linux-kernel, virtio-dev, slp, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 11:35:43AM +0800, Liu, Jing2 wrote:
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
> >
> > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > From: Liu Jiang<gerry@linux.alibaba.com>
> > >
> > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> > > virtio over mmio devices as a lightweight machine model for modern
> > > cloud. The standard virtio over MMIO transport layer only supports one
> > > legacy interrupt, which is much heavier than virtio over PCI transport
> > > layer using MSI. Legacy interrupt has long work path and causes specific
> > > VMExits in following cases, which would considerably slow down the
> > > performance:
> > >
> > > 1) read interrupt status register
> > > 2) update interrupt status register
> > > 3) write IOAPIC EOI register
> > >
> > > We proposed to add MSI support for virtio over MMIO via new feature
> > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > >
> > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> >
> >
> > I believe dynamic mapping should cover the case of fixed vector?
> >
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector per queue
>
> (it wants msi vector sharing as name) and doesn't want a high interrupt
> rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
what's the point of all this flexibility? the cover letter
complains about the code size of pci, then you go back
and add a ton of options with no justification.
>
> > Thanks
> >
> >
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 11:21 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:21 UTC (permalink / raw)
To: Liu, Jing2
Cc: virtio-dev, Zha Bin, slp, Jason Wang, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Tue, Feb 11, 2020 at 11:35:43AM +0800, Liu, Jing2 wrote:
>
> On 2/11/2020 11:17 AM, Jason Wang wrote:
> >
> > On 2020/2/10 下午5:05, Zha Bin wrote:
> > > From: Liu Jiang<gerry@linux.alibaba.com>
> > >
> > > Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> > > virtio over mmio devices as a lightweight machine model for modern
> > > cloud. The standard virtio over MMIO transport layer only supports one
> > > legacy interrupt, which is much heavier than virtio over PCI transport
> > > layer using MSI. Legacy interrupt has long work path and causes specific
> > > VMExits in following cases, which would considerably slow down the
> > > performance:
> > >
> > > 1) read interrupt status register
> > > 2) update interrupt status register
> > > 3) write IOAPIC EOI register
> > >
> > > We proposed to add MSI support for virtio over MMIO via new feature
> > > bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
> > >
> > > With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> > > uses msi_sharing[1] to indicate the event and vector mapping.
> > > Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> > > Bit 1 is 1: device uses sharing mode and dynamic mapping.
> >
> >
> > I believe dynamic mapping should cover the case of fixed vector?
> >
> Actually this bit *aims* for msi sharing or msi non-sharing.
>
> It means, when msi sharing bit is 1, device doesn't want vector per queue
>
> (it wants msi vector sharing as name) and doesn't want a high interrupt
> rate.
>
> So driver turns to !per_vq_vectors and has to do dynamical mapping.
>
> So they are opposite not superset.
>
> Thanks!
>
> Jing
what's the point of all this flexibility? the cover letter
complains about the code size of pci, then you go back
and add a ton of options with no justification.
>
> > Thanks
> >
> >
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
2020-02-10 9:05 ` Zha Bin
(?)
@ 2020-02-11 11:11 ` Michael S. Tsirkin
-1 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:11 UTC (permalink / raw)
To: Zha Bin
Cc: linux-kernel, jasowang, slp, virtio-dev, qemu-devel, gerry,
jing2.liu, chao.p.peng
On Mon, Feb 10, 2020 at 05:05:20PM +0800, Zha Bin wrote:
> From: Liu Jiang <gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> virtio over mmio devices as a lightweight machine model for modern
> cloud. The standard virtio over MMIO transport layer only supports one
> legacy interrupt, which is much heavier than virtio over PCI transport
> layer using MSI. Legacy interrupt has long work path and causes specific
> VMExits in following cases, which would considerably slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>
> [1] https://lkml.org/lkml/2020/1/21/31
>
> Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
> Co-developed-by: Zha Bin <zhabin@linux.alibaba.com>
> Signed-off-by: Zha Bin <zhabin@linux.alibaba.com>
> Co-developed-by: Jing Liu <jing2.liu@linux.intel.com>
> Signed-off-by: Jing Liu <jing2.liu@linux.intel.com>
> Co-developed-by: Chao Peng <chao.p.peng@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
> drivers/virtio/virtio_mmio.c | 299 ++++++++++++++++++++++++++++++++++--
> drivers/virtio/virtio_mmio_common.h | 8 +
> drivers/virtio/virtio_mmio_msi.h | 82 ++++++++++
> include/uapi/linux/virtio_config.h | 7 +-
> include/uapi/linux/virtio_mmio.h | 31 ++++
> 5 files changed, 411 insertions(+), 16 deletions(-)
So that's > 100 exatra lines in headers that you for some reason
don't count when comparing code size :).
I think an effort can be made to reduce the complexity here.
Otherwise you will end up with a clone of PCI
sooner than you think. In fact, disabling all legacy:
$ wc -l drivers/virtio/virtio_pci_modern.c
734 drivers/virtio/virtio_pci_modern.c
$ wc -l drivers/virtio/virtio_pci_common.c
635 drivers/virtio/virtio_pci_common.c
So you have almost matched that with your patch.
>
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index 41e1c93..b24ce21 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -67,8 +67,7 @@
> #include <uapi/linux/virtio_mmio.h>
> #include <linux/virtio_ring.h>
> #include "virtio_mmio_common.h"
> -
> -
> +#include "virtio_mmio_msi.h"
>
> /* The alignment to use between consumer and producer parts of vring.
> * Currently hardcoded to the page size. */
> @@ -85,9 +84,13 @@ struct virtio_mmio_vq_info {
>
> /* Notify Address*/
> unsigned int notify_addr;
> -};
>
> + /* MSI vector (or none) */
> + unsigned int msi_vector;
> +};
>
> +static void vm_free_msi_irqs(struct virtio_device *vdev);
> +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs);
>
> /* Configuration interface */
>
> @@ -110,6 +113,9 @@ static void vm_transport_features(struct virtio_device *vdev, u64 features)
> {
> if (features & BIT_ULL(VIRTIO_F_MMIO_NOTIFICATION))
> __virtio_set_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION);
> +
> + if (features & BIT_ULL(VIRTIO_F_MMIO_MSI))
> + __virtio_set_bit(vdev, VIRTIO_F_MMIO_MSI);
> }
>
> static int vm_finalize_features(struct virtio_device *vdev)
> @@ -307,7 +313,33 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
> return ret;
> }
>
> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> + struct virtio_mmio_vq_info *info;
> + irqreturn_t ret = IRQ_NONE;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&vm_dev->lock, flags);
> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
> + ret = IRQ_HANDLED;
> + }
> + spin_unlock_irqrestore(&vm_dev->lock, flags);
> +
> + return ret;
> +}
> +
> +
> +/* Handle a configuration change */
> +static irqreturn_t vm_config_changed(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> +
> + virtio_config_changed(&vm_dev->vdev);
>
> + return IRQ_HANDLED;
> +}
>
> static void vm_del_vq(struct virtqueue *vq)
> {
> @@ -316,6 +348,15 @@ static void vm_del_vq(struct virtqueue *vq)
> unsigned long flags;
> unsigned int index = vq->index;
>
> + if (vm_dev->msi_enabled && !vm_dev->msi_share) {
> + if (info->msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR) {
> + int irq = mmio_msi_irq_vector(&vq->vdev->dev,
> + info->msi_vector);
> +
> + free_irq(irq, vq);
> + }
> + }
> +
> spin_lock_irqsave(&vm_dev->lock, flags);
> list_del(&info->node);
> spin_unlock_irqrestore(&vm_dev->lock, flags);
> @@ -334,20 +375,56 @@ static void vm_del_vq(struct virtqueue *vq)
> kfree(info);
> }
>
> -static void vm_del_vqs(struct virtio_device *vdev)
> +static void vm_free_irqs(struct virtio_device *vdev)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + if (vm_dev->msi_enabled)
> + vm_free_msi_irqs(vdev);
> + else
> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> +}
> +
> +static void vm_del_vqs(struct virtio_device *vdev)
> +{
> struct virtqueue *vq, *n;
>
> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> vm_del_vq(vq);
>
> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> + vm_free_irqs(vdev);
> +}
> +
> +static inline void mmio_msi_set_enable(struct virtio_mmio_device *vm_dev,
> + int enable)
> +{
> + u32 state;
> +
> + state = readl(vm_dev->base + VIRTIO_MMIO_MSI_STATE);
> + if (enable && (state & VIRTIO_MMIO_MSI_ENABLED_MASK))
> + return;
> +
> + writel(VIRTIO_MMIO_MSI_CMD_ENABLE,
> + vm_dev->base + VIRTIO_MMIO_MSI_COMMAND);
> +}
> +
> +static void mmio_msi_config_vector(struct virtio_mmio_device *vm_dev, u32 vec)
> +{
> + writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(VIRTIO_MMIO_MSI_CMD_MAP_CONFIG, vm_dev->base +
> + VIRTIO_MMIO_MSI_COMMAND);
> +}
> +
> +static void mmio_msi_queue_vector(struct virtio_mmio_device *vm_dev, u32 vec)
> +{
> + writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(VIRTIO_MMIO_MSI_CMD_MAP_QUEUE, vm_dev->base +
> + VIRTIO_MMIO_MSI_COMMAND);
> }
>
> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> void (*callback)(struct virtqueue *vq),
> - const char *name, bool ctx)
> + const char *name, bool ctx, u32 msi_vector)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> struct virtio_mmio_vq_info *info;
> @@ -440,6 +517,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> else
> info->notify_addr = VIRTIO_MMIO_QUEUE_NOTIFY;
>
> + info->msi_vector = msi_vector;
> + /* Set queue event and vector mapping for MSI share mode. */
> + if (vm_dev->msi_share && msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR)
> + mmio_msi_queue_vector(vm_dev, msi_vector);
> +
> spin_lock_irqsave(&vm_dev->lock, flags);
> list_add(&info->node, &vm_dev->virtqueues);
> spin_unlock_irqrestore(&vm_dev->lock, flags);
> @@ -461,12 +543,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> return ERR_PTR(err);
> }
>
> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> - struct virtqueue *vqs[],
> - vq_callback_t *callbacks[],
> - const char * const names[],
> - const bool *ctx,
> - struct irq_affinity *desc)
> +static int vm_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char * const names[],
> + const bool *ctx)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> int irq = platform_get_irq(vm_dev->pdev, 0);
> @@ -487,8 +568,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>
> err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> dev_name(&vdev->dev), vm_dev);
> - if (err)
> - return err;
>
> for (i = 0; i < nvqs; ++i) {
> if (!names[i]) {
> @@ -497,14 +576,202 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> }
>
> vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> - ctx ? ctx[i] : false);
> + ctx ? ctx[i] : false,
> + VIRTIO_MMIO_MSI_NO_VECTOR);
> if (IS_ERR(vqs[i])) {
> vm_del_vqs(vdev);
> return PTR_ERR(vqs[i]);
> }
> }
>
> + return err;
> +}
> +
> +static int vm_find_vqs_msi(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[], vq_callback_t *callbacks[],
> + const char * const names[], bool per_vq_vectors,
> + const bool *ctx, struct irq_affinity *desc)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int i, err, allocated_vectors, nvectors;
> + u32 msi_vec;
> + u32 max_vec_num = readl(vm_dev->base + VIRTIO_MMIO_MSI_VEC_NUM);
> +
> + /* For MSI non-sharing, the max vector number MUST greater than nvqs.
> + * Otherwise, go back to legacy interrupt.
> + */
> + if (per_vq_vectors && max_vec_num < (nvqs + 1))
> + return -EINVAL;
> +
> + if (per_vq_vectors) {
> + nvectors = 1;
> + for (i = 0; i < nvqs; ++i)
> + if (callbacks[i])
> + ++nvectors;
> + } else {
> + nvectors = 2;
> + }
> +
> + vm_dev->msi_share = !per_vq_vectors;
> +
> + /* Allocate nvqs irqs for queues and one irq for configuration */
> + err = vm_request_msi_vectors(vdev, nvectors);
> + if (err != 0)
> + return err;
> +
> + allocated_vectors = vm_dev->msi_used_vectors;
> + for (i = 0; i < nvqs; i++) {
> + if (!names[i]) {
> + vqs[i] = NULL;
> + continue;
> + }
> + if (!callbacks[i])
> + msi_vec = VIRTIO_MMIO_MSI_NO_VECTOR;
> + else if (per_vq_vectors)
> + msi_vec = allocated_vectors++;
> + else
> + msi_vec = 1;
> + vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
> + ctx ? ctx[i] : false, msi_vec);
> + if (IS_ERR(vqs[i])) {
> + err = PTR_ERR(vqs[i]);
> + goto error_find;
> + }
> +
> + if (!per_vq_vectors ||
> + msi_vec == VIRTIO_MMIO_MSI_NO_VECTOR)
> + continue;
> +
> + /* allocate per-vq irq if available and necessary */
> + snprintf(vm_dev->vm_vq_names[msi_vec],
> + sizeof(*vm_dev->vm_vq_names),
> + "%s-%s",
> + dev_name(&vm_dev->vdev.dev), names[i]);
> + err = request_irq(mmio_msi_irq_vector(&vqs[i]->vdev->dev,
> + msi_vec),
> + vring_interrupt, 0,
> + vm_dev->vm_vq_names[msi_vec], vqs[i]);
> +
> + if (err)
> + goto error_find;
> + }
> +
> return 0;
> +
> +error_find:
> + vm_del_vqs(vdev);
> + return err;
> +}
> +
> +static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char * const names[],
> + const bool *ctx,
> + struct irq_affinity *desc)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int err;
> +
> + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION)) {
> + unsigned int notify = readl(vm_dev->base +
> + VIRTIO_MMIO_QUEUE_NOTIFY);
> +
> + vm_dev->notify_base = notify & 0xffff;
> + vm_dev->notify_multiplier = (notify >> 16) & 0xffff;
> + }
> +
> + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_MSI)) {
> + bool dyn_mapping = !!(readl(vm_dev->base +
> + VIRTIO_MMIO_MSI_STATE) &
> + VIRTIO_MMIO_MSI_SHARING_MASK);
> + if (!dyn_mapping)
> + err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
> + names, true, ctx, desc);
> + else
> + err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
> + names, false, ctx, desc);
> + if (!err)
> + return 0;
> + }
> +
> + return vm_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
> +}
> +
> +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + unsigned int v;
> + int irq, err;
> +
> + if (vm_dev->msi_enabled)
> + return -EINVAL;
> +
> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
> + GFP_KERNEL);
> + if (!vm_dev->vm_vq_names)
> + return -ENOMEM;
> +
> + mmio_get_msi_domain(vdev);
> + err = mmio_msi_domain_alloc_irqs(&vdev->dev, nirqs);
> + if (err) {
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> + return err;
> + }
> +
> + mmio_msi_set_enable(vm_dev, 1);
> + vm_dev->msi_enabled = true;
> +
> + v = vm_dev->msi_used_vectors;
> + /* The first MSI vector is used for configuration change event. */
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-config", dev_name(&vdev->dev));
> + irq = mmio_msi_irq_vector(&vdev->dev, v);
> + err = request_irq(irq, vm_config_changed, 0, vm_dev->vm_vq_names[v],
> + vm_dev);
> + if (err)
> + goto error_request_irq;
> +
> + /* Set the configuration event mapping. */
> + if (vm_dev->msi_share)
> + mmio_msi_config_vector(vm_dev, v);
> +
> + ++vm_dev->msi_used_vectors;
> +
> + if (vm_dev->msi_share) {
> + v = vm_dev->msi_used_vectors;
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-virtqueues", dev_name(&vm_dev->vdev.dev));
> + err = request_irq(mmio_msi_irq_vector(&vdev->dev, v),
> + vm_vring_interrupt, 0, vm_dev->vm_vq_names[v],
> + vm_dev);
> + if (err)
> + goto error_request_irq;
> + ++vm_dev->msi_used_vectors;
> + }
> +
> + return 0;
> +
> +error_request_irq:
> + vm_free_msi_irqs(vdev);
> +
> + return err;
> +}
> +
> +static void vm_free_msi_irqs(struct virtio_device *vdev)
> +{
> + int i;
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + mmio_msi_set_enable(vm_dev, 0);
> + for (i = 0; i < vm_dev->msi_used_vectors; i++)
> + free_irq(mmio_msi_irq_vector(&vdev->dev, i), vm_dev);
> + mmio_msi_domain_free_irqs(&vdev->dev);
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> + vm_dev->msi_enabled = false;
> + vm_dev->msi_used_vectors = 0;
> }
>
> static const char *vm_bus_name(struct virtio_device *vdev)
> @@ -609,6 +876,8 @@ static int virtio_mmio_probe(struct platform_device *pdev)
>
> platform_set_drvdata(pdev, vm_dev);
>
> + mmio_msi_create_irq_domain();
> +
> rc = register_virtio_device(&vm_dev->vdev);
> if (rc)
> put_device(&vm_dev->vdev.dev);
> diff --git a/drivers/virtio/virtio_mmio_common.h b/drivers/virtio/virtio_mmio_common.h
> index 90cb304..ccf6320 100644
> --- a/drivers/virtio/virtio_mmio_common.h
> +++ b/drivers/virtio/virtio_mmio_common.h
> @@ -26,6 +26,14 @@ struct virtio_mmio_device {
>
> unsigned short notify_base;
> unsigned short notify_multiplier;
> +
> + /* Name strings for interrupts. This size should be enough. */
> + char (*vm_vq_names)[256];
> +
> + /* used vectors */
> + unsigned int msi_used_vectors;
> + bool msi_share;
> + bool msi_enabled;
> };
>
> #endif
> diff --git a/drivers/virtio/virtio_mmio_msi.h b/drivers/virtio/virtio_mmio_msi.h
> index 27cb2af..10038cb 100644
> --- a/drivers/virtio/virtio_mmio_msi.h
> +++ b/drivers/virtio/virtio_mmio_msi.h
> @@ -8,6 +8,7 @@
> #include <linux/irq.h>
> #include <linux/irqdomain.h>
> #include <linux/platform_device.h>
> +#include "virtio_mmio_common.h"
>
> static irq_hw_number_t mmio_msi_hwirq;
> static struct irq_domain *mmio_msi_domain;
> @@ -21,12 +22,41 @@ void __weak irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
> {
> }
>
> +static void __iomem *vm_dev_base(struct msi_desc *desc)
> +{
> + if (desc) {
> + struct device *dev = desc->dev;
> + struct virtio_device *vdev = dev_to_virtio(dev);
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + return vm_dev->base;
> + }
> +
> + return NULL;
> +}
> +
> +static void mmio_msi_set_mask_bit(struct irq_data *data, u32 flag)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + void __iomem *base = vm_dev_base(desc);
> + unsigned int offset = data->irq - desc->irq;
> +
> + if (base) {
> + u32 op = flag ? VIRTIO_MMIO_MSI_CMD_MASK :
> + VIRTIO_MMIO_MSI_CMD_UNMASK;
> + writel(offset, base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(op, base + VIRTIO_MMIO_MSI_COMMAND);
> + }
> +}
> +
> static void mmio_msi_mask_irq(struct irq_data *data)
> {
> + mmio_msi_set_mask_bit(data, 1);
> }
>
> static void mmio_msi_unmask_irq(struct irq_data *data)
> {
> + mmio_msi_set_mask_bit(data, 0);
> }
>
> static struct irq_chip mmio_msi_controller = {
> @@ -86,8 +116,60 @@ static inline void mmio_msi_create_irq_domain(void)
> irq_domain_free_fwnode(fn);
> }
> }
> +
> +static void mmio_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
> +{
> + void __iomem *base = vm_dev_base(desc);
> +
> + if (base) {
> + writel(desc->platform.msi_index, base +
> + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(msg->address_lo, base + VIRTIO_MMIO_MSI_ADDRESS_LOW);
> + writel(msg->address_hi, base + VIRTIO_MMIO_MSI_ADDRESS_HIGH);
> + writel(msg->data, base + VIRTIO_MMIO_MSI_DATA);
> + writel(VIRTIO_MMIO_MSI_CMD_CONFIGURE,
> + base + VIRTIO_MMIO_MSI_COMMAND);
> + }
> +}
> +
> +static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
> + unsigned int nvec)
> +{
> + return platform_msi_domain_alloc_irqs(dev, nvec,
> + mmio_write_msi_msg);
> +}
> +
> +static inline void mmio_msi_domain_free_irqs(struct device *dev)
> +{
> + return platform_msi_domain_free_irqs(dev);
> +}
> +
> +static inline void mmio_get_msi_domain(struct virtio_device *vdev)
> +{
> + if (!vdev->dev.msi_domain)
> + vdev->dev.msi_domain = mmio_msi_domain;
> +}
> +
> +static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + struct msi_desc *entry = first_msi_entry(dev);
> +
> + return entry->irq + nr;
> +}
> +
> #else
> static inline void mmio_msi_create_irq_domain(void) {}
> +static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + return -EINVAL;
> +}
> +static inline void mmio_get_msi_domain(struct virtio_device *vdev) {}
> +static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
> + unsigned int nvec)
> +{
> + return -EINVAL;
> +}
> +static inline void mmio_msi_domain_free_irqs(struct device *dev) {}
> #endif
>
> #endif
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index 5d93c01..5eb3900 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -52,7 +52,7 @@
> * rest are per-device feature bits.
> */
> #define VIRTIO_TRANSPORT_F_START 28
> -#define VIRTIO_TRANSPORT_F_END 40
> +#define VIRTIO_TRANSPORT_F_END 41
>
> #ifndef VIRTIO_CONFIG_NO_LEGACY
> /* Do we get callbacks when the ring is completely used, even if we've
> @@ -94,4 +94,9 @@
> * layer.
> */
> #define VIRTIO_F_MMIO_NOTIFICATION 39
> +
> +/*
> + * This feature indicates the MSI support on MMIO layer.
> + */
> +#define VIRTIO_F_MMIO_MSI 40
> #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
> diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h
> index c4b0968..777cb0e 100644
> --- a/include/uapi/linux/virtio_mmio.h
> +++ b/include/uapi/linux/virtio_mmio.h
> @@ -122,6 +122,21 @@
> #define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0
> #define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4
>
> +/* MSI max vector number that device supports - Read Only */
> +#define VIRTIO_MMIO_MSI_VEC_NUM 0x0c0
> +/* MSI state register - Read Only */
> +#define VIRTIO_MMIO_MSI_STATE 0x0c4
> +/* MSI command register - Write Only */
> +#define VIRTIO_MMIO_MSI_COMMAND 0x0c8
> +/* MSI vector selector - Write Only */
> +#define VIRTIO_MMIO_MSI_VEC_SEL 0x0d0
> +/* MSI low 32 bit address, 64 bits in two halves */
> +#define VIRTIO_MMIO_MSI_ADDRESS_LOW 0x0d4
> +/* MSI high 32 bit address, 64 bits in two halves */
> +#define VIRTIO_MMIO_MSI_ADDRESS_HIGH 0x0d8
> +/* MSI 32 bit data */
> +#define VIRTIO_MMIO_MSI_DATA 0x0dc
> +
> /* Configuration atomicity value */
> #define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc
>
> @@ -130,6 +145,22 @@
> #define VIRTIO_MMIO_CONFIG 0x100
>
>
> +/* MSI commands */
> +#define VIRTIO_MMIO_MSI_CMD_ENABLE 0x1
> +#define VIRTIO_MMIO_MSI_CMD_DISABLE 0x2
> +#define VIRTIO_MMIO_MSI_CMD_CONFIGURE 0x3
> +#define VIRTIO_MMIO_MSI_CMD_MASK 0x4
> +#define VIRTIO_MMIO_MSI_CMD_UNMASK 0x5
> +#define VIRTIO_MMIO_MSI_CMD_MAP_CONFIG 0x6
> +#define VIRTIO_MMIO_MSI_CMD_MAP_QUEUE 0x7
> +
> +/* MSI NO_VECTOR */
> +#define VIRTIO_MMIO_MSI_NO_VECTOR 0xffffffff
> +
> +/* MSI state enabled state mask */
> +#define VIRTIO_MMIO_MSI_ENABLED_MASK (1 << 31)
> +/* MSI state MSI sharing mask */
> +#define VIRTIO_MMIO_MSI_SHARING_MASK (1 << 30)
>
You should be able to drop MAP commands,
replace vector number with vq number.
I also don't really see a burning need for separate
enable/disable/configure/mask/unmask commands.
Just two registers mask and enable should be enough?
That will also remove the need for the NO_VECTOR hack.
Why is sharing something the hypervisor needs to worry about?
just make all interrupts shared, or all unshared.
does it matter for performance?
> /*
> * Interrupt flags (re: interrupt status & acknowledge registers)
> --
> 1.8.3.1
^ permalink raw reply [flat|nested] 104+ messages in thread
* [virtio-dev] Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 11:11 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:11 UTC (permalink / raw)
To: Zha Bin
Cc: linux-kernel, jasowang, slp, virtio-dev, qemu-devel, gerry,
jing2.liu, chao.p.peng
On Mon, Feb 10, 2020 at 05:05:20PM +0800, Zha Bin wrote:
> From: Liu Jiang <gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> virtio over mmio devices as a lightweight machine model for modern
> cloud. The standard virtio over MMIO transport layer only supports one
> legacy interrupt, which is much heavier than virtio over PCI transport
> layer using MSI. Legacy interrupt has long work path and causes specific
> VMExits in following cases, which would considerably slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>
> [1] https://lkml.org/lkml/2020/1/21/31
>
> Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
> Co-developed-by: Zha Bin <zhabin@linux.alibaba.com>
> Signed-off-by: Zha Bin <zhabin@linux.alibaba.com>
> Co-developed-by: Jing Liu <jing2.liu@linux.intel.com>
> Signed-off-by: Jing Liu <jing2.liu@linux.intel.com>
> Co-developed-by: Chao Peng <chao.p.peng@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
> drivers/virtio/virtio_mmio.c | 299 ++++++++++++++++++++++++++++++++++--
> drivers/virtio/virtio_mmio_common.h | 8 +
> drivers/virtio/virtio_mmio_msi.h | 82 ++++++++++
> include/uapi/linux/virtio_config.h | 7 +-
> include/uapi/linux/virtio_mmio.h | 31 ++++
> 5 files changed, 411 insertions(+), 16 deletions(-)
So that's > 100 extra lines in headers that you for some reason
don't count when comparing code size :).
I think an effort can be made to reduce the complexity here.
Otherwise you will end up with a clone of PCI
sooner than you think. In fact, disabling all legacy:
$ wc -l drivers/virtio/virtio_pci_modern.c
734 drivers/virtio/virtio_pci_modern.c
$ wc -l drivers/virtio/virtio_pci_common.c
635 drivers/virtio/virtio_pci_common.c
So you have almost matched that with your patch.
>
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index 41e1c93..b24ce21 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -67,8 +67,7 @@
> #include <uapi/linux/virtio_mmio.h>
> #include <linux/virtio_ring.h>
> #include "virtio_mmio_common.h"
> -
> -
> +#include "virtio_mmio_msi.h"
>
> /* The alignment to use between consumer and producer parts of vring.
> * Currently hardcoded to the page size. */
> @@ -85,9 +84,13 @@ struct virtio_mmio_vq_info {
>
> /* Notify Address*/
> unsigned int notify_addr;
> -};
>
> + /* MSI vector (or none) */
> + unsigned int msi_vector;
> +};
>
> +static void vm_free_msi_irqs(struct virtio_device *vdev);
> +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs);
>
> /* Configuration interface */
>
> @@ -110,6 +113,9 @@ static void vm_transport_features(struct virtio_device *vdev, u64 features)
> {
> if (features & BIT_ULL(VIRTIO_F_MMIO_NOTIFICATION))
> __virtio_set_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION);
> +
> + if (features & BIT_ULL(VIRTIO_F_MMIO_MSI))
> + __virtio_set_bit(vdev, VIRTIO_F_MMIO_MSI);
> }
>
> static int vm_finalize_features(struct virtio_device *vdev)
> @@ -307,7 +313,33 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
> return ret;
> }
>
> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> + struct virtio_mmio_vq_info *info;
> + irqreturn_t ret = IRQ_NONE;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&vm_dev->lock, flags);
> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
> + ret = IRQ_HANDLED;
> + }
> + spin_unlock_irqrestore(&vm_dev->lock, flags);
> +
> + return ret;
> +}
> +
> +
> +/* Handle a configuration change */
> +static irqreturn_t vm_config_changed(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> +
> + virtio_config_changed(&vm_dev->vdev);
>
> + return IRQ_HANDLED;
> +}
>
> static void vm_del_vq(struct virtqueue *vq)
> {
> @@ -316,6 +348,15 @@ static void vm_del_vq(struct virtqueue *vq)
> unsigned long flags;
> unsigned int index = vq->index;
>
> + if (vm_dev->msi_enabled && !vm_dev->msi_share) {
> + if (info->msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR) {
> + int irq = mmio_msi_irq_vector(&vq->vdev->dev,
> + info->msi_vector);
> +
> + free_irq(irq, vq);
> + }
> + }
> +
> spin_lock_irqsave(&vm_dev->lock, flags);
> list_del(&info->node);
> spin_unlock_irqrestore(&vm_dev->lock, flags);
> @@ -334,20 +375,56 @@ static void vm_del_vq(struct virtqueue *vq)
> kfree(info);
> }
>
> -static void vm_del_vqs(struct virtio_device *vdev)
> +static void vm_free_irqs(struct virtio_device *vdev)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + if (vm_dev->msi_enabled)
> + vm_free_msi_irqs(vdev);
> + else
> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> +}
> +
> +static void vm_del_vqs(struct virtio_device *vdev)
> +{
> struct virtqueue *vq, *n;
>
> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> vm_del_vq(vq);
>
> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> + vm_free_irqs(vdev);
> +}
> +
> +static inline void mmio_msi_set_enable(struct virtio_mmio_device *vm_dev,
> + int enable)
> +{
> + u32 state;
> +
> + state = readl(vm_dev->base + VIRTIO_MMIO_MSI_STATE);
> + if (enable && (state & VIRTIO_MMIO_MSI_ENABLED_MASK))
> + return;
> +
> + writel(VIRTIO_MMIO_MSI_CMD_ENABLE,
> + vm_dev->base + VIRTIO_MMIO_MSI_COMMAND);
> +}
> +
> +static void mmio_msi_config_vector(struct virtio_mmio_device *vm_dev, u32 vec)
> +{
> + writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(VIRTIO_MMIO_MSI_CMD_MAP_CONFIG, vm_dev->base +
> + VIRTIO_MMIO_MSI_COMMAND);
> +}
> +
> +static void mmio_msi_queue_vector(struct virtio_mmio_device *vm_dev, u32 vec)
> +{
> + writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(VIRTIO_MMIO_MSI_CMD_MAP_QUEUE, vm_dev->base +
> + VIRTIO_MMIO_MSI_COMMAND);
> }
>
> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> void (*callback)(struct virtqueue *vq),
> - const char *name, bool ctx)
> + const char *name, bool ctx, u32 msi_vector)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> struct virtio_mmio_vq_info *info;
> @@ -440,6 +517,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> else
> info->notify_addr = VIRTIO_MMIO_QUEUE_NOTIFY;
>
> + info->msi_vector = msi_vector;
> + /* Set queue event and vector mapping for MSI share mode. */
> + if (vm_dev->msi_share && msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR)
> + mmio_msi_queue_vector(vm_dev, msi_vector);
> +
> spin_lock_irqsave(&vm_dev->lock, flags);
> list_add(&info->node, &vm_dev->virtqueues);
> spin_unlock_irqrestore(&vm_dev->lock, flags);
> @@ -461,12 +543,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> return ERR_PTR(err);
> }
>
> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> - struct virtqueue *vqs[],
> - vq_callback_t *callbacks[],
> - const char * const names[],
> - const bool *ctx,
> - struct irq_affinity *desc)
> +static int vm_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char * const names[],
> + const bool *ctx)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> int irq = platform_get_irq(vm_dev->pdev, 0);
> @@ -487,8 +568,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>
> err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> dev_name(&vdev->dev), vm_dev);
> - if (err)
> - return err;
>
> for (i = 0; i < nvqs; ++i) {
> if (!names[i]) {
> @@ -497,14 +576,202 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> }
>
> vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> - ctx ? ctx[i] : false);
> + ctx ? ctx[i] : false,
> + VIRTIO_MMIO_MSI_NO_VECTOR);
> if (IS_ERR(vqs[i])) {
> vm_del_vqs(vdev);
> return PTR_ERR(vqs[i]);
> }
> }
>
> + return err;
> +}
> +
> +static int vm_find_vqs_msi(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[], vq_callback_t *callbacks[],
> + const char * const names[], bool per_vq_vectors,
> + const bool *ctx, struct irq_affinity *desc)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int i, err, allocated_vectors, nvectors;
> + u32 msi_vec;
> + u32 max_vec_num = readl(vm_dev->base + VIRTIO_MMIO_MSI_VEC_NUM);
> +
> + /* For MSI non-sharing, the max vector number MUST greater than nvqs.
> + * Otherwise, go back to legacy interrupt.
> + */
> + if (per_vq_vectors && max_vec_num < (nvqs + 1))
> + return -EINVAL;
> +
> + if (per_vq_vectors) {
> + nvectors = 1;
> + for (i = 0; i < nvqs; ++i)
> + if (callbacks[i])
> + ++nvectors;
> + } else {
> + nvectors = 2;
> + }
> +
> + vm_dev->msi_share = !per_vq_vectors;
> +
> + /* Allocate nvqs irqs for queues and one irq for configuration */
> + err = vm_request_msi_vectors(vdev, nvectors);
> + if (err != 0)
> + return err;
> +
> + allocated_vectors = vm_dev->msi_used_vectors;
> + for (i = 0; i < nvqs; i++) {
> + if (!names[i]) {
> + vqs[i] = NULL;
> + continue;
> + }
> + if (!callbacks[i])
> + msi_vec = VIRTIO_MMIO_MSI_NO_VECTOR;
> + else if (per_vq_vectors)
> + msi_vec = allocated_vectors++;
> + else
> + msi_vec = 1;
> + vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
> + ctx ? ctx[i] : false, msi_vec);
> + if (IS_ERR(vqs[i])) {
> + err = PTR_ERR(vqs[i]);
> + goto error_find;
> + }
> +
> + if (!per_vq_vectors ||
> + msi_vec == VIRTIO_MMIO_MSI_NO_VECTOR)
> + continue;
> +
> + /* allocate per-vq irq if available and necessary */
> + snprintf(vm_dev->vm_vq_names[msi_vec],
> + sizeof(*vm_dev->vm_vq_names),
> + "%s-%s",
> + dev_name(&vm_dev->vdev.dev), names[i]);
> + err = request_irq(mmio_msi_irq_vector(&vqs[i]->vdev->dev,
> + msi_vec),
> + vring_interrupt, 0,
> + vm_dev->vm_vq_names[msi_vec], vqs[i]);
> +
> + if (err)
> + goto error_find;
> + }
> +
> return 0;
> +
> +error_find:
> + vm_del_vqs(vdev);
> + return err;
> +}
> +
> +static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char * const names[],
> + const bool *ctx,
> + struct irq_affinity *desc)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int err;
> +
> + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION)) {
> + unsigned int notify = readl(vm_dev->base +
> + VIRTIO_MMIO_QUEUE_NOTIFY);
> +
> + vm_dev->notify_base = notify & 0xffff;
> + vm_dev->notify_multiplier = (notify >> 16) & 0xffff;
> + }
> +
> + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_MSI)) {
> + bool dyn_mapping = !!(readl(vm_dev->base +
> + VIRTIO_MMIO_MSI_STATE) &
> + VIRTIO_MMIO_MSI_SHARING_MASK);
> + if (!dyn_mapping)
> + err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
> + names, true, ctx, desc);
> + else
> + err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
> + names, false, ctx, desc);
> + if (!err)
> + return 0;
> + }
> +
> + return vm_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
> +}
> +
> +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + unsigned int v;
> + int irq, err;
> +
> + if (vm_dev->msi_enabled)
> + return -EINVAL;
> +
> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
> + GFP_KERNEL);
> + if (!vm_dev->vm_vq_names)
> + return -ENOMEM;
> +
> + mmio_get_msi_domain(vdev);
> + err = mmio_msi_domain_alloc_irqs(&vdev->dev, nirqs);
> + if (err) {
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> + return err;
> + }
> +
> + mmio_msi_set_enable(vm_dev, 1);
> + vm_dev->msi_enabled = true;
> +
> + v = vm_dev->msi_used_vectors;
> + /* The first MSI vector is used for configuration change event. */
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-config", dev_name(&vdev->dev));
> + irq = mmio_msi_irq_vector(&vdev->dev, v);
> + err = request_irq(irq, vm_config_changed, 0, vm_dev->vm_vq_names[v],
> + vm_dev);
> + if (err)
> + goto error_request_irq;
> +
> + /* Set the configuration event mapping. */
> + if (vm_dev->msi_share)
> + mmio_msi_config_vector(vm_dev, v);
> +
> + ++vm_dev->msi_used_vectors;
> +
> + if (vm_dev->msi_share) {
> + v = vm_dev->msi_used_vectors;
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-virtqueues", dev_name(&vm_dev->vdev.dev));
> + err = request_irq(mmio_msi_irq_vector(&vdev->dev, v),
> + vm_vring_interrupt, 0, vm_dev->vm_vq_names[v],
> + vm_dev);
> + if (err)
> + goto error_request_irq;
> + ++vm_dev->msi_used_vectors;
> + }
> +
> + return 0;
> +
> +error_request_irq:
> + vm_free_msi_irqs(vdev);
> +
> + return err;
> +}
> +
> +static void vm_free_msi_irqs(struct virtio_device *vdev)
> +{
> + int i;
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + mmio_msi_set_enable(vm_dev, 0);
> + for (i = 0; i < vm_dev->msi_used_vectors; i++)
> + free_irq(mmio_msi_irq_vector(&vdev->dev, i), vm_dev);
> + mmio_msi_domain_free_irqs(&vdev->dev);
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> + vm_dev->msi_enabled = false;
> + vm_dev->msi_used_vectors = 0;
> }
>
> static const char *vm_bus_name(struct virtio_device *vdev)
> @@ -609,6 +876,8 @@ static int virtio_mmio_probe(struct platform_device *pdev)
>
> platform_set_drvdata(pdev, vm_dev);
>
> + mmio_msi_create_irq_domain();
> +
> rc = register_virtio_device(&vm_dev->vdev);
> if (rc)
> put_device(&vm_dev->vdev.dev);
> diff --git a/drivers/virtio/virtio_mmio_common.h b/drivers/virtio/virtio_mmio_common.h
> index 90cb304..ccf6320 100644
> --- a/drivers/virtio/virtio_mmio_common.h
> +++ b/drivers/virtio/virtio_mmio_common.h
> @@ -26,6 +26,14 @@ struct virtio_mmio_device {
>
> unsigned short notify_base;
> unsigned short notify_multiplier;
> +
> + /* Name strings for interrupts. This size should be enough. */
> + char (*vm_vq_names)[256];
> +
> + /* used vectors */
> + unsigned int msi_used_vectors;
> + bool msi_share;
> + bool msi_enabled;
> };
>
> #endif
> diff --git a/drivers/virtio/virtio_mmio_msi.h b/drivers/virtio/virtio_mmio_msi.h
> index 27cb2af..10038cb 100644
> --- a/drivers/virtio/virtio_mmio_msi.h
> +++ b/drivers/virtio/virtio_mmio_msi.h
> @@ -8,6 +8,7 @@
> #include <linux/irq.h>
> #include <linux/irqdomain.h>
> #include <linux/platform_device.h>
> +#include "virtio_mmio_common.h"
>
> static irq_hw_number_t mmio_msi_hwirq;
> static struct irq_domain *mmio_msi_domain;
> @@ -21,12 +22,41 @@ void __weak irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
> {
> }
>
> +static void __iomem *vm_dev_base(struct msi_desc *desc)
> +{
> + if (desc) {
> + struct device *dev = desc->dev;
> + struct virtio_device *vdev = dev_to_virtio(dev);
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + return vm_dev->base;
> + }
> +
> + return NULL;
> +}
> +
> +static void mmio_msi_set_mask_bit(struct irq_data *data, u32 flag)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + void __iomem *base = vm_dev_base(desc);
> + unsigned int offset = data->irq - desc->irq;
> +
> + if (base) {
> + u32 op = flag ? VIRTIO_MMIO_MSI_CMD_MASK :
> + VIRTIO_MMIO_MSI_CMD_UNMASK;
> + writel(offset, base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(op, base + VIRTIO_MMIO_MSI_COMMAND);
> + }
> +}
> +
> static void mmio_msi_mask_irq(struct irq_data *data)
> {
> + mmio_msi_set_mask_bit(data, 1);
> }
>
> static void mmio_msi_unmask_irq(struct irq_data *data)
> {
> + mmio_msi_set_mask_bit(data, 0);
> }
>
> static struct irq_chip mmio_msi_controller = {
> @@ -86,8 +116,60 @@ static inline void mmio_msi_create_irq_domain(void)
> irq_domain_free_fwnode(fn);
> }
> }
> +
> +static void mmio_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
> +{
> + void __iomem *base = vm_dev_base(desc);
> +
> + if (base) {
> + writel(desc->platform.msi_index, base +
> + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(msg->address_lo, base + VIRTIO_MMIO_MSI_ADDRESS_LOW);
> + writel(msg->address_hi, base + VIRTIO_MMIO_MSI_ADDRESS_HIGH);
> + writel(msg->data, base + VIRTIO_MMIO_MSI_DATA);
> + writel(VIRTIO_MMIO_MSI_CMD_CONFIGURE,
> + base + VIRTIO_MMIO_MSI_COMMAND);
> + }
> +}
> +
> +static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
> + unsigned int nvec)
> +{
> + return platform_msi_domain_alloc_irqs(dev, nvec,
> + mmio_write_msi_msg);
> +}
> +
> +static inline void mmio_msi_domain_free_irqs(struct device *dev)
> +{
> + return platform_msi_domain_free_irqs(dev);
> +}
> +
> +static inline void mmio_get_msi_domain(struct virtio_device *vdev)
> +{
> + if (!vdev->dev.msi_domain)
> + vdev->dev.msi_domain = mmio_msi_domain;
> +}
> +
> +static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + struct msi_desc *entry = first_msi_entry(dev);
> +
> + return entry->irq + nr;
> +}
> +
> #else
> static inline void mmio_msi_create_irq_domain(void) {}
> +static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + return -EINVAL;
> +}
> +static inline void mmio_get_msi_domain(struct virtio_device *vdev) {}
> +static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
> + unsigned int nvec)
> +{
> + return -EINVAL;
> +}
> +static inline void mmio_msi_domain_free_irqs(struct device *dev) {}
> #endif
>
> #endif
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index 5d93c01..5eb3900 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -52,7 +52,7 @@
> * rest are per-device feature bits.
> */
> #define VIRTIO_TRANSPORT_F_START 28
> -#define VIRTIO_TRANSPORT_F_END 40
> +#define VIRTIO_TRANSPORT_F_END 41
>
> #ifndef VIRTIO_CONFIG_NO_LEGACY
> /* Do we get callbacks when the ring is completely used, even if we've
> @@ -94,4 +94,9 @@
> * layer.
> */
> #define VIRTIO_F_MMIO_NOTIFICATION 39
> +
> +/*
> + * This feature indicates the MSI support on MMIO layer.
> + */
> +#define VIRTIO_F_MMIO_MSI 40
> #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
> diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h
> index c4b0968..777cb0e 100644
> --- a/include/uapi/linux/virtio_mmio.h
> +++ b/include/uapi/linux/virtio_mmio.h
> @@ -122,6 +122,21 @@
> #define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0
> #define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4
>
> +/* MSI max vector number that device supports - Read Only */
> +#define VIRTIO_MMIO_MSI_VEC_NUM 0x0c0
> +/* MSI state register - Read Only */
> +#define VIRTIO_MMIO_MSI_STATE 0x0c4
> +/* MSI command register - Write Only */
> +#define VIRTIO_MMIO_MSI_COMMAND 0x0c8
> +/* MSI vector selector - Write Only */
> +#define VIRTIO_MMIO_MSI_VEC_SEL 0x0d0
> +/* MSI low 32 bit address, 64 bits in two halves */
> +#define VIRTIO_MMIO_MSI_ADDRESS_LOW 0x0d4
> +/* MSI high 32 bit address, 64 bits in two halves */
> +#define VIRTIO_MMIO_MSI_ADDRESS_HIGH 0x0d8
> +/* MSI 32 bit data */
> +#define VIRTIO_MMIO_MSI_DATA 0x0dc
> +
> /* Configuration atomicity value */
> #define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc
>
> @@ -130,6 +145,22 @@
> #define VIRTIO_MMIO_CONFIG 0x100
>
>
> +/* MSI commands */
> +#define VIRTIO_MMIO_MSI_CMD_ENABLE 0x1
> +#define VIRTIO_MMIO_MSI_CMD_DISABLE 0x2
> +#define VIRTIO_MMIO_MSI_CMD_CONFIGURE 0x3
> +#define VIRTIO_MMIO_MSI_CMD_MASK 0x4
> +#define VIRTIO_MMIO_MSI_CMD_UNMASK 0x5
> +#define VIRTIO_MMIO_MSI_CMD_MAP_CONFIG 0x6
> +#define VIRTIO_MMIO_MSI_CMD_MAP_QUEUE 0x7
> +
> +/* MSI NO_VECTOR */
> +#define VIRTIO_MMIO_MSI_NO_VECTOR 0xffffffff
> +
> +/* MSI state enabled state mask */
> +#define VIRTIO_MMIO_MSI_ENABLED_MASK (1 << 31)
> +/* MSI state MSI sharing mask */
> +#define VIRTIO_MMIO_MSI_SHARING_MASK (1 << 30)
>
You should be able to drop MAP commands,
replace vector number with vq number.
I also don't really see a burning need for separate
enable/disable/configure/mask/unmask commands.
Just two registers mask and enable should be enough?
That will also remove the need for the NO_VECTOR hack.
Why is sharing something the hypervisor needs to worry about?
just make all interrupts shared, or all unshared.
does it matter for performance?
> /*
> * Interrupt flags (re: interrupt status & acknowledge registers)
> --
> 1.8.3.1
---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
^ permalink raw reply [flat|nested] 104+ messages in thread
* Re: [PATCH v2 4/5] virtio-mmio: add MSI interrupt feature support
@ 2020-02-11 11:11 ` Michael S. Tsirkin
0 siblings, 0 replies; 104+ messages in thread
From: Michael S. Tsirkin @ 2020-02-11 11:11 UTC (permalink / raw)
To: Zha Bin
Cc: virtio-dev, slp, jasowang, jing2.liu, linux-kernel, qemu-devel,
chao.p.peng, gerry
On Mon, Feb 10, 2020 at 05:05:20PM +0800, Zha Bin wrote:
> From: Liu Jiang <gerry@linux.alibaba.com>
>
> Userspace VMMs (e.g. Qemu microvm, Firecracker) take advantage of using
> virtio over mmio devices as a lightweight machine model for modern
> cloud. The standard virtio over MMIO transport layer only supports one
> legacy interrupt, which is much heavier than virtio over PCI transport
> layer using MSI. Legacy interrupt has long work path and causes specific
> VMExits in following cases, which would considerably slow down the
> performance:
>
> 1) read interrupt status register
> 2) update interrupt status register
> 3) write IOAPIC EOI register
>
> We proposed to add MSI support for virtio over MMIO via new feature
> bit VIRTIO_F_MMIO_MSI[1] which increases the interrupt performance.
>
> With the VIRTIO_F_MMIO_MSI feature bit supported, the virtio-mmio MSI
> uses msi_sharing[1] to indicate the event and vector mapping.
> Bit 1 is 0: device uses non-sharing and fixed vector per event mapping.
> Bit 1 is 1: device uses sharing mode and dynamic mapping.
>
> [1] https://lkml.org/lkml/2020/1/21/31
>
> Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
> Co-developed-by: Zha Bin <zhabin@linux.alibaba.com>
> Signed-off-by: Zha Bin <zhabin@linux.alibaba.com>
> Co-developed-by: Jing Liu <jing2.liu@linux.intel.com>
> Signed-off-by: Jing Liu <jing2.liu@linux.intel.com>
> Co-developed-by: Chao Peng <chao.p.peng@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
> drivers/virtio/virtio_mmio.c | 299 ++++++++++++++++++++++++++++++++++--
> drivers/virtio/virtio_mmio_common.h | 8 +
> drivers/virtio/virtio_mmio_msi.h | 82 ++++++++++
> include/uapi/linux/virtio_config.h | 7 +-
> include/uapi/linux/virtio_mmio.h | 31 ++++
> 5 files changed, 411 insertions(+), 16 deletions(-)
So that's > 100 extra lines in headers that you, for some reason,
don't count when comparing code size :).
I think an effort can be made to reduce the complexity here.
Otherwise you will end up with a clone of PCI
sooner than you think. In fact, disabling all legacy:
$ wc -l drivers/virtio/virtio_pci_modern.c
734 drivers/virtio/virtio_pci_modern.c
$ wc -l drivers/virtio/virtio_pci_common.c
635 drivers/virtio/virtio_pci_common.c
So you have almost matched that with your patch.
>
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index 41e1c93..b24ce21 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -67,8 +67,7 @@
> #include <uapi/linux/virtio_mmio.h>
> #include <linux/virtio_ring.h>
> #include "virtio_mmio_common.h"
> -
> -
> +#include "virtio_mmio_msi.h"
>
> /* The alignment to use between consumer and producer parts of vring.
> * Currently hardcoded to the page size. */
> @@ -85,9 +84,13 @@ struct virtio_mmio_vq_info {
>
> /* Notify Address*/
> unsigned int notify_addr;
> -};
>
> + /* MSI vector (or none) */
> + unsigned int msi_vector;
> +};
>
> +static void vm_free_msi_irqs(struct virtio_device *vdev);
> +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs);
>
> /* Configuration interface */
>
> @@ -110,6 +113,9 @@ static void vm_transport_features(struct virtio_device *vdev, u64 features)
> {
> if (features & BIT_ULL(VIRTIO_F_MMIO_NOTIFICATION))
> __virtio_set_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION);
> +
> + if (features & BIT_ULL(VIRTIO_F_MMIO_MSI))
> + __virtio_set_bit(vdev, VIRTIO_F_MMIO_MSI);
> }
>
> static int vm_finalize_features(struct virtio_device *vdev)
> @@ -307,7 +313,33 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
> return ret;
> }
>
> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> + struct virtio_mmio_vq_info *info;
> + irqreturn_t ret = IRQ_NONE;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&vm_dev->lock, flags);
> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
> + ret = IRQ_HANDLED;
> + }
> + spin_unlock_irqrestore(&vm_dev->lock, flags);
> +
> + return ret;
> +}
> +
> +
> +/* Handle a configuration change */
> +static irqreturn_t vm_config_changed(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> +
> + virtio_config_changed(&vm_dev->vdev);
>
> + return IRQ_HANDLED;
> +}
>
> static void vm_del_vq(struct virtqueue *vq)
> {
> @@ -316,6 +348,15 @@ static void vm_del_vq(struct virtqueue *vq)
> unsigned long flags;
> unsigned int index = vq->index;
>
> + if (vm_dev->msi_enabled && !vm_dev->msi_share) {
> + if (info->msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR) {
> + int irq = mmio_msi_irq_vector(&vq->vdev->dev,
> + info->msi_vector);
> +
> + free_irq(irq, vq);
> + }
> + }
> +
> spin_lock_irqsave(&vm_dev->lock, flags);
> list_del(&info->node);
> spin_unlock_irqrestore(&vm_dev->lock, flags);
> @@ -334,20 +375,56 @@ static void vm_del_vq(struct virtqueue *vq)
> kfree(info);
> }
>
> -static void vm_del_vqs(struct virtio_device *vdev)
> +static void vm_free_irqs(struct virtio_device *vdev)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + if (vm_dev->msi_enabled)
> + vm_free_msi_irqs(vdev);
> + else
> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> +}
> +
> +static void vm_del_vqs(struct virtio_device *vdev)
> +{
> struct virtqueue *vq, *n;
>
> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> vm_del_vq(vq);
>
> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> + vm_free_irqs(vdev);
> +}
> +
> +static inline void mmio_msi_set_enable(struct virtio_mmio_device *vm_dev,
> + int enable)
> +{
> + u32 state;
> +
> + state = readl(vm_dev->base + VIRTIO_MMIO_MSI_STATE);
> + if (enable && (state & VIRTIO_MMIO_MSI_ENABLED_MASK))
> + return;
> +
> + writel(VIRTIO_MMIO_MSI_CMD_ENABLE,
> + vm_dev->base + VIRTIO_MMIO_MSI_COMMAND);
> +}
> +
> +static void mmio_msi_config_vector(struct virtio_mmio_device *vm_dev, u32 vec)
> +{
> + writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(VIRTIO_MMIO_MSI_CMD_MAP_CONFIG, vm_dev->base +
> + VIRTIO_MMIO_MSI_COMMAND);
> +}
> +
> +static void mmio_msi_queue_vector(struct virtio_mmio_device *vm_dev, u32 vec)
> +{
> + writel(vec, vm_dev->base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(VIRTIO_MMIO_MSI_CMD_MAP_QUEUE, vm_dev->base +
> + VIRTIO_MMIO_MSI_COMMAND);
> }
>
> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> void (*callback)(struct virtqueue *vq),
> - const char *name, bool ctx)
> + const char *name, bool ctx, u32 msi_vector)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> struct virtio_mmio_vq_info *info;
> @@ -440,6 +517,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> else
> info->notify_addr = VIRTIO_MMIO_QUEUE_NOTIFY;
>
> + info->msi_vector = msi_vector;
> + /* Set queue event and vector mapping for MSI share mode. */
> + if (vm_dev->msi_share && msi_vector != VIRTIO_MMIO_MSI_NO_VECTOR)
> + mmio_msi_queue_vector(vm_dev, msi_vector);
> +
> spin_lock_irqsave(&vm_dev->lock, flags);
> list_add(&info->node, &vm_dev->virtqueues);
> spin_unlock_irqrestore(&vm_dev->lock, flags);
> @@ -461,12 +543,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> return ERR_PTR(err);
> }
>
> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> - struct virtqueue *vqs[],
> - vq_callback_t *callbacks[],
> - const char * const names[],
> - const bool *ctx,
> - struct irq_affinity *desc)
> +static int vm_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char * const names[],
> + const bool *ctx)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> int irq = platform_get_irq(vm_dev->pdev, 0);
> @@ -487,8 +568,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>
> err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> dev_name(&vdev->dev), vm_dev);
> - if (err)
> - return err;
>
> for (i = 0; i < nvqs; ++i) {
> if (!names[i]) {
> @@ -497,14 +576,202 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> }
>
> vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> - ctx ? ctx[i] : false);
> + ctx ? ctx[i] : false,
> + VIRTIO_MMIO_MSI_NO_VECTOR);
> if (IS_ERR(vqs[i])) {
> vm_del_vqs(vdev);
> return PTR_ERR(vqs[i]);
> }
> }
>
> + return err;
> +}
> +
> +static int vm_find_vqs_msi(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[], vq_callback_t *callbacks[],
> + const char * const names[], bool per_vq_vectors,
> + const bool *ctx, struct irq_affinity *desc)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int i, err, allocated_vectors, nvectors;
> + u32 msi_vec;
> + u32 max_vec_num = readl(vm_dev->base + VIRTIO_MMIO_MSI_VEC_NUM);
> +
> + /* For MSI non-sharing, the max vector number MUST greater than nvqs.
> + * Otherwise, go back to legacy interrupt.
> + */
> + if (per_vq_vectors && max_vec_num < (nvqs + 1))
> + return -EINVAL;
> +
> + if (per_vq_vectors) {
> + nvectors = 1;
> + for (i = 0; i < nvqs; ++i)
> + if (callbacks[i])
> + ++nvectors;
> + } else {
> + nvectors = 2;
> + }
> +
> + vm_dev->msi_share = !per_vq_vectors;
> +
> + /* Allocate nvqs irqs for queues and one irq for configuration */
> + err = vm_request_msi_vectors(vdev, nvectors);
> + if (err != 0)
> + return err;
> +
> + allocated_vectors = vm_dev->msi_used_vectors;
> + for (i = 0; i < nvqs; i++) {
> + if (!names[i]) {
> + vqs[i] = NULL;
> + continue;
> + }
> + if (!callbacks[i])
> + msi_vec = VIRTIO_MMIO_MSI_NO_VECTOR;
> + else if (per_vq_vectors)
> + msi_vec = allocated_vectors++;
> + else
> + msi_vec = 1;
> + vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
> + ctx ? ctx[i] : false, msi_vec);
> + if (IS_ERR(vqs[i])) {
> + err = PTR_ERR(vqs[i]);
> + goto error_find;
> + }
> +
> + if (!per_vq_vectors ||
> + msi_vec == VIRTIO_MMIO_MSI_NO_VECTOR)
> + continue;
> +
> + /* allocate per-vq irq if available and necessary */
> + snprintf(vm_dev->vm_vq_names[msi_vec],
> + sizeof(*vm_dev->vm_vq_names),
> + "%s-%s",
> + dev_name(&vm_dev->vdev.dev), names[i]);
> + err = request_irq(mmio_msi_irq_vector(&vqs[i]->vdev->dev,
> + msi_vec),
> + vring_interrupt, 0,
> + vm_dev->vm_vq_names[msi_vec], vqs[i]);
> +
> + if (err)
> + goto error_find;
> + }
> +
> return 0;
> +
> +error_find:
> + vm_del_vqs(vdev);
> + return err;
> +}
> +
> +static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char * const names[],
> + const bool *ctx,
> + struct irq_affinity *desc)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int err;
> +
> + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_NOTIFICATION)) {
> + unsigned int notify = readl(vm_dev->base +
> + VIRTIO_MMIO_QUEUE_NOTIFY);
> +
> + vm_dev->notify_base = notify & 0xffff;
> + vm_dev->notify_multiplier = (notify >> 16) & 0xffff;
> + }
> +
> + if (__virtio_test_bit(vdev, VIRTIO_F_MMIO_MSI)) {
> + bool dyn_mapping = !!(readl(vm_dev->base +
> + VIRTIO_MMIO_MSI_STATE) &
> + VIRTIO_MMIO_MSI_SHARING_MASK);
> + if (!dyn_mapping)
> + err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
> + names, true, ctx, desc);
> + else
> + err = vm_find_vqs_msi(vdev, nvqs, vqs, callbacks,
> + names, false, ctx, desc);
> + if (!err)
> + return 0;
> + }
> +
> + return vm_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
> +}
> +
> +static int vm_request_msi_vectors(struct virtio_device *vdev, int nirqs)
> +{
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + unsigned int v;
> + int irq, err;
> +
> + if (vm_dev->msi_enabled)
> + return -EINVAL;
> +
> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
> + GFP_KERNEL);
> + if (!vm_dev->vm_vq_names)
> + return -ENOMEM;
> +
> + mmio_get_msi_domain(vdev);
> + err = mmio_msi_domain_alloc_irqs(&vdev->dev, nirqs);
> + if (err) {
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> + return err;
> + }
> +
> + mmio_msi_set_enable(vm_dev, 1);
> + vm_dev->msi_enabled = true;
> +
> + v = vm_dev->msi_used_vectors;
> + /* The first MSI vector is used for configuration change event. */
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-config", dev_name(&vdev->dev));
> + irq = mmio_msi_irq_vector(&vdev->dev, v);
> + err = request_irq(irq, vm_config_changed, 0, vm_dev->vm_vq_names[v],
> + vm_dev);
> + if (err)
> + goto error_request_irq;
> +
> + /* Set the configuration event mapping. */
> + if (vm_dev->msi_share)
> + mmio_msi_config_vector(vm_dev, v);
> +
> + ++vm_dev->msi_used_vectors;
> +
> + if (vm_dev->msi_share) {
> + v = vm_dev->msi_used_vectors;
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-virtqueues", dev_name(&vm_dev->vdev.dev));
> + err = request_irq(mmio_msi_irq_vector(&vdev->dev, v),
> + vm_vring_interrupt, 0, vm_dev->vm_vq_names[v],
> + vm_dev);
> + if (err)
> + goto error_request_irq;
> + ++vm_dev->msi_used_vectors;
> + }
> +
> + return 0;
> +
> +error_request_irq:
> + vm_free_msi_irqs(vdev);
> +
> + return err;
> +}
> +
> +static void vm_free_msi_irqs(struct virtio_device *vdev)
> +{
> + int i;
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + mmio_msi_set_enable(vm_dev, 0);
> + for (i = 0; i < vm_dev->msi_used_vectors; i++)
> + free_irq(mmio_msi_irq_vector(&vdev->dev, i), vm_dev);
> + mmio_msi_domain_free_irqs(&vdev->dev);
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> + vm_dev->msi_enabled = false;
> + vm_dev->msi_used_vectors = 0;
> }
>
> static const char *vm_bus_name(struct virtio_device *vdev)
> @@ -609,6 +876,8 @@ static int virtio_mmio_probe(struct platform_device *pdev)
>
> platform_set_drvdata(pdev, vm_dev);
>
> + mmio_msi_create_irq_domain();
> +
> rc = register_virtio_device(&vm_dev->vdev);
> if (rc)
> put_device(&vm_dev->vdev.dev);
> diff --git a/drivers/virtio/virtio_mmio_common.h b/drivers/virtio/virtio_mmio_common.h
> index 90cb304..ccf6320 100644
> --- a/drivers/virtio/virtio_mmio_common.h
> +++ b/drivers/virtio/virtio_mmio_common.h
> @@ -26,6 +26,14 @@ struct virtio_mmio_device {
>
> unsigned short notify_base;
> unsigned short notify_multiplier;
> +
> + /* Name strings for interrupts. This size should be enough. */
> + char (*vm_vq_names)[256];
> +
> + /* used vectors */
> + unsigned int msi_used_vectors;
> + bool msi_share;
> + bool msi_enabled;
> };
>
> #endif
> diff --git a/drivers/virtio/virtio_mmio_msi.h b/drivers/virtio/virtio_mmio_msi.h
> index 27cb2af..10038cb 100644
> --- a/drivers/virtio/virtio_mmio_msi.h
> +++ b/drivers/virtio/virtio_mmio_msi.h
> @@ -8,6 +8,7 @@
> #include <linux/irq.h>
> #include <linux/irqdomain.h>
> #include <linux/platform_device.h>
> +#include "virtio_mmio_common.h"
>
> static irq_hw_number_t mmio_msi_hwirq;
> static struct irq_domain *mmio_msi_domain;
> @@ -21,12 +22,41 @@ void __weak irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
> {
> }
>
> +static void __iomem *vm_dev_base(struct msi_desc *desc)
> +{
> + if (desc) {
> + struct device *dev = desc->dev;
> + struct virtio_device *vdev = dev_to_virtio(dev);
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + return vm_dev->base;
> + }
> +
> + return NULL;
> +}
> +
> +static void mmio_msi_set_mask_bit(struct irq_data *data, u32 flag)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + void __iomem *base = vm_dev_base(desc);
> + unsigned int offset = data->irq - desc->irq;
> +
> + if (base) {
> + u32 op = flag ? VIRTIO_MMIO_MSI_CMD_MASK :
> + VIRTIO_MMIO_MSI_CMD_UNMASK;
> + writel(offset, base + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(op, base + VIRTIO_MMIO_MSI_COMMAND);
> + }
> +}
> +
> static void mmio_msi_mask_irq(struct irq_data *data)
> {
> + mmio_msi_set_mask_bit(data, 1);
> }
>
> static void mmio_msi_unmask_irq(struct irq_data *data)
> {
> + mmio_msi_set_mask_bit(data, 0);
> }
>
> static struct irq_chip mmio_msi_controller = {
> @@ -86,8 +116,60 @@ static inline void mmio_msi_create_irq_domain(void)
> irq_domain_free_fwnode(fn);
> }
> }
> +
> +static void mmio_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
> +{
> + void __iomem *base = vm_dev_base(desc);
> +
> + if (base) {
> + writel(desc->platform.msi_index, base +
> + VIRTIO_MMIO_MSI_VEC_SEL);
> + writel(msg->address_lo, base + VIRTIO_MMIO_MSI_ADDRESS_LOW);
> + writel(msg->address_hi, base + VIRTIO_MMIO_MSI_ADDRESS_HIGH);
> + writel(msg->data, base + VIRTIO_MMIO_MSI_DATA);
> + writel(VIRTIO_MMIO_MSI_CMD_CONFIGURE,
> + base + VIRTIO_MMIO_MSI_COMMAND);
> + }
> +}
> +
> +static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
> + unsigned int nvec)
> +{
> + return platform_msi_domain_alloc_irqs(dev, nvec,
> + mmio_write_msi_msg);
> +}
> +
> +static inline void mmio_msi_domain_free_irqs(struct device *dev)
> +{
> + return platform_msi_domain_free_irqs(dev);
> +}
> +
> +static inline void mmio_get_msi_domain(struct virtio_device *vdev)
> +{
> + if (!vdev->dev.msi_domain)
> + vdev->dev.msi_domain = mmio_msi_domain;
> +}
> +
> +static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + struct msi_desc *entry = first_msi_entry(dev);
> +
> + return entry->irq + nr;
> +}
> +
> #else
> static inline void mmio_msi_create_irq_domain(void) {}
> +static inline int mmio_msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + return -EINVAL;
> +}
> +static inline void mmio_get_msi_domain(struct virtio_device *vdev) {}
> +static inline int mmio_msi_domain_alloc_irqs(struct device *dev,
> + unsigned int nvec)
> +{
> + return -EINVAL;
> +}
> +static inline void mmio_msi_domain_free_irqs(struct device *dev) {}
> #endif
>
> #endif
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index 5d93c01..5eb3900 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -52,7 +52,7 @@
> * rest are per-device feature bits.
> */
> #define VIRTIO_TRANSPORT_F_START 28
> -#define VIRTIO_TRANSPORT_F_END 40
> +#define VIRTIO_TRANSPORT_F_END 41
>
> #ifndef VIRTIO_CONFIG_NO_LEGACY
> /* Do we get callbacks when the ring is completely used, even if we've
> @@ -94,4 +94,9 @@
> * layer.
> */
> #define VIRTIO_F_MMIO_NOTIFICATION 39
> +
> +/*
> + * This feature indicates the MSI support on MMIO layer.
> + */
> +#define VIRTIO_F_MMIO_MSI 40
> #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
> diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h
> index c4b0968..777cb0e 100644
> --- a/include/uapi/linux/virtio_mmio.h
> +++ b/include/uapi/linux/virtio_mmio.h
> @@ -122,6 +122,21 @@
> #define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0
> #define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4
>
> +/* MSI max vector number that device supports - Read Only */
> +#define VIRTIO_MMIO_MSI_VEC_NUM 0x0c0
> +/* MSI state register - Read Only */
> +#define VIRTIO_MMIO_MSI_STATE 0x0c4
> +/* MSI command register - Write Only */
> +#define VIRTIO_MMIO_MSI_COMMAND 0x0c8
> +/* MSI vector selector - Write Only */
> +#define VIRTIO_MMIO_MSI_VEC_SEL 0x0d0
> +/* MSI low 32 bit address, 64 bits in two halves */
> +#define VIRTIO_MMIO_MSI_ADDRESS_LOW 0x0d4
> +/* MSI high 32 bit address, 64 bits in two halves */
> +#define VIRTIO_MMIO_MSI_ADDRESS_HIGH 0x0d8
> +/* MSI 32 bit data */
> +#define VIRTIO_MMIO_MSI_DATA 0x0dc
> +
> /* Configuration atomicity value */
> #define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc
>
> @@ -130,6 +145,22 @@
> #define VIRTIO_MMIO_CONFIG 0x100
>
>
> +/* MSI commands */
> +#define VIRTIO_MMIO_MSI_CMD_ENABLE 0x1
> +#define VIRTIO_MMIO_MSI_CMD_DISABLE 0x2
> +#define VIRTIO_MMIO_MSI_CMD_CONFIGURE 0x3
> +#define VIRTIO_MMIO_MSI_CMD_MASK 0x4
> +#define VIRTIO_MMIO_MSI_CMD_UNMASK 0x5
> +#define VIRTIO_MMIO_MSI_CMD_MAP_CONFIG 0x6
> +#define VIRTIO_MMIO_MSI_CMD_MAP_QUEUE 0x7
> +
> +/* MSI NO_VECTOR */
> +#define VIRTIO_MMIO_MSI_NO_VECTOR 0xffffffff
> +
> +/* MSI state enabled state mask */
> +#define VIRTIO_MMIO_MSI_ENABLED_MASK (1 << 31)
> +/* MSI state MSI sharing mask */
> +#define VIRTIO_MMIO_MSI_SHARING_MASK (1 << 30)
>
You should be able to drop the MAP commands and
replace the vector number with the vq number.
I also don't really see a burning need for separate
enable/disable/configure/mask/unmask commands.
Wouldn't two registers, mask and enable, be enough?
That would remove the need for the NO_VECTOR hack too.
What is sharing, such that the hypervisor needs to worry about it?
Just make all interrupts shared, or all unshared.
Does it matter for performance?
> /*
> * Interrupt flags (re: interrupt status & acknowledge registers)
> --
> 1.8.3.1
^ permalink raw reply [flat|nested] 104+ messages in thread