This series intends to implement IRQ offloading for vhost_vdpa. By the feat of irq forwarding facilities like posted interrupt on X86, irq bypass can help deliver interrupts to vCPU directly. vDPA devices have dedicated hardware backends like VFIO pass-throughed devices. So it would be possible to setup irq offloading(irq bypass) for vDPA devices and gain performance improvements. In my testing, with this feature, we can save 0.1ms in a ping between two VFs on average. changes from V1: (1)dropped vfio changes. (3)removed KVM_HVAE_IRQ_BYPASS checks (4)locking fixes (5)simplified vhost_vdpa_update_vq_irq() (6)minor improvements Zhu Lingshan (6): vhost: introduce vhost_call_ctx kvm: detect assigned device via irqbypass manager vDPA: implement IRQ offloading helpers in vDPA core vhost_vdpa: implement IRQ offloading in vhost_vdpa ifcvf: replace irq_request/free with vDPA helpers irqbypass: do not start cons/prod when failed connect arch/x86/kvm/x86.c | 10 ++++++-- drivers/vdpa/ifcvf/ifcvf_main.c | 14 +++++++---- drivers/vdpa/vdpa.c | 42 +++++++++++++++++++++++++++++++++ drivers/vhost/Kconfig | 1 + drivers/vhost/vdpa.c | 52 +++++++++++++++++++++++++++++++++++++++-- drivers/vhost/vhost.c | 22 ++++++++++++----- drivers/vhost/vhost.h | 9 ++++++- include/linux/vdpa.h | 13 +++++++++++ virt/lib/irqbypass.c | 16 ++++++++----- 9 files changed, 157 insertions(+), 22 deletions(-) -- 1.8.3.1
This commit introduces struct vhost_call_ctx which replaced raw struct eventfd_ctx *call_ctx in struct vhost_virtqueue. Besides eventfd_ctx, it contains a spin lock and an irq_bypass_producer in its structure. Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> Suggested-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/vdpa.c | 4 ++-- drivers/vhost/vhost.c | 22 ++++++++++++++++------ drivers/vhost/vhost.h | 9 ++++++++- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7580e34..2fcc422 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -96,7 +96,7 @@ static void handle_vq_kick(struct vhost_work *work) static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) { struct vhost_virtqueue *vq = private; - struct eventfd_ctx *call_ctx = vq->call_ctx; + struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; if (call_ctx) eventfd_signal(call_ctx, 1); @@ -382,7 +382,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_CALL: - if (vq->call_ctx) { + if (vq->call_ctx.ctx) { cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; } else { diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d7b8df3..4004e94 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -298,6 +298,13 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } +static void vhost_call_ctx_reset(struct vhost_call_ctx *call_ctx) +{ + call_ctx->ctx = NULL; + memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); + spin_lock_init(&call_ctx->ctx_lock); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -319,13 +326,13 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->log_base = NULL; vq->error_ctx = NULL; vq->kick = NULL; - vq->call_ctx = NULL; vq->log_ctx = NULL; vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + vhost_call_ctx_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } @@ -685,8 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) eventfd_ctx_put(dev->vqs[i]->error_ctx); if (dev->vqs[i]->kick) fput(dev->vqs[i]->kick); - if (dev->vqs[i]->call_ctx) - eventfd_ctx_put(dev->vqs[i]->call_ctx); + if (dev->vqs[i]->call_ctx.ctx) + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); @@ -1629,7 +1636,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = PTR_ERR(ctx); break; } - swap(ctx, vq->call_ctx); + + spin_lock(&vq->call_ctx.ctx_lock); + swap(ctx, vq->call_ctx.ctx); + spin_unlock(&vq->call_ctx.ctx_lock); break; case VHOST_SET_VRING_ERR: if (copy_from_user(&f, argp, sizeof f)) { @@ -2440,8 +2450,8 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx && vhost_notify(dev, vq)) - eventfd_signal(vq->call_ctx, 1); + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) + eventfd_signal(vq->call_ctx.ctx, 1); } EXPORT_SYMBOL_GPL(vhost_signal); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index c8e96a0..402c62e 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -13,6 +13,7 @@ #include <linux/virtio_ring.h> #include <linux/atomic.h> #include <linux/vhost_iotlb.h> +#include <linux/irqbypass.h> struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -60,6 +61,12 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; +struct vhost_call_ctx { + struct eventfd_ctx *ctx; + struct irq_bypass_producer producer; + spinlock_t ctx_lock; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -72,7 +79,7 @@ struct vhost_virtqueue { vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; - struct eventfd_ctx *call_ctx; + struct vhost_call_ctx call_ctx; struct eventfd_ctx *error_ctx; struct eventfd_ctx *log_ctx; -- 1.8.3.1
vDPA devices has dedicated backed hardware like passthrough-ed devices. Then it is possible to setup irq offloading to vCPU for vDPA devices. Thus this patch tries to manipulated assigned device counters via irqbypass manager. We will increase/decrease the assigned device counter in kvm/x86. Both vDPA and VFIO would go through this code path. This code path only affect x86 for now. Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> Suggested-by: Jason Wang <jasowang@redhat.com> --- arch/x86/kvm/x86.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00c88c2..20c07d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10624,11 +10624,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + int ret; irqfd->producer = prod; + kvm_arch_start_assignment(irqfd->kvm); + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, + prod->irq, irqfd->gsi, 1); + + if (ret) + kvm_arch_end_assignment(irqfd->kvm); - return kvm_x86_ops.update_pi_irte(irqfd->kvm, - prod->irq, irqfd->gsi, 1); + return ret; } void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, -- 1.8.3.1
This commit implements IRQ offloading helpers in vDPA core by introducing two couple of functions: vdpa_alloc_vq_irq() and vdpa_free_vq_irq(): request irq and free irq, will setup irq offloading. vdpa_setup_irq() and vdpa_unsetup_irq(): supportive functions, will call vhost_vdpa helpers. Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> Suggested-by: Jason Wang <jasowang@redhat.com> --- drivers/vdpa/vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/vdpa.h | 13 +++++++++++++ 2 files changed, 55 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index ff6562f..cce4d91 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -163,6 +163,48 @@ void vdpa_unregister_driver(struct vdpa_driver *drv) } EXPORT_SYMBOL_GPL(vdpa_unregister_driver); +static void vdpa_setup_irq(struct vdpa_device *vdev, int qid, int irq) +{ + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + + if (drv->setup_vq_irq) + drv->setup_vq_irq(vdev, qid, irq); +} + +static void vdpa_unsetup_irq(struct vdpa_device *vdev, int qid) +{ + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + + if (drv->unsetup_vq_irq) + drv->unsetup_vq_irq(vdev, qid); +} + +int vdpa_alloc_vq_irq(struct device *dev, struct vdpa_device *vdev, + unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id, + int qid) +{ + int ret; + + ret = devm_request_irq(dev, irq, handler, irqflags, devname, dev_id); + if (ret) + dev_err(dev, "Failed to request irq for vq %d\n", qid); + else + vdpa_setup_irq(vdev, qid, irq); + + return ret; + +} +EXPORT_SYMBOL_GPL(vdpa_alloc_vq_irq); + +void vdpa_free_vq_irq(struct device *dev, struct vdpa_device *vdev, int irq, + int qid, void *dev_id) +{ + devm_free_irq(dev, irq, dev_id); + vdpa_unsetup_irq(vdev, qid); +} +EXPORT_SYMBOL_GPL(vdpa_free_vq_irq); + static int vdpa_init(void) { return bus_register(&vdpa_bus); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 239db79..7d64d83 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -220,17 +220,30 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, int vdpa_register_device(struct vdpa_device *vdev); void vdpa_unregister_device(struct vdpa_device *vdev); +/* request irq for a vq, setup irq offloading if its a vhost_vdpa vq */ +int vdpa_alloc_vq_irq(struct device *dev, struct vdpa_device *vdev, + unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id, + int qid); +/* free irq for a vq, unsetup irq offloading if its a vhost_vdpa vq */ +void vdpa_free_vq_irq(struct device *dev, struct vdpa_device *vdev, int irq, + int qid, void *dev_id); + /** * vdpa_driver - operations for a vDPA driver * @driver: underlying device driver * @probe: the function to call when a device is found. Returns 0 or -errno. * @remove: the function to call when a device is removed. + * @setup_vq_irq: setup irq offloading for a vhost_vdpa vq. + * @unsetup_vq_irq: unsetup offloading for a vhost_vdpa vq. */ struct vdpa_driver { struct device_driver driver; int (*probe)(struct vdpa_device *vdev); void (*remove)(struct vdpa_device *vdev); + void (*setup_vq_irq)(struct vdpa_device *vdev, int qid, int irq); + void (*unsetup_vq_irq)(struct vdpa_device *vdev, int qid); }; #define vdpa_register_driver(drv) \ -- 1.8.3.1
This patch introduce a set of functions for setup/unsetup and update irq offloading respectively by register/unregister and re-register the irq_bypass_producer. Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> Suggested-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/Kconfig | 1 + drivers/vhost/vdpa.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index d3688c6..587fbae 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -65,6 +65,7 @@ config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" depends on EVENTFD select VHOST + select IRQ_BYPASS_MANAGER depends on VDPA help This kernel module can be loaded in host kernel to accelerate diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2fcc422..b9078d4 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -115,6 +115,43 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) return IRQ_HANDLED; } +static void vhost_vdpa_setup_vq_irq(struct vdpa_device *dev, int qid, int irq) +{ + struct vhost_vdpa *v = vdpa_get_drvdata(dev); + struct vhost_virtqueue *vq = &v->vqs[qid]; + int ret; + + spin_lock(&vq->call_ctx.ctx_lock); + if (!vq->call_ctx.ctx) { + spin_unlock(&vq->call_ctx.ctx_lock); + return; + } + + vq->call_ctx.producer.token = vq->call_ctx.ctx; + vq->call_ctx.producer.irq = irq; + ret = irq_bypass_register_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + +static void vhost_vdpa_unsetup_vq_irq(struct vdpa_device *dev, int qid) +{ + struct vhost_vdpa *v = vdpa_get_drvdata(dev); + struct vhost_virtqueue *vq = &v->vqs[qid]; + + spin_lock(&vq->call_ctx.ctx_lock); + irq_bypass_unregister_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + +static void vhost_vdpa_update_vq_irq(struct vhost_virtqueue *vq) +{ + spin_lock(&vq->call_ctx.ctx_lock); + irq_bypass_unregister_producer(&vq->call_ctx.producer); + vq->call_ctx.producer.token = vq->call_ctx.ctx; + irq_bypass_register_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; @@ -332,6 +369,7 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) return 0; } + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -390,6 +428,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, cb.private = NULL; } ops->set_vq_cb(vdpa, idx, &cb); + /* + * if it has a non-zero irq, means there is a + * previsouly registered irq_bypass_producer, + * we should update it when ctx (its token) + * changes. + */ + if (vq->call_ctx.producer.irq) + vhost_vdpa_update_vq_irq(vq); break; case VHOST_SET_VRING_NUM: @@ -951,6 +997,8 @@ static void vhost_vdpa_remove(struct vdpa_device *vdpa) }, .probe = vhost_vdpa_probe, .remove = vhost_vdpa_remove, + .setup_vq_irq = vhost_vdpa_setup_vq_irq, + .unsetup_vq_irq = vhost_vdpa_unsetup_vq_irq, }; static int __init vhost_vdpa_init(void) -- 1.8.3.1
This commit replaced irq_request/free() with helpers in vDPA core, so that it can request/free irq and setup irq offloading on order. Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> Suggested-by: Jason Wang <jasowang@redhat.com> --- drivers/vdpa/ifcvf/ifcvf_main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index f5a60c1..bd2a317 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -47,11 +47,12 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) { struct pci_dev *pdev = adapter->pdev; struct ifcvf_hw *vf = &adapter->vf; + struct vdpa_device *vdpa = &adapter->vdpa; int i; for (i = 0; i < queues; i++) - devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); + vdpa_free_vq_irq(&pdev->dev, vdpa, vf->vring[i].irq, i, &vf->vring[i]); ifcvf_free_irq_vectors(pdev); } @@ -60,6 +61,7 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct ifcvf_hw *vf = &adapter->vf; + struct vdpa_device *vdpa = &adapter->vdpa; int vector, i, ret, irq; ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, @@ -73,6 +75,10 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) pci_name(pdev)); vector = 0; irq = pci_irq_vector(pdev, vector); + /* This isconfig interrupt, config accesses all go + * through userspace, so no need to setup + * config interrupt offloading. + */ ret = devm_request_irq(&pdev->dev, irq, ifcvf_config_changed, 0, vf->config_msix_name, vf); @@ -82,13 +88,11 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) pci_name(pdev), i); vector = i + IFCVF_MSI_QUEUE_OFF; irq = pci_irq_vector(pdev, vector); - ret = devm_request_irq(&pdev->dev, irq, + ret = vdpa_alloc_vq_irq(&pdev->dev, vdpa, irq, ifcvf_intr_handler, 0, vf->vring[i].msix_name, - &vf->vring[i]); + &vf->vring[i], i); if (ret) { - IFCVF_ERR(pdev, - "Failed to request irq for vq %d\n", i); ifcvf_free_irq(adapter, i); return ret; -- 1.8.3.1
If failed to connect, there is no need to start consumer nor producer. Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> Suggested-by: Jason Wang <jasowang@redhat.com> --- virt/lib/irqbypass.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 28fda42..c9bb395 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -40,17 +40,21 @@ static int __connect(struct irq_bypass_producer *prod, if (prod->add_consumer) ret = prod->add_consumer(prod, cons); - if (!ret) { - ret = cons->add_producer(cons, prod); - if (ret && prod->del_consumer) - prod->del_consumer(prod, cons); - } + if (ret) + goto err_add_consumer; + + ret = cons->add_producer(cons, prod); + if (ret) + goto err_add_producer; if (cons->start) cons->start(cons); if (prod->start) prod->start(prod); - +err_add_producer: + if (prod->del_consumer) + prod->del_consumer(prod, cons); +err_add_consumer: return ret; } -- 1.8.3.1
On 2020/7/16 下午7:23, Zhu Lingshan wrote: > This commit introduces struct vhost_call_ctx which replaced > raw struct eventfd_ctx *call_ctx in struct vhost_virtqueue. > Besides eventfd_ctx, it contains a spin lock and an > irq_bypass_producer in its structure. > > Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> > Suggested-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vhost/vdpa.c | 4 ++-- > drivers/vhost/vhost.c | 22 ++++++++++++++++------ > drivers/vhost/vhost.h | 9 ++++++++- > 3 files changed, 26 insertions(+), 9 deletions(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 7580e34..2fcc422 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -96,7 +96,7 @@ static void handle_vq_kick(struct vhost_work *work) > static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) > { > struct vhost_virtqueue *vq = private; > - struct eventfd_ctx *call_ctx = vq->call_ctx; > + struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; > > if (call_ctx) > eventfd_signal(call_ctx, 1); > @@ -382,7 +382,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, > break; > > case VHOST_SET_VRING_CALL: > - if (vq->call_ctx) { > + if (vq->call_ctx.ctx) { > cb.callback = vhost_vdpa_virtqueue_cb; > cb.private = vq; > } else { > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index d7b8df3..4004e94 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -298,6 +298,13 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) > __vhost_vq_meta_reset(d->vqs[i]); > } > > +static void vhost_call_ctx_reset(struct vhost_call_ctx *call_ctx) > +{ > + call_ctx->ctx = NULL; > + memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); > + spin_lock_init(&call_ctx->ctx_lock); > +} > + > static void vhost_vq_reset(struct vhost_dev *dev, > struct vhost_virtqueue *vq) > { > @@ -319,13 +326,13 @@ static void vhost_vq_reset(struct vhost_dev *dev, > vq->log_base = NULL; > vq->error_ctx = NULL; > vq->kick = NULL; > - vq->call_ctx = NULL; > vq->log_ctx = NULL; > vhost_reset_is_le(vq); > vhost_disable_cross_endian(vq); > vq->busyloop_timeout = 0; > vq->umem = NULL; > vq->iotlb = NULL; > + vhost_call_ctx_reset(&vq->call_ctx); > __vhost_vq_meta_reset(vq); > } > > @@ -685,8 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > eventfd_ctx_put(dev->vqs[i]->error_ctx); > if (dev->vqs[i]->kick) > fput(dev->vqs[i]->kick); > - if (dev->vqs[i]->call_ctx) > - eventfd_ctx_put(dev->vqs[i]->call_ctx); > + if (dev->vqs[i]->call_ctx.ctx) > + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); > vhost_vq_reset(dev, dev->vqs[i]); > } > vhost_dev_free_iovecs(dev); > @@ -1629,7 +1636,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg > r = PTR_ERR(ctx); > break; > } > - swap(ctx, vq->call_ctx); > + > + spin_lock(&vq->call_ctx.ctx_lock); > + swap(ctx, vq->call_ctx.ctx); > + spin_unlock(&vq->call_ctx.ctx_lock); > break; > case VHOST_SET_VRING_ERR: > if (copy_from_user(&f, argp, sizeof f)) { > @@ -2440,8 +2450,8 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) > void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) > { > /* Signal the Guest tell them we used something up. */ > - if (vq->call_ctx && vhost_notify(dev, vq)) > - eventfd_signal(vq->call_ctx, 1); > + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) > + eventfd_signal(vq->call_ctx.ctx, 1); > } > EXPORT_SYMBOL_GPL(vhost_signal); > > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index c8e96a0..402c62e 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -13,6 +13,7 @@ > #include <linux/virtio_ring.h> > #include <linux/atomic.h> > #include <linux/vhost_iotlb.h> > +#include <linux/irqbypass.h> > > struct vhost_work; > typedef void (*vhost_work_fn_t)(struct vhost_work *work); > @@ -60,6 +61,12 @@ enum vhost_uaddr_type { > VHOST_NUM_ADDRS = 3, > }; > > +struct vhost_call_ctx { I think maybe "vhost_vring_call" is a better name since it contains not only the eventfd_ctx now. Thanks > + struct eventfd_ctx *ctx; > + struct irq_bypass_producer producer; > + spinlock_t ctx_lock; > +}; > + > /* The virtqueue structure describes a queue attached to a device. */ > struct vhost_virtqueue { > struct vhost_dev *dev; > @@ -72,7 +79,7 @@ struct vhost_virtqueue { > vring_used_t __user *used; > const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; > struct file *kick; > - struct eventfd_ctx *call_ctx; > + struct vhost_call_ctx call_ctx; > struct eventfd_ctx *error_ctx; > struct eventfd_ctx *log_ctx; >
On 2020/7/16 下午7:23, Zhu Lingshan wrote: > vDPA devices has dedicated backed hardware like > passthrough-ed devices. Then it is possible to setup irq > offloading to vCPU for vDPA devices. Thus this patch tries to > manipulated assigned device counters via irqbypass manager. This part needs some tweak, e.g why assigned device could be detected through this way. > > We will increase/decrease the assigned device counter in kvm/x86. And you need explain why we don't need similar thing in other arch. Thanks > Both vDPA and VFIO would go through this code path. > > This code path only affect x86 for now. > > Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> > Suggested-by: Jason Wang <jasowang@redhat.com> > --- > arch/x86/kvm/x86.c | 10 ++++++++-- > 1 file changed, 8 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 00c88c2..20c07d3 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -10624,11 +10624,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, > { > struct kvm_kernel_irqfd *irqfd = > container_of(cons, struct kvm_kernel_irqfd, consumer); > + int ret; > > irqfd->producer = prod; > + kvm_arch_start_assignment(irqfd->kvm); > + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, > + prod->irq, irqfd->gsi, 1); > + > + if (ret) > + kvm_arch_end_assignment(irqfd->kvm); > > - return kvm_x86_ops.update_pi_irte(irqfd->kvm, > - prod->irq, irqfd->gsi, 1); > + return ret; > } > > void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
On 2020/7/16 下午7:23, Zhu Lingshan wrote: > This commit implements IRQ offloading helpers Let's say "vq irq allocate/free helpers" here. > in vDPA core by > introducing two couple of functions: > > vdpa_alloc_vq_irq() and vdpa_free_vq_irq(): request irq and free > irq, will setup irq offloading. > > vdpa_setup_irq() and vdpa_unsetup_irq(): supportive functions, > will call vhost_vdpa helpers. > > Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> > Suggested-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vdpa/vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > include/linux/vdpa.h | 13 +++++++++++++ > 2 files changed, 55 insertions(+) > > diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c > index ff6562f..cce4d91 100644 > --- a/drivers/vdpa/vdpa.c > +++ b/drivers/vdpa/vdpa.c > @@ -163,6 +163,48 @@ void vdpa_unregister_driver(struct vdpa_driver *drv) > } > EXPORT_SYMBOL_GPL(vdpa_unregister_driver); > > +static void vdpa_setup_irq(struct vdpa_device *vdev, int qid, int irq) > +{ > + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); > + > + if (drv->setup_vq_irq) > + drv->setup_vq_irq(vdev, qid, irq); > +} > + > +static void vdpa_unsetup_irq(struct vdpa_device *vdev, int qid) > +{ > + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); > + > + if (drv->unsetup_vq_irq) > + drv->unsetup_vq_irq(vdev, qid); Do you need to check the existence of drv before calling unset_vq_irq()? And how can this synchronize with driver releasing and binding? > +} > + > +int vdpa_alloc_vq_irq(struct device *dev, struct vdpa_device *vdev, > + unsigned int irq, irq_handler_t handler, > + unsigned long irqflags, const char *devname, void *dev_id, > + int qid) Let's add comment as what has been done by other exported helpers. > +{ > + int ret; > + > + ret = devm_request_irq(dev, irq, handler, irqflags, devname, dev_id); > + if (ret) > + dev_err(dev, "Failed to request irq for vq %d\n", qid); > + else > + vdpa_setup_irq(vdev, qid, irq); > + > + return ret; > + > +} > +EXPORT_SYMBOL_GPL(vdpa_alloc_vq_irq); > + > +void vdpa_free_vq_irq(struct device *dev, struct vdpa_device *vdev, int irq, > + int qid, void *dev_id) > +{ > + devm_free_irq(dev, irq, dev_id); > + vdpa_unsetup_irq(vdev, qid); > +} > +EXPORT_SYMBOL_GPL(vdpa_free_vq_irq); > + > static int vdpa_init(void) > { > return bus_register(&vdpa_bus); > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h > index 239db79..7d64d83 100644 > --- a/include/linux/vdpa.h > +++ b/include/linux/vdpa.h > @@ -220,17 +220,30 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, > > int vdpa_register_device(struct vdpa_device *vdev); > void vdpa_unregister_device(struct vdpa_device *vdev); > +/* request irq for a vq, setup irq offloading if its a vhost_vdpa vq */ Let's do the documentation in vdpa.c, and again, document the devres implications or j_xxxust name it as vdpa_devm_xxx. > +int vdpa_alloc_vq_irq(struct device *dev, struct vdpa_device *vdev, > + unsigned int irq, irq_handler_t handler, > + unsigned long irqflags, const char *devname, void *dev_id, > + int qid); > +/* free irq for a vq, unsetup irq offloading if its a vhost_vdpa vq */ > +void vdpa_free_vq_irq(struct device *dev, struct vdpa_device *vdev, int irq, > + int qid, void *dev_id); > + > > /** > * vdpa_driver - operations for a vDPA driver > * @driver: underlying device driver > * @probe: the function to call when a device is found. Returns 0 or -errno. > * @remove: the function to call when a device is removed. > + * @setup_vq_irq: setup irq offloading for a vhost_vdpa vq. > + * @unsetup_vq_irq: unsetup offloading for a vhost_vdpa vq. Let's not limit the methods for a specific use case like irq offloading here. > */ > struct vdpa_driver { > struct device_driver driver; > int (*probe)(struct vdpa_device *vdev); > void (*remove)(struct vdpa_device *vdev); > + void (*setup_vq_irq)(struct vdpa_device *vdev, int qid, int irq); > + void (*unsetup_vq_irq)(struct vdpa_device *vdev, int qid); To be consistent with the exported helper, let's use alloc_vq_irq/free_vq_irq Thanks > }; > > #define vdpa_register_driver(drv) \
On 2020/7/16 下午7:23, Zhu Lingshan wrote: > This patch introduce a set of functions for setup/unsetup > and update irq offloading respectively by register/unregister > and re-register the irq_bypass_producer. > > Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> > Suggested-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vhost/Kconfig | 1 + > drivers/vhost/vdpa.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 49 insertions(+) > > diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig > index d3688c6..587fbae 100644 > --- a/drivers/vhost/Kconfig > +++ b/drivers/vhost/Kconfig > @@ -65,6 +65,7 @@ config VHOST_VDPA > tristate "Vhost driver for vDPA-based backend" > depends on EVENTFD > select VHOST > + select IRQ_BYPASS_MANAGER > depends on VDPA > help > This kernel module can be loaded in host kernel to accelerate > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 2fcc422..b9078d4 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -115,6 +115,43 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) > return IRQ_HANDLED; > } > > +static void vhost_vdpa_setup_vq_irq(struct vdpa_device *dev, int qid, int irq) > +{ > + struct vhost_vdpa *v = vdpa_get_drvdata(dev); > + struct vhost_virtqueue *vq = &v->vqs[qid]; > + int ret; > + > + spin_lock(&vq->call_ctx.ctx_lock); > + if (!vq->call_ctx.ctx) { > + spin_unlock(&vq->call_ctx.ctx_lock); > + return; > + } I think we can simply remove this check as what is done in vhost_vdpq_update_vq_irq(). > + > + vq->call_ctx.producer.token = vq->call_ctx.ctx; > + vq->call_ctx.producer.irq = irq; > + ret = irq_bypass_register_producer(&vq->call_ctx.producer); > + spin_unlock(&vq->call_ctx.ctx_lock); > +} > + > +static void vhost_vdpa_unsetup_vq_irq(struct vdpa_device *dev, int qid) > +{ > + struct vhost_vdpa *v = vdpa_get_drvdata(dev); > + struct vhost_virtqueue *vq = &v->vqs[qid]; > + > + spin_lock(&vq->call_ctx.ctx_lock); > + irq_bypass_unregister_producer(&vq->call_ctx.producer); > + spin_unlock(&vq->call_ctx.ctx_lock); > +} > + > +static void vhost_vdpa_update_vq_irq(struct vhost_virtqueue *vq) > +{ > + spin_lock(&vq->call_ctx.ctx_lock); > + irq_bypass_unregister_producer(&vq->call_ctx.producer); > + vq->call_ctx.producer.token = vq->call_ctx.ctx; > + irq_bypass_register_producer(&vq->call_ctx.producer); > + spin_unlock(&vq->call_ctx.ctx_lock); > +} > + > static void vhost_vdpa_reset(struct vhost_vdpa *v) > { > struct vdpa_device *vdpa = v->vdpa; > @@ -332,6 +369,7 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) > > return 0; > } > + If you really want to fix coding style issue, it's better to have another patch. > static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, > void __user *argp) > { > @@ -390,6 +428,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, > cb.private = NULL; > } > ops->set_vq_cb(vdpa, idx, &cb); > + /* > + * if it has a non-zero irq, means there is a > + * previsouly registered irq_bypass_producer, > + * we should update it when ctx (its token) > + * changes. > + */ > + if (vq->call_ctx.producer.irq) > + vhost_vdpa_update_vq_irq(vq); Is this safe to check producer.irq outside spinlock? Thanks > break; > > case VHOST_SET_VRING_NUM: > @@ -951,6 +997,8 @@ static void vhost_vdpa_remove(struct vdpa_device *vdpa) > }, > .probe = vhost_vdpa_probe, > .remove = vhost_vdpa_remove, > + .setup_vq_irq = vhost_vdpa_setup_vq_irq, > + .unsetup_vq_irq = vhost_vdpa_unsetup_vq_irq, > }; > > static int __init vhost_vdpa_init(void)
On 2020/7/16 下午7:23, Zhu Lingshan wrote: > This commit replaced irq_request/free() with helpers in vDPA > core, so that it can request/free irq and setup irq offloading > on order. > > Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> > Suggested-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vdpa/ifcvf/ifcvf_main.c | 14 +++++++++----- > 1 file changed, 9 insertions(+), 5 deletions(-) > > diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c > index f5a60c1..bd2a317 100644 > --- a/drivers/vdpa/ifcvf/ifcvf_main.c > +++ b/drivers/vdpa/ifcvf/ifcvf_main.c > @@ -47,11 +47,12 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) > { > struct pci_dev *pdev = adapter->pdev; > struct ifcvf_hw *vf = &adapter->vf; > + struct vdpa_device *vdpa = &adapter->vdpa; > int i; > > > for (i = 0; i < queues; i++) > - devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); > + vdpa_free_vq_irq(&pdev->dev, vdpa, vf->vring[i].irq, i, &vf->vring[i]); > > ifcvf_free_irq_vectors(pdev); > } > @@ -60,6 +61,7 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) > { > struct pci_dev *pdev = adapter->pdev; > struct ifcvf_hw *vf = &adapter->vf; > + struct vdpa_device *vdpa = &adapter->vdpa; > int vector, i, ret, irq; > > ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, > @@ -73,6 +75,10 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) > pci_name(pdev)); > vector = 0; > irq = pci_irq_vector(pdev, vector); > + /* This isconfig interrupt, config accesses all go Missing a blank between is and config. Thanks > + * through userspace, so no need to setup > + * config interrupt offloading. > + */ > ret = devm_request_irq(&pdev->dev, irq, > ifcvf_config_changed, 0, > vf->config_msix_name, vf); > @@ -82,13 +88,11 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) > pci_name(pdev), i); > vector = i + IFCVF_MSI_QUEUE_OFF; > irq = pci_irq_vector(pdev, vector); > - ret = devm_request_irq(&pdev->dev, irq, > + ret = vdpa_alloc_vq_irq(&pdev->dev, vdpa, irq, > ifcvf_intr_handler, 0, > vf->vring[i].msix_name, > - &vf->vring[i]); > + &vf->vring[i], i); > if (ret) { > - IFCVF_ERR(pdev, > - "Failed to request irq for vq %d\n", i); > ifcvf_free_irq(adapter, i); > > return ret;
[-- Attachment #1: Type: text/plain, Size: 2103 bytes --] Hi Zhu, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on vhost/linux-next] [also build test WARNING on kvm/linux-next linus/master v5.8-rc5 next-20200716] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Zhu-Lingshan/IRQ-offloading-for-vDPA/20200716-192910 base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next config: openrisc-randconfig-r016-20200717 (attached as .config) compiler: or1k-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/vhost/vdpa.c: In function 'vhost_vdpa_setup_vq_irq': >> drivers/vhost/vdpa.c:122:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable] 122 | int ret; | ^~~ vim +/ret +122 drivers/vhost/vdpa.c 117 118 static void vhost_vdpa_setup_vq_irq(struct vdpa_device *dev, int qid, int irq) 119 { 120 struct vhost_vdpa *v = vdpa_get_drvdata(dev); 121 struct vhost_virtqueue *vq = &v->vqs[qid]; > 122 int ret; 123 124 spin_lock(&vq->call_ctx.ctx_lock); 125 if (!vq->call_ctx.ctx) { 126 spin_unlock(&vq->call_ctx.ctx_lock); 127 return; 128 } 129 130 vq->call_ctx.producer.token = vq->call_ctx.ctx; 131 vq->call_ctx.producer.irq = irq; 132 ret = irq_bypass_register_producer(&vq->call_ctx.producer); 133 spin_unlock(&vq->call_ctx.ctx_lock); 134 } 135 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 20889 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2103 bytes --] Hi Zhu, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on vhost/linux-next] [also build test WARNING on kvm/linux-next linus/master v5.8-rc5 next-20200716] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Zhu-Lingshan/IRQ-offloading-for-vDPA/20200716-192910 base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next config: openrisc-randconfig-r016-20200717 (attached as .config) compiler: or1k-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/vhost/vdpa.c: In function 'vhost_vdpa_setup_vq_irq': >> drivers/vhost/vdpa.c:122:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable] 122 | int ret; | ^~~ vim +/ret +122 drivers/vhost/vdpa.c 117 118 static void vhost_vdpa_setup_vq_irq(struct vdpa_device *dev, int qid, int irq) 119 { 120 struct vhost_vdpa *v = vdpa_get_drvdata(dev); 121 struct vhost_virtqueue *vq = &v->vqs[qid]; > 122 int ret; 123 124 spin_lock(&vq->call_ctx.ctx_lock); 125 if (!vq->call_ctx.ctx) { 126 spin_unlock(&vq->call_ctx.ctx_lock); 127 return; 128 } 129 130 vq->call_ctx.producer.token = vq->call_ctx.ctx; 131 vq->call_ctx.producer.irq = irq; 132 ret = irq_bypass_register_producer(&vq->call_ctx.producer); 133 spin_unlock(&vq->call_ctx.ctx_lock); 134 } 135 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 20889 bytes --] [-- Attachment #3: Type: text/plain, Size: 183 bytes --] _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[-- Attachment #1: Type: text/plain, Size: 2160 bytes --] Hi Zhu, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on vhost/linux-next] [also build test WARNING on kvm/linux-next linus/master v5.8-rc5 next-20200716] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Zhu-Lingshan/IRQ-offloading-for-vDPA/20200716-192910 base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next config: openrisc-randconfig-r016-20200717 (attached as .config) compiler: or1k-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/vhost/vdpa.c: In function 'vhost_vdpa_setup_vq_irq': >> drivers/vhost/vdpa.c:122:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable] 122 | int ret; | ^~~ vim +/ret +122 drivers/vhost/vdpa.c 117 118 static void vhost_vdpa_setup_vq_irq(struct vdpa_device *dev, int qid, int irq) 119 { 120 struct vhost_vdpa *v = vdpa_get_drvdata(dev); 121 struct vhost_virtqueue *vq = &v->vqs[qid]; > 122 int ret; 123 124 spin_lock(&vq->call_ctx.ctx_lock); 125 if (!vq->call_ctx.ctx) { 126 spin_unlock(&vq->call_ctx.ctx_lock); 127 return; 128 } 129 130 vq->call_ctx.producer.token = vq->call_ctx.ctx; 131 vq->call_ctx.producer.irq = irq; 132 ret = irq_bypass_register_producer(&vq->call_ctx.producer); 133 spin_unlock(&vq->call_ctx.ctx_lock); 134 } 135 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org [-- Attachment #2: config.gz --] [-- Type: application/gzip, Size: 20889 bytes --]
On Thu, 16 Jul 2020 19:23:45 +0800
Zhu Lingshan <lingshan.zhu@intel.com> wrote:
> vDPA devices has dedicated backed hardware like
> passthrough-ed devices. Then it is possible to setup irq
> offloading to vCPU for vDPA devices. Thus this patch tries to
> manipulated assigned device counters via irqbypass manager.
>
> We will increase/decrease the assigned device counter in kvm/x86.
> Both vDPA and VFIO would go through this code path.
>
> This code path only affect x86 for now.
>
> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
> Suggested-by: Jason Wang <jasowang@redhat.com>
> ---
> arch/x86/kvm/x86.c | 10 ++++++++--
> 1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 00c88c2..20c07d3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10624,11 +10624,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
> {
> struct kvm_kernel_irqfd *irqfd =
> container_of(cons, struct kvm_kernel_irqfd, consumer);
> + int ret;
>
> irqfd->producer = prod;
> + kvm_arch_start_assignment(irqfd->kvm);
> + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
> + prod->irq, irqfd->gsi, 1);
> +
> + if (ret)
> + kvm_arch_end_assignment(irqfd->kvm);
>
> - return kvm_x86_ops.update_pi_irte(irqfd->kvm,
> - prod->irq, irqfd->gsi, 1);
> + return ret;
> }
>
> void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
Why isn't there a matching end-assignment in the del_producer path? It
seems this only goes one-way, what happens when a device is
hot-unplugged from the VM or the device interrupt configuration changes.
This will still break vfio if it's not guaranteed to be symmetric.
Thanks,
Alex
On 2020/7/18 上午2:08, Alex Williamson wrote:
> On Thu, 16 Jul 2020 19:23:45 +0800
> Zhu Lingshan <lingshan.zhu@intel.com> wrote:
>
>> vDPA devices has dedicated backed hardware like
>> passthrough-ed devices. Then it is possible to setup irq
>> offloading to vCPU for vDPA devices. Thus this patch tries to
>> manipulated assigned device counters via irqbypass manager.
>>
>> We will increase/decrease the assigned device counter in kvm/x86.
>> Both vDPA and VFIO would go through this code path.
>>
>> This code path only affect x86 for now.
>>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>> Suggested-by: Jason Wang <jasowang@redhat.com>
>> ---
>> arch/x86/kvm/x86.c | 10 ++++++++--
>> 1 file changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 00c88c2..20c07d3 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -10624,11 +10624,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>> {
>> struct kvm_kernel_irqfd *irqfd =
>> container_of(cons, struct kvm_kernel_irqfd, consumer);
>> + int ret;
>>
>> irqfd->producer = prod;
>> + kvm_arch_start_assignment(irqfd->kvm);
>> + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
>> + prod->irq, irqfd->gsi, 1);
>> +
>> + if (ret)
>> + kvm_arch_end_assignment(irqfd->kvm);
>>
>> - return kvm_x86_ops.update_pi_irte(irqfd->kvm,
>> - prod->irq, irqfd->gsi, 1);
>> + return ret;
>> }
>>
>> void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
>
> Why isn't there a matching end-assignment in the del_producer path? It
> seems this only goes one-way, what happens when a device is
> hot-unplugged from the VM or the device interrupt configuration changes.
> This will still break vfio if it's not guaranteed to be symmetric.
> Thanks,
>
> Alex
Yes, we need add logic in the del_producer path.
Thanks
On 7/17/2020 11:32 AM, Jason Wang wrote: > > On 2020/7/16 下午7:23, Zhu Lingshan wrote: >> This commit introduces struct vhost_call_ctx which replaced >> raw struct eventfd_ctx *call_ctx in struct vhost_virtqueue. >> Besides eventfd_ctx, it contains a spin lock and an >> irq_bypass_producer in its structure. >> >> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> >> Suggested-by: Jason Wang <jasowang@redhat.com> >> --- >> drivers/vhost/vdpa.c | 4 ++-- >> drivers/vhost/vhost.c | 22 ++++++++++++++++------ >> drivers/vhost/vhost.h | 9 ++++++++- >> 3 files changed, 26 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> index 7580e34..2fcc422 100644 >> --- a/drivers/vhost/vdpa.c >> +++ b/drivers/vhost/vdpa.c >> @@ -96,7 +96,7 @@ static void handle_vq_kick(struct vhost_work *work) >> static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) >> { >> struct vhost_virtqueue *vq = private; >> - struct eventfd_ctx *call_ctx = vq->call_ctx; >> + struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; >> if (call_ctx) >> eventfd_signal(call_ctx, 1); >> @@ -382,7 +382,7 @@ static long vhost_vdpa_vring_ioctl(struct >> vhost_vdpa *v, unsigned int cmd, >> break; >> case VHOST_SET_VRING_CALL: >> - if (vq->call_ctx) { >> + if (vq->call_ctx.ctx) { >> cb.callback = vhost_vdpa_virtqueue_cb; >> cb.private = vq; >> } else { >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c >> index d7b8df3..4004e94 100644 >> --- a/drivers/vhost/vhost.c >> +++ b/drivers/vhost/vhost.c >> @@ -298,6 +298,13 @@ static void vhost_vq_meta_reset(struct vhost_dev >> *d) >> __vhost_vq_meta_reset(d->vqs[i]); >> } >> +static void vhost_call_ctx_reset(struct vhost_call_ctx *call_ctx) >> +{ >> + call_ctx->ctx = NULL; >> + memset(&call_ctx->producer, 0x0, sizeof(struct >> irq_bypass_producer)); >> + spin_lock_init(&call_ctx->ctx_lock); >> +} >> + >> static void vhost_vq_reset(struct vhost_dev *dev, >> struct vhost_virtqueue *vq) >> { >> @@ -319,13 +326,13 @@ static void vhost_vq_reset(struct vhost_dev *dev, >> vq->log_base = NULL; >> vq->error_ctx = NULL; >> vq->kick = NULL; >> - vq->call_ctx = NULL; >> vq->log_ctx = NULL; >> vhost_reset_is_le(vq); >> vhost_disable_cross_endian(vq); >> vq->busyloop_timeout = 0; >> vq->umem = NULL; >> vq->iotlb = NULL; >> + vhost_call_ctx_reset(&vq->call_ctx); >> __vhost_vq_meta_reset(vq); >> } >> @@ -685,8 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) >> eventfd_ctx_put(dev->vqs[i]->error_ctx); >> if (dev->vqs[i]->kick) >> fput(dev->vqs[i]->kick); >> - if (dev->vqs[i]->call_ctx) >> - eventfd_ctx_put(dev->vqs[i]->call_ctx); >> + if (dev->vqs[i]->call_ctx.ctx) >> + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); >> vhost_vq_reset(dev, dev->vqs[i]); >> } >> vhost_dev_free_iovecs(dev); >> @@ -1629,7 +1636,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, >> unsigned int ioctl, void __user *arg >> r = PTR_ERR(ctx); >> break; >> } >> - swap(ctx, vq->call_ctx); >> + >> + spin_lock(&vq->call_ctx.ctx_lock); >> + swap(ctx, vq->call_ctx.ctx); >> + spin_unlock(&vq->call_ctx.ctx_lock); >> break; >> case VHOST_SET_VRING_ERR: >> if (copy_from_user(&f, argp, sizeof f)) { >> @@ -2440,8 +2450,8 @@ static bool vhost_notify(struct vhost_dev *dev, >> struct vhost_virtqueue *vq) >> void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) >> { >> /* Signal the Guest tell them we used something up. */ >> - if (vq->call_ctx && vhost_notify(dev, vq)) >> - eventfd_signal(vq->call_ctx, 1); >> + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) >> + eventfd_signal(vq->call_ctx.ctx, 1); >> } >> EXPORT_SYMBOL_GPL(vhost_signal); >> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h >> index c8e96a0..402c62e 100644 >> --- a/drivers/vhost/vhost.h >> +++ b/drivers/vhost/vhost.h >> @@ -13,6 +13,7 @@ >> #include <linux/virtio_ring.h> >> #include <linux/atomic.h> >> #include <linux/vhost_iotlb.h> >> +#include <linux/irqbypass.h> >> struct vhost_work; >> typedef void (*vhost_work_fn_t)(struct vhost_work *work); >> @@ -60,6 +61,12 @@ enum vhost_uaddr_type { >> VHOST_NUM_ADDRS = 3, >> }; >> +struct vhost_call_ctx { > > > I think maybe "vhost_vring_call" is a better name since it contains > not only the eventfd_ctx now. > > Thanks Sure > > >> + struct eventfd_ctx *ctx; >> + struct irq_bypass_producer producer; >> + spinlock_t ctx_lock; >> +}; >> + >> /* The virtqueue structure describes a queue attached to a device. */ >> struct vhost_virtqueue { >> struct vhost_dev *dev; >> @@ -72,7 +79,7 @@ struct vhost_virtqueue { >> vring_used_t __user *used; >> const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; >> struct file *kick; >> - struct eventfd_ctx *call_ctx; >> + struct vhost_call_ctx call_ctx; >> struct eventfd_ctx *error_ctx; >> struct eventfd_ctx *log_ctx; >
On 7/17/2020 12:01 PM, Jason Wang wrote: > > On 2020/7/16 下午7:23, Zhu Lingshan wrote: >> vDPA devices has dedicated backed hardware like >> passthrough-ed devices. Then it is possible to setup irq >> offloading to vCPU for vDPA devices. Thus this patch tries to >> manipulated assigned device counters via irqbypass manager. > > > This part needs some tweak, e.g why assigned device could be detected > through this way. > > >> >> We will increase/decrease the assigned device counter in kvm/x86. > > > And you need explain why we don't need similar thing in other arch. > > Thanks OK Thanks! > > >> Both vDPA and VFIO would go through this code path. >> >> This code path only affect x86 for now. >> >> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com> >> Suggested-by: Jason Wang <jasowang@redhat.com> >> --- >> arch/x86/kvm/x86.c | 10 ++++++++-- >> 1 file changed, 8 insertions(+), 2 deletions(-) >> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 00c88c2..20c07d3 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -10624,11 +10624,17 @@ int kvm_arch_irq_bypass_add_producer(struct >> irq_bypass_consumer *cons, >> { >> struct kvm_kernel_irqfd *irqfd = >> container_of(cons, struct kvm_kernel_irqfd, consumer); >> + int ret; >> irqfd->producer = prod; >> + kvm_arch_start_assignment(irqfd->kvm); >> + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, >> + prod->irq, irqfd->gsi, 1); >> + >> + if (ret) >> + kvm_arch_end_assignment(irqfd->kvm); >> - return kvm_x86_ops.update_pi_irte(irqfd->kvm, >> - prod->irq, irqfd->gsi, 1); >> + return ret; >> } >> void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer >> *cons, >
On 2020/7/20 下午5:07, Zhu, Lingshan wrote:
>>>
>>> +}
>>> +
>>> +static void vdpa_unsetup_irq(struct vdpa_device *vdev, int qid)
>>> +{
>>> + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
>>> +
>>> + if (drv->unsetup_vq_irq)
>>> + drv->unsetup_vq_irq(vdev, qid);
>>
>>
>> Do you need to check the existence of drv before calling unset_vq_irq()?
> Yes, we should check this when we take the releasing path into account.
>>
>> And how can this synchronize with driver releasing and binding?
> Will add an vdpa_unsetup_irq() call in vhsot_vdpa_release().
> For binding, I think it is a new dev bound to the the driver,
> it should go through the vdpa_setup_irq() routine. or if it is
> a device re-bind to vhost_vdpa, I think we have cleaned up
> irq_bypass_producer for it as we would call vhdpa_unsetup_irq()
> in the release function.
I meant can the following things happen?
1) some vDPA device driver probe the hardware and call
vdpa_request_irq() in its PCI probe function.
2) vDPA device is probed by vhost-vDPA
Then irq bypass can't work since we when vdpa_unsetup_irq() is called,
there's no driver bound. Or is there a requirement that
vdap_request/free_irq() must be called somewhere (e.g in the set_status
bus operations)? If yes, we need document those requirements.
Thanks
On 2020/7/21 上午10:02, Zhu, Lingshan wrote: > > > On 7/20/2020 5:40 PM, Jason Wang wrote: >> >> On 2020/7/20 下午5:07, Zhu, Lingshan wrote: >>>>> >>>>> +} >>>>> + >>>>> +static void vdpa_unsetup_irq(struct vdpa_device *vdev, int qid) >>>>> +{ >>>>> + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); >>>>> + >>>>> + if (drv->unsetup_vq_irq) >>>>> + drv->unsetup_vq_irq(vdev, qid); >>>> >>>> >>>> Do you need to check the existence of drv before calling >>>> unset_vq_irq()? >>> Yes, we should check this when we take the releasing path into account. >>>> >>>> And how can this synchronize with driver releasing and binding? >>> Will add an vdpa_unsetup_irq() call in vhsot_vdpa_release(). >>> For binding, I think it is a new dev bound to the the driver, >>> it should go through the vdpa_setup_irq() routine. or if it is >>> a device re-bind to vhost_vdpa, I think we have cleaned up >>> irq_bypass_producer for it as we would call vhdpa_unsetup_irq() >>> in the release function. >> >> >> I meant can the following things happen? >> >> 1) some vDPA device driver probe the hardware and call >> vdpa_request_irq() in its PCI probe function. >> 2) vDPA device is probed by vhost-vDPA >> >> Then irq bypass can't work since we when vdpa_unsetup_irq() is >> called, there's no driver bound. Or is there a requirement that >> vdap_request/free_irq() must be called somewhere (e.g in the >> set_status bus operations)? If yes, we need document those requirements. > vdpa_unseup_irq is only called when we want to unregister the producer, Typo, I meant vdpa_setup_irq(). Thanks > now we have two code path using it: free_irq and relaase(). I agree we can document this requirements for the helpers, these functions can only be called through status changes(DRIVER_OK and !DRIVER_OK). > > Thanks, > BR > Zhu Lingshan >> >> Thanks >>