This is the final patch set following the RFC patch discussions listed below. [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe http://mails.dpdk.org/archives/dev/2019-July/138358.html [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs http://mails.dpdk.org/archives/dev/2019-July/138357.html [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers http://mails.dpdk.org/archives/dev/2019-July/138359.html Please also refer to the discussions below: http://mails.dpdk.org/archives/dev/2019-July/138184.html http://mails.dpdk.org/archives/dev/2019-July/138113.html This patch set reverts the vfio patch that moved eventfd setup to probe time (1/3), because MSI-X initialization is broken by that change. It also adds a new ack interrupt API that provides a simple, lightweight ack method and avoids the race condition that was the root cause of this discussion. Nithin Dabilpuram (3): vfio: revert change that does intr eventfd setup at probe eal: add ack interrupt API drivers/net: use ack API in interrupt handlers drivers/bus/pci/linux/pci_vfio.c | 78 ++++--- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 +- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 +- drivers/net/e1000/igb_ethdev.c | 6 +- drivers/net/fm10k/fm10k_ethdev.c | 6 +- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 +- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 + drivers/net/qede/qede_ethdev.c | 8 +- drivers/net/sfc/sfc_intr.c | 4 +- 
drivers/net/virtio/virtio_ethdev.c | 16 +- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- lib/librte_eal/common/include/rte_interrupts.h | 22 ++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 + lib/librte_eal/linux/eal/eal_interrupts.c | 282 ++++++++++++++++++++----- lib/librte_eal/rte_eal_version.map | 1 + 32 files changed, 360 insertions(+), 117 deletions(-) -- 2.8.4
This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4. "vfio: fix interrupts race condition" The above-mentioned commit moves the interrupt's eventfd setup to probe time but enables only one interrupt for all types of interrupt handles, i.e. VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. It works fine in the default case but breaks the cases below, specifically for MSI-X based interrupt handles. * Applications like l3fwd-power that request rxq interrupts during ethdev setup. * Drivers that need more than one MSI-X interrupt to be configured for their functionality to work. VFIO PCI for MSI-X expects all the possible vectors to be set up when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from the kernel PCI subsystem. The only way to increase the number of vectors later is to first free them all by using VFIO_IRQ_SET_DATA_NONE with action trigger and then enable the new vector count. The above commit changes the behavior of rte_intr_[enable|disable] to only mask and unmask, unlike the earlier behavior, thereby breaking the above two scenarios. 
Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition") Cc: david.marchand@redhat.com Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> --- drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ lib/librte_eal/linux/eal/eal_interrupts.c | 201 +++++++++++++++++++++++------- 2 files changed, 191 insertions(+), 88 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ee31239..1ceb1c0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,11 +187,8 @@ pci_vfio_set_bus_master(int dev_fd, bool op) static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; - struct vfio_irq_set *irq_set; - enum rte_intr_mode intr_mode; int i, ret, intr_idx; - int fd; + enum rte_intr_mode intr_mode; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -223,6 +220,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -238,51 +236,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } - /* found a usable interrupt mode */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) - break; - /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, 
EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); return -1; } - } - if (i < 0) - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } - irq_set = (struct vfio_irq_set *)irq_set_buf; - irq_set->argsz = sizeof(irq_set_buf); - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = i; - irq_set->start = 0; - irq_set->count = 1; - memcpy(&irq_set->data, &fd, sizeof(int)); - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); - close(fd); - return -1; + return 0; } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - if (i == VFIO_PCI_MSIX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - else if (i == VFIO_PCI_MSI_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - else if (i == VFIO_PCI_INTX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - - return 0; + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 27976b3..79ad5e8 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ 
b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,19 +109,42 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; - - /* unmask INTx */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -133,51 +156,128 @@ vfio_enable_intx(const struct rte_intr_handle *intr_handle) /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct 
vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; - /* mask interrupts */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + len = sizeof(struct vfio_irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } return 0; } +/* enable MSI interrupts */ +static int +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); 
+ return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + /* enable MSI-X interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) -{ +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; - int len, ret; - - if (intr_handle->nb_efd == 0) - return 0; + int *fd_ptr; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? 
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set->count = intr_handle->nb_efd; - memcpy(&irq_set->data, intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -189,21 +289,22 @@ vfio_enable_msix(const struct rte_intr_handle *intr_handle) /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set.count = intr_handle->nb_efd; + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -564,7 +665,9 
@@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_enable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -618,7 +721,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_disable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1; -- 2.8.4
Add a new ack interrupt API to avoid using VFIO_IRQ_SET_ACTION_TRIGGER (rte_intr_enable()) for the purpose of acking interrupts in VFIO based interrupt handlers. This implementation is specific to Linux. Using rte_intr_enable() for acking an interrupt has the issues below: * It is time consuming to do for every interrupt received, as it performs free_irq() followed by request_irq() and all other initializations. * There is a race condition because of the window between free_irq() and request_irq(), with packet reception still on and the device still enabled, which would throw warning messages like the one below. [158764.159833] do_IRQ: 9.34 No irq handler for vector In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts: they are edge triggered, so the kernel does not mask the interrupt before delivering the event to userspace, and we don't need to ack. Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> --- lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ lib/librte_eal/linux/eal/eal_interrupts.c | 81 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 4 files changed, 113 insertions(+) diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index c1e912c..93b31cd 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle *intr_handle); */ int rte_intr_disable(const struct rte_intr_handle *intr_handle); +/** + * It acks an interrupt raised for the specified handle. + * + * Call this function to ack an interrupt from interrupt + * handler either from application or driver, so that + * new interrupts are raised. + * + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI, + * this function is a no-op and returns success without + * changing anything as kernel doesn't expect + * them to be acked. 
+ * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +__rte_experimental +int rte_intr_ack(const struct rte_intr_handle *intr_handle); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/freebsd/eal/eal_interrupts.c b/lib/librte_eal/freebsd/eal/eal_interrupts.c index 10375bd..f6831b7 100644 --- a/lib/librte_eal/freebsd/eal/eal_interrupts.c +++ b/lib/librte_eal/freebsd/eal/eal_interrupts.c @@ -387,6 +387,15 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return 0; } +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + return -1; +} + static void eal_intr_process_interrupts(struct kevent *events, int nfds) { diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 79ad5e8..91e220c 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -197,6 +197,35 @@ vfio_disable_intx(const struct rte_intr_handle *intr_handle) { return 0; } +/* unmask/ack legacy (INTx) interrupts */ +static int +vfio_ack_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* unmask INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + /* enable MSI interrupts */ static int vfio_enable_msi(const struct rte_intr_handle *intr_handle) { @@ -694,6 +723,58 @@ 
rte_intr_enable(const struct rte_intr_handle *intr_handle) } int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* Both acking and disabling are same for UIO */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + /* Since VFIO_MSIX is implicitly acked + * unlike INTx, we report success + */ + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + return 0; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_ack_intx(intr_handle)) + return -1; + break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + return -1; +#endif +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +int rte_intr_disable(const struct rte_intr_handle *intr_handle) { if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 1892d9e..0537a6d 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -407,4 +407,5 @@ EXPERIMENTAL { rte_lcore_to_cpu_id; rte_mcfg_timer_lock; rte_mcfg_timer_unlock; + rte_intr_ack; }; -- 2.8.4
Replace rte_intr_enable() with the rte_intr_ack() API for acking an interrupt in the interrupt handlers and rx_queue_intr_enable() callbacks of PMDs. This is in line with the original intent of this code in PMDs, namely to ack interrupts after handling is completed if the device is backed by UIO, IGB_UIO or VFIO (with INTx). Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> --- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 ++ drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 ++-- drivers/net/e1000/igb_ethdev.c | 6 +++--- drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 ++-- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 ++ drivers/net/qede/qede_ethdev.c | 8 ++++---- drivers/net/sfc/sfc_intr.c | 4 ++-- drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- 27 files changed, 56 insertions(+), 29 deletions(-) diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile index 263f12b..fc12e6a 100644 --- a/drivers/net/atlantic/Makefile +++ b/drivers/net/atlantic/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_atlantic.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_atlantic_version.map diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c index fdc0a7f..79001da 100644 --- 
a/drivers/net/atlantic/atl_ethdev.c +++ b/drivers/net/atlantic/atl_ethdev.c @@ -1394,7 +1394,7 @@ atl_dev_interrupt_action(struct rte_eth_dev *dev, } done: atl_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build index 60b8468..d5602dd 100644 --- a/drivers/net/atlantic/meson.build +++ b/drivers/net/atlantic/meson.build @@ -11,3 +11,5 @@ sources = files( 'hw_atl/hw_atl_utils.c', 'rte_pmd_atlantic.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 47b96ec..504435e 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -713,7 +713,7 @@ avp_dev_interrupt_handler(void *data) status); /* re-enable UIO interrupt handling */ - ret = rte_intr_enable(&pci_dev->intr_handle); + ret = rte_intr_ack(&pci_dev->intr_handle); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", ret); diff --git a/drivers/net/avp/meson.build b/drivers/net/avp/meson.build index a5f63cd..7fb9706 100644 --- a/drivers/net/avp/meson.build +++ b/drivers/net/avp/meson.build @@ -7,3 +7,4 @@ if not is_linux endif sources = files('avp_ethdev.c') install_headers('rte_avp_common.h', 'rte_avp_fifo.h') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile index c2d4336..bcdcd54 100644 --- a/drivers/net/axgbe/Makefile +++ b/drivers/net/axgbe/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_axgbe.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_axgbe_version.map diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index cfb1720..56d8dac 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ b/drivers/net/axgbe/axgbe_ethdev.c @@ -136,8 +136,8 @@ axgbe_dev_interrupt_handler(void *param) DMA_CH_SR, dma_ch_isr); } } - /* Enable interrupts since disabled after 
generation*/ - rte_intr_enable(&pdata->pci_dev->intr_handle); + /* Unmask interrupts since disabled after generation*/ + rte_intr_ack(&pdata->pci_dev->intr_handle); } /* diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build index 86873b7..226d11d 100644 --- a/drivers/net/axgbe/meson.build +++ b/drivers/net/axgbe/meson.build @@ -14,6 +14,7 @@ sources = files('axgbe_ethdev.c', 'axgbe_rxtx.c') cflags += '-Wno-cast-qual' +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('axgbe_rxtx_vec_sse.c') diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index 10b4fdb..191a3ef 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -133,7 +133,7 @@ bnx2x_interrupt_handler(void *param) PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled"); bnx2x_interrupt_action(dev, 1); - rte_intr_enable(&sc->pci_dev->intr_handle); + rte_intr_ack(&sc->pci_dev->intr_handle); } static void bnx2x_periodic_start(void *param) diff --git a/drivers/net/bnx2x/meson.build b/drivers/net/bnx2x/meson.build index 4892bb2..1bc84b7 100644 --- a/drivers/net/bnx2x/meson.build +++ b/drivers/net/bnx2x/meson.build @@ -6,6 +6,7 @@ build = dep.found() reason = 'missing dependency, "zlib"' ext_deps += dep cflags += '-DZLIB_CONST' +cflags += '-DALLOW_EXPERIMENTAL_API' sources = files('bnx2x.c', 'bnx2x_ethdev.c', 'bnx2x_rxtx.c', diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index dc88661..0b7f501 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1001,7 +1001,7 @@ eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -1568,7 +1568,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev, return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - 
rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c index 3ee28cf..793a31c 100644 --- a/drivers/net/e1000/igb_ethdev.c +++ b/drivers/net/e1000/igb_ethdev.c @@ -2876,7 +2876,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev, } igb_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2987,7 +2987,7 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr } igbvf_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5500,7 +5500,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index a1e3836..e8fa8a2 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -2381,7 +2381,7 @@ fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) else FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&pdev->intr_handle); + rte_intr_ack(&pdev->intr_handle); return 0; } @@ -2680,7 +2680,7 @@ fm10k_dev_interrupt_handler_pf(void *param) FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /** @@ -2760,7 +2760,7 @@ fm10k_dev_interrupt_handler_vf(void *param) FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + 
rte_intr_ack(dev->intr_handle); } /* Mailbox message handler in VF */ diff --git a/drivers/net/fm10k/meson.build b/drivers/net/fm10k/meson.build index 2772ea4..b7d34c7 100644 --- a/drivers/net/fm10k/meson.build +++ b/drivers/net/fm10k/meson.build @@ -14,3 +14,4 @@ if arch_subdir == 'x86' endif includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 2b9fc45..5217b9c 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -11646,7 +11646,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); I40E_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 53dc05c..00d6ed5 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1098,7 +1098,7 @@ iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) IAVF_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile index 32abeb6..ae53c26 100644 --- a/drivers/net/ice/Makefile +++ b/drivers/net/ice/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_ice.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_kvargs LDLIBS += -lrte_bus_pci -lrte_mempool diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index 9ce730c..77b5a71 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -1118,7 +1118,7 @@ ice_interrupt_handler(void *param) done: /* Enable interrupt */ ice_pf_enable_irq0(hw); - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Initialize SW parameters of PF */ @@ -3002,7 +3002,7 @@ static int 
ice_rx_queue_intr_enable(struct rte_eth_dev *dev, val &= ~GLINT_DYN_CTL_WB_ON_ITR_M; ICE_WRITE_REG(hw, GLINT_DYN_CTL(msix_intr), val); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index 7f16647..70f349e 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -13,6 +13,7 @@ sources = files( deps += ['hash'] includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('ice_rxtx_vec_sse.c') diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 22c5b2c..f5920f5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -4502,7 +4502,7 @@ ixgbe_dev_interrupt_delayed_handler(void *param) PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); } /** @@ -5763,7 +5763,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, intr->mask); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5812,7 +5812,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index 1a7aa17..91a226c 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -1412,7 +1412,7 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { /* If MSI-X auto-masking is used, clear the entry */ rte_wmb(); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); } else { /* Make sure all updates are written before un-masking */ rte_wmb(); diff 
--git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile index 2ecbd8d..a11d594 100644 --- a/drivers/net/qede/Makefile +++ b/drivers/net/qede/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_qede.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build index 12388a6..c8f9c6d 100644 --- a/drivers/net/qede/meson.build +++ b/drivers/net/qede/meson.build @@ -10,3 +10,5 @@ sources = files( 'qede_main.c', 'qede_rxtx.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 82363e6..9ac9da3 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -248,8 +248,8 @@ qede_interrupt_handler_intx(void *param) if (status & 0x1) { qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } } @@ -261,8 +261,8 @@ qede_interrupt_handler(void *param) struct ecore_dev *edev = &qdev->edev; qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } static void diff --git a/drivers/net/sfc/sfc_intr.c b/drivers/net/sfc/sfc_intr.c index 1f4969b..76cb630 100644 --- a/drivers/net/sfc/sfc_intr.c +++ b/drivers/net/sfc/sfc_intr.c @@ -79,7 +79,7 @@ sfc_intr_line_handler(void *cb_arg) if (qmask & (1 << sa->mgmt_evq_index)) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); @@ -123,7 +123,7 @@ 
sfc_intr_message_handler(void *cb_arg) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 04aecb7..62c8274 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1265,6 +1265,20 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) } static int +virtio_intr_unmask(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (rte_intr_ack(dev->intr_handle) < 0) + return -1; + + if (!hw->virtio_user_dev) + hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); + + return 0; +} + +static int virtio_intr_enable(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; @@ -1457,7 +1471,7 @@ virtio_interrupt_handler(void *param) isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (virtio_intr_enable(dev) < 0) + if (virtio_intr_unmask(dev) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 2b1e915..57feb37 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -1426,7 +1426,7 @@ vmxnet3_interrupt_handler(void *param) vmxnet3_process_events(dev); - if (rte_intr_enable(&pci_dev->intr_handle) < 0) + if (rte_intr_ack(&pci_dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); } -- 2.8.4
This is the final patch set for the RFC patch discussions mentioned below. [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe http://mails.dpdk.org/archives/dev/2019-July/138358.html [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs http://mails.dpdk.org/archives/dev/2019-July/138357.html [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers http://mails.dpdk.org/archives/dev/2019-July/138359.html Please also refer to the discussions below: http://mails.dpdk.org/archives/dev/2019-July/138184.html http://mails.dpdk.org/archives/dev/2019-July/138113.html This patch set basically reverts the vfio patch that moves eventfd setup to probe time (1/3) because MSI-X initialization is broken. It adds a new ack interrupt API to provide a simple and lightweight ack method and also to avoid the race conditions that were the root cause of this discussion. Nithin Dabilpuram (3): vfio: revert change that does intr eventfd setup at probe eal: add ack interrupt API drivers/net: use ack API in interrupt handlers drivers/bus/pci/linux/pci_vfio.c | 78 ++++--- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 +- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 +- drivers/net/e1000/igb_ethdev.c | 6 +- drivers/net/fm10k/fm10k_ethdev.c | 6 +- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 +- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 + drivers/net/qede/qede_ethdev.c | 8 +- drivers/net/sfc/sfc_intr.c | 4 +-
drivers/net/virtio/virtio_ethdev.c | 16 +- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- lib/librte_eal/common/include/rte_interrupts.h | 22 ++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 + lib/librte_eal/linux/eal/eal_interrupts.c | 282 ++++++++++++++++++++----- lib/librte_eal/rte_eal_version.map | 1 + 32 files changed, 360 insertions(+), 117 deletions(-) -- 2.8.4
This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4. "vfio: fix interrupts race condition" The above mentioned commit moves the interrupt's eventfd setup to probe time but only enables one interrupt for all types of interrupt handles, i.e. VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. It works fine in the default case but breaks the cases below, specifically for MSI-X based interrupt handles. * Applications like l3fwd-power that request rxq interrupts during ethdev setup. * Drivers that need > 1 MSI-X interrupts to be configured for functionality to work. VFIO PCI for MSI-X expects all the possible vectors to be set up when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from the kernel PCI subsystem. The only way to increase the number of vectors later is to first free all of them by using VFIO_IRQ_SET_DATA_NONE with action trigger and then enable the new vector count. The above commit changes the behavior of rte_intr_[enable|disable] to only mask and unmask, unlike the earlier behavior, and thereby breaks the above two scenarios.
Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition") Cc: david.marchand@redhat.com Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Stephen Hemminger <stephen at networkplumber.org> --- v2: * Include tested by sign from Stephen drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ lib/librte_eal/linux/eal/eal_interrupts.c | 201 +++++++++++++++++++++++------- 2 files changed, 191 insertions(+), 88 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ee31239..1ceb1c0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,11 +187,8 @@ pci_vfio_set_bus_master(int dev_fd, bool op) static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; - struct vfio_irq_set *irq_set; - enum rte_intr_mode intr_mode; int i, ret, intr_idx; - int fd; + enum rte_intr_mode intr_mode; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -223,6 +220,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -238,51 +236,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } - /* found a usable interrupt mode */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) - break; - /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); 
+ return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); return -1; } - } - if (i < 0) - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } - irq_set = (struct vfio_irq_set *)irq_set_buf; - irq_set->argsz = sizeof(irq_set_buf); - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = i; - irq_set->start = 0; - irq_set->count = 1; - memcpy(&irq_set->data, &fd, sizeof(int)); - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); - close(fd); - return -1; + return 0; } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - if (i == VFIO_PCI_MSIX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - else if (i == VFIO_PCI_MSI_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - else if (i == VFIO_PCI_INTX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - - return 0; + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c 
b/lib/librte_eal/linux/eal/eal_interrupts.c index 27976b3..79ad5e8 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,19 +109,42 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; - - /* unmask INTx */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -133,51 +156,128 @@ vfio_enable_intx(const struct rte_intr_handle *intr_handle) /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle 
*intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; - /* mask interrupts */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + len = sizeof(struct vfio_irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } return 0; } +/* enable MSI interrupts */ +static int +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, 
VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + /* enable MSI-X interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) -{ +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; - int len, ret; - - if (intr_handle->nb_efd == 0) - return 0; + int *fd_ptr; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? 
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set->count = intr_handle->nb_efd; - memcpy(&irq_set->data, intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -189,21 +289,22 @@ vfio_enable_msix(const struct rte_intr_handle *intr_handle) /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set.count = intr_handle->nb_efd; + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -564,7 +665,9 
@@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_enable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -618,7 +721,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_disable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1; -- 2.8.4
Add new ack interrupt API to avoid using VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for acking interrupt purpose for VFIO based interrupt handlers. This implementation is specific to Linux. Using rte_intr_enable() for acking interrupt has below issues * Time consuming to do for every interrupt received as it will free_irq() followed by request_irq() and all other initializations * A race condition because of a window between free_irq() and request_irq() with packet reception still on and device still enabled and would throw warning messages like below. [158764.159833] do_IRQ: 9.34 No irq handler for vector In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts as they are edge triggered and kernel would not mask the interrupt before delivering the event to userspace and we don't need to ack. Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> --- v2: * No change lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ lib/librte_eal/linux/eal/eal_interrupts.c | 81 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 4 files changed, 113 insertions(+) diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index c1e912c..93b31cd 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle *intr_handle); */ int rte_intr_disable(const struct rte_intr_handle *intr_handle); +/** + * It acks an interrupt raised for the specified handle. + * + * Call this function to ack an interrupt from interrupt + * handler either from application or driver, so that + * new interrupts are raised. 
+ * + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI, + * this function is a no-op and returns success without + * changing anything as kernel doesn't expect + * them to be acked. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +__rte_experimental +int rte_intr_ack(const struct rte_intr_handle *intr_handle); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/freebsd/eal/eal_interrupts.c b/lib/librte_eal/freebsd/eal/eal_interrupts.c index 10375bd..f6831b7 100644 --- a/lib/librte_eal/freebsd/eal/eal_interrupts.c +++ b/lib/librte_eal/freebsd/eal/eal_interrupts.c @@ -387,6 +387,15 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return 0; } +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + return -1; +} + static void eal_intr_process_interrupts(struct kevent *events, int nfds) { diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 79ad5e8..91e220c 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -197,6 +197,35 @@ vfio_disable_intx(const struct rte_intr_handle *intr_handle) { return 0; } +/* unmask/ack legacy (INTx) interrupts */ +static int +vfio_ack_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* unmask INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for 
fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + /* enable MSI interrupts */ static int vfio_enable_msi(const struct rte_intr_handle *intr_handle) { @@ -694,6 +723,58 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) } int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* Both acking and disabling are same for UIO */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + /* Since VFIO_MSIX is implicitly acked + * unlike INTx, we report success + */ + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + return 0; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_ack_intx(intr_handle)) + return -1; + break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + return -1; +#endif +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +int rte_intr_disable(const struct rte_intr_handle *intr_handle) { if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 1892d9e..0537a6d 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -407,4 +407,5 @@ EXPERIMENTAL { rte_lcore_to_cpu_id; rte_mcfg_timer_lock; rte_mcfg_timer_unlock; + rte_intr_ack; }; -- 2.8.4
Replace rte_intr_enable() with rte_intr_ack() API for acking an interrupt in interrupt handlers and rx_queue_intr_enable() callbacks of PMD's. This is inline with original intent of this change in PMDs to ack interrupts after handling is completed if device is backed by UIO, IGB_UIO or VFIO(with INTx). Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> --- v2: * No change drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 ++ drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 ++-- drivers/net/e1000/igb_ethdev.c | 6 +++--- drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 ++-- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 ++ drivers/net/qede/qede_ethdev.c | 8 ++++---- drivers/net/sfc/sfc_intr.c | 4 ++-- drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- 27 files changed, 56 insertions(+), 29 deletions(-) diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile index 263f12b..fc12e6a 100644 --- a/drivers/net/atlantic/Makefile +++ b/drivers/net/atlantic/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_atlantic.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_atlantic_version.map diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c index fdc0a7f..79001da 100644 --- 
a/drivers/net/atlantic/atl_ethdev.c +++ b/drivers/net/atlantic/atl_ethdev.c @@ -1394,7 +1394,7 @@ atl_dev_interrupt_action(struct rte_eth_dev *dev, } done: atl_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build index 60b8468..d5602dd 100644 --- a/drivers/net/atlantic/meson.build +++ b/drivers/net/atlantic/meson.build @@ -11,3 +11,5 @@ sources = files( 'hw_atl/hw_atl_utils.c', 'rte_pmd_atlantic.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 47b96ec..504435e 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -713,7 +713,7 @@ avp_dev_interrupt_handler(void *data) status); /* re-enable UIO interrupt handling */ - ret = rte_intr_enable(&pci_dev->intr_handle); + ret = rte_intr_ack(&pci_dev->intr_handle); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", ret); diff --git a/drivers/net/avp/meson.build b/drivers/net/avp/meson.build index a5f63cd..7fb9706 100644 --- a/drivers/net/avp/meson.build +++ b/drivers/net/avp/meson.build @@ -7,3 +7,4 @@ if not is_linux endif sources = files('avp_ethdev.c') install_headers('rte_avp_common.h', 'rte_avp_fifo.h') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile index c2d4336..bcdcd54 100644 --- a/drivers/net/axgbe/Makefile +++ b/drivers/net/axgbe/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_axgbe.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_axgbe_version.map diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index cfb1720..56d8dac 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ b/drivers/net/axgbe/axgbe_ethdev.c @@ -136,8 +136,8 @@ axgbe_dev_interrupt_handler(void *param) DMA_CH_SR, dma_ch_isr); } } - /* Enable interrupts since disabled after 
generation*/ - rte_intr_enable(&pdata->pci_dev->intr_handle); + /* Unmask interrupts since disabled after generation*/ + rte_intr_ack(&pdata->pci_dev->intr_handle); } /* diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build index 86873b7..226d11d 100644 --- a/drivers/net/axgbe/meson.build +++ b/drivers/net/axgbe/meson.build @@ -14,6 +14,7 @@ sources = files('axgbe_ethdev.c', 'axgbe_rxtx.c') cflags += '-Wno-cast-qual' +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('axgbe_rxtx_vec_sse.c') diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index 10b4fdb..191a3ef 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -133,7 +133,7 @@ bnx2x_interrupt_handler(void *param) PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled"); bnx2x_interrupt_action(dev, 1); - rte_intr_enable(&sc->pci_dev->intr_handle); + rte_intr_ack(&sc->pci_dev->intr_handle); } static void bnx2x_periodic_start(void *param) diff --git a/drivers/net/bnx2x/meson.build b/drivers/net/bnx2x/meson.build index 4892bb2..1bc84b7 100644 --- a/drivers/net/bnx2x/meson.build +++ b/drivers/net/bnx2x/meson.build @@ -6,6 +6,7 @@ build = dep.found() reason = 'missing dependency, "zlib"' ext_deps += dep cflags += '-DZLIB_CONST' +cflags += '-DALLOW_EXPERIMENTAL_API' sources = files('bnx2x.c', 'bnx2x_ethdev.c', 'bnx2x_rxtx.c', diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index dc88661..0b7f501 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1001,7 +1001,7 @@ eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -1568,7 +1568,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev, return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - 
rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c index 3ee28cf..793a31c 100644 --- a/drivers/net/e1000/igb_ethdev.c +++ b/drivers/net/e1000/igb_ethdev.c @@ -2876,7 +2876,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev, } igb_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2987,7 +2987,7 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr } igbvf_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5500,7 +5500,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index a1e3836..e8fa8a2 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -2381,7 +2381,7 @@ fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) else FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&pdev->intr_handle); + rte_intr_ack(&pdev->intr_handle); return 0; } @@ -2680,7 +2680,7 @@ fm10k_dev_interrupt_handler_pf(void *param) FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /** @@ -2760,7 +2760,7 @@ fm10k_dev_interrupt_handler_vf(void *param) FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + 
rte_intr_ack(dev->intr_handle); } /* Mailbox message handler in VF */ diff --git a/drivers/net/fm10k/meson.build b/drivers/net/fm10k/meson.build index 2772ea4..b7d34c7 100644 --- a/drivers/net/fm10k/meson.build +++ b/drivers/net/fm10k/meson.build @@ -14,3 +14,4 @@ if arch_subdir == 'x86' endif includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 2b9fc45..5217b9c 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -11646,7 +11646,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); I40E_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 53dc05c..00d6ed5 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1098,7 +1098,7 @@ iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) IAVF_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile index 32abeb6..ae53c26 100644 --- a/drivers/net/ice/Makefile +++ b/drivers/net/ice/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_ice.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_kvargs LDLIBS += -lrte_bus_pci -lrte_mempool diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index 9ce730c..77b5a71 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -1118,7 +1118,7 @@ ice_interrupt_handler(void *param) done: /* Enable interrupt */ ice_pf_enable_irq0(hw); - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Initialize SW parameters of PF */ @@ -3002,7 +3002,7 @@ static int 
ice_rx_queue_intr_enable(struct rte_eth_dev *dev, val &= ~GLINT_DYN_CTL_WB_ON_ITR_M; ICE_WRITE_REG(hw, GLINT_DYN_CTL(msix_intr), val); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index 7f16647..70f349e 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -13,6 +13,7 @@ sources = files( deps += ['hash'] includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('ice_rxtx_vec_sse.c') diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 22c5b2c..f5920f5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -4502,7 +4502,7 @@ ixgbe_dev_interrupt_delayed_handler(void *param) PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); } /** @@ -5763,7 +5763,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, intr->mask); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5812,7 +5812,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index 1a7aa17..91a226c 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -1412,7 +1412,7 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { /* If MSI-X auto-masking is used, clear the entry */ rte_wmb(); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); } else { /* Make sure all updates are written before un-masking */ rte_wmb(); diff 
--git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile index 2ecbd8d..a11d594 100644 --- a/drivers/net/qede/Makefile +++ b/drivers/net/qede/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_qede.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build index 12388a6..c8f9c6d 100644 --- a/drivers/net/qede/meson.build +++ b/drivers/net/qede/meson.build @@ -10,3 +10,5 @@ sources = files( 'qede_main.c', 'qede_rxtx.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 82363e6..9ac9da3 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -248,8 +248,8 @@ qede_interrupt_handler_intx(void *param) if (status & 0x1) { qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } } @@ -261,8 +261,8 @@ qede_interrupt_handler(void *param) struct ecore_dev *edev = &qdev->edev; qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } static void diff --git a/drivers/net/sfc/sfc_intr.c b/drivers/net/sfc/sfc_intr.c index 1f4969b..76cb630 100644 --- a/drivers/net/sfc/sfc_intr.c +++ b/drivers/net/sfc/sfc_intr.c @@ -79,7 +79,7 @@ sfc_intr_line_handler(void *cb_arg) if (qmask & (1 << sa->mgmt_evq_index)) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); @@ -123,7 +123,7 @@ 
sfc_intr_message_handler(void *cb_arg) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 04aecb7..62c8274 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1265,6 +1265,20 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) } static int +virtio_intr_unmask(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (rte_intr_ack(dev->intr_handle) < 0) + return -1; + + if (!hw->virtio_user_dev) + hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); + + return 0; +} + +static int virtio_intr_enable(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; @@ -1457,7 +1471,7 @@ virtio_interrupt_handler(void *param) isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (virtio_intr_enable(dev) < 0) + if (virtio_intr_unmask(dev) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 2b1e915..57feb37 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -1426,7 +1426,7 @@ vmxnet3_interrupt_handler(void *param) vmxnet3_process_events(dev); - if (rte_intr_enable(&pci_dev->intr_handle) < 0) + if (rte_intr_ack(&pci_dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); } -- 2.8.4
> -----Original Message----- > From: Nithin Dabilpuram <ndabilpuram@marvell.com> > Sent: Wednesday, July 17, 2019 8:59 PM [...] > Subject: [PATCH 3/3] drivers/net: use ack API in interrupt handlers > > Replace rte_intr_enable() with rte_intr_ack() API > for acking an interrupt in interrupt handlers and > rx_queue_intr_enable() callbacks of PMD's. > > This is inline with original intent of this change in PMDs > to ack interrupts after handling is completed if > device is backed by UIO, IGB_UIO or VFIO(with INTx). > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > --- > drivers/net/atlantic/Makefile | 1 + > drivers/net/atlantic/atl_ethdev.c | 2 +- > drivers/net/atlantic/meson.build | 2 ++ > drivers/net/avp/avp_ethdev.c | 2 +- > drivers/net/avp/meson.build | 1 + > drivers/net/axgbe/Makefile | 1 + > drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- > drivers/net/axgbe/meson.build | 1 + > drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- > drivers/net/bnx2x/meson.build | 1 + > drivers/net/e1000/em_ethdev.c | 4 ++-- > drivers/net/e1000/igb_ethdev.c | 6 +++--- > drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- > drivers/net/fm10k/meson.build | 1 + > drivers/net/i40e/i40e_ethdev.c | 2 +- > drivers/net/iavf/iavf_ethdev.c | 2 +- > drivers/net/ice/Makefile | 1 + > drivers/net/ice/ice_ethdev.c | 4 ++-- > drivers/net/ice/meson.build | 1 + > drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- > drivers/net/nfp/nfp_net.c | 2 +- > drivers/net/qede/Makefile | 1 + > drivers/net/qede/meson.build | 2 ++ > drivers/net/qede/qede_ethdev.c | 8 ++++---- > drivers/net/sfc/sfc_intr.c | 4 ++-- > drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- > drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- > 27 files changed, 56 insertions(+), 29 deletions(-) > [...] 
> diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c > index 1a7aa17..91a226c 100644 > --- a/drivers/net/nfp/nfp_net.c > +++ b/drivers/net/nfp/nfp_net.c > @@ -1412,7 +1412,7 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) > if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { > /* If MSI-X auto-masking is used, clear the entry */ > rte_wmb(); > - rte_intr_enable(&pci_dev->intr_handle); > + rte_intr_ack(&pci_dev->intr_handle); > } else { > /* Make sure all updates are written before un-masking */ > rte_wmb(); This part really needs ack/testing by nfp folks. [...] -Hyong
> -----Original Message-----
> From: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Sent: Wednesday, July 17, 2019 9:44 PM
> To: Hyong Youb Kim (hyonkim) <hyonkim@cisco.com>; David Marchand
> <david.marchand@redhat.com>; Thomas Monjalon
> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>; Bruce
> Richardson <bruce.richardson@intel.com>
> Cc: jerinj@marvell.com; John Daley (johndale) <johndale@cisco.com>;
> Shahed Shaikh <shshaikh@marvell.com>; dev@dpdk.org; Nithin Dabilpuram
> <ndabilpuram@marvell.com>
> Subject: [PATCH v2 2/3] eal: add ack interrupt API
>
> Add a new ack interrupt API so that VFIO based interrupt handlers
> no longer need VFIO_IRQ_SET_ACTION_TRIGGER (rte_intr_enable())
> merely to ack an interrupt.
> This implementation is specific to Linux.
>
> Using rte_intr_enable() to ack an interrupt has the following issues:
>
> * It is time consuming to do for every interrupt received, as it performs
> free_irq() followed by request_irq() and all other initializations.
> * It opens a race condition: there is a window between free_irq() and
> request_irq() in which packet reception is still on and the device is
> still enabled, which produces kernel warning messages like the one below.
> [158764.159833] do_IRQ: 9.34 No irq handler for vector
>
> In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts:
> they are edge triggered and the kernel does not mask the interrupt before
> delivering the event to userspace, so no ack is needed.
>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Signed-off-by: Jerin Jacob <jerinj@marvell.com>
> ---
> v2:
> * No change
>
> lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++
> lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++
> lib/librte_eal/linux/eal/eal_interrupts.c | 81
> ++++++++++++++++++++++++++
> lib/librte_eal/rte_eal_version.map | 1 +
> 4 files changed, 113 insertions(+)
>
> diff --git a/lib/librte_eal/common/include/rte_interrupts.h
> b/lib/librte_eal/common/include/rte_interrupts.h
> index c1e912c..93b31cd 100644
> --- a/lib/librte_eal/common/include/rte_interrupts.h
> +++ b/lib/librte_eal/common/include/rte_interrupts.h
> @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle
> *intr_handle);
> */
> int rte_intr_disable(const struct rte_intr_handle *intr_handle);
>
> +/**
> + * It acks an interrupt raised for the specified handle.
> + *
> + * Call this function to ack an interrupt from interrupt
> + * handler either from application or driver, so that
> + * new interrupts are raised.
> + *
> + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI,
> + * this function is a no-op and returns success without
> + * changing anything as kernel doesn't expect
> + * them to be acked.
> + *
[...]
Shouldn't we explain that this is really an "unmask" that is named "ack"
because of x and y, and that it is expected at the end of an INTx handler?
"Ack" does not have a well-defined meaning, whereas everyone knows what
"unmask" means.
[...]
Thanks.
-Hyong
On Wed, Jul 17, 2019 at 12:57:29PM +0000, Hyong Youb Kim (hyonkim) wrote: > External Email > > ---------------------------------------------------------------------- > > -----Original Message----- > > From: Nithin Dabilpuram <ndabilpuram@marvell.com> > > Sent: Wednesday, July 17, 2019 9:44 PM > > To: Hyong Youb Kim (hyonkim) <hyonkim@cisco.com>; David Marchand > > <david.marchand@redhat.com>; Thomas Monjalon > > <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>; Bruce > > Richardson <bruce.richardson@intel.com> > > Cc: jerinj@marvell.com; John Daley (johndale) <johndale@cisco.com>; > > Shahed Shaikh <shshaikh@marvell.com>; dev@dpdk.org; Nithin Dabilpuram > > <ndabilpuram@marvell.com> > > Subject: [PATCH v2 2/3] eal: add ack interrupt API > > > > Add new ack interrupt API to avoid using > > VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for > > acking interrupt purpose for VFIO based interrupt handlers. > > This implementation is specific to Linux. > > > > Using rte_intr_enable() for acking interrupt has below issues > > > > * Time consuming to do for every interrupt received as it will > > free_irq() followed by request_irq() and all other initializations > > * A race condition because of a window between free_irq() and > > request_irq() with packet reception still on and device still > > enabled and would throw warning messages like below. > > [158764.159833] do_IRQ: 9.34 No irq handler for vector > > > > In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts > > as they are edge triggered and kernel would not mask the interrupt before > > delivering the event to userspace and we don't need to ack. 
> > > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > > --- > > v2: > > * No change > > > > lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++ > > lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ > > lib/librte_eal/linux/eal/eal_interrupts.c | 81 > > ++++++++++++++++++++++++++ > > lib/librte_eal/rte_eal_version.map | 1 + > > 4 files changed, 113 insertions(+) > > > > diff --git a/lib/librte_eal/common/include/rte_interrupts.h > > b/lib/librte_eal/common/include/rte_interrupts.h > > index c1e912c..93b31cd 100644 > > --- a/lib/librte_eal/common/include/rte_interrupts.h > > +++ b/lib/librte_eal/common/include/rte_interrupts.h > > @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle > > *intr_handle); > > */ > > int rte_intr_disable(const struct rte_intr_handle *intr_handle); > > > > +/** > > + * It acks an interrupt raised for the specified handle. > > + * > > + * Call this function to ack an interrupt from interrupt > > + * handler either from application or driver, so that > > + * new interrupts are raised. > > + * > > + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI, > > + * this function is a no-op and returns success without > > + * changing anything as kernel doesn't expect > > + * them to be acked. > > + * > [...] > > Shouldn't we explain that this really is "unmask" but named "ack" because > of x and y, and that it is expected at end of INTx handler? Ack does > not have a well-defined meaning, whereas everyone knows what unmask > means.. > Ok. Is the below text fine with you ? Or please suggest. @note For interrupt handle types VFIO_MSIX and VFIO_MSI, this function is a no-op and returns success without changing anything as kernel doesn't expect them to be acked. 
This needs to be used at least for PCI devices with INTx interrupts: before passing on the event for an INTx-triggered interrupt, the kernel masks the interrupt and expects the application to unmask it so that further interrupts can be raised/triggered. This is also due to the fact that INTx is a level-triggered interrupt whereas MSI/MSI-X is not. Ideally this should have been called intr_unmask(), representing the underlying API, but since the unmask operation is not supported and not needed for VFIO MSI/MSI-X interrupts after handling, it is named ack. > [...] > > Thanks. > -Hyong >
> -----Original Message----- > From: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com> > Sent: Wednesday, July 17, 2019 11:36 PM [...] > > > Subject: [PATCH v2 2/3] eal: add ack interrupt API > > > > > > Add new ack interrupt API to avoid using > > > VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for > > > acking interrupt purpose for VFIO based interrupt handlers. > > > This implementation is specific to Linux. > > > > > > Using rte_intr_enable() for acking interrupt has below issues > > > > > > * Time consuming to do for every interrupt received as it will > > > free_irq() followed by request_irq() and all other initializations > > > * A race condition because of a window between free_irq() and > > > request_irq() with packet reception still on and device still > > > enabled and would throw warning messages like below. > > > [158764.159833] do_IRQ: 9.34 No irq handler for vector > > > > > > In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI > interrupts > > > as they are edge triggered and kernel would not mask the interrupt > before > > > delivering the event to userspace and we don't need to ack. 
> > > > > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > > > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > > > --- > > > v2: > > > * No change > > > > > > lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++ > > > lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ > > > lib/librte_eal/linux/eal/eal_interrupts.c | 81 > > > ++++++++++++++++++++++++++ > > > lib/librte_eal/rte_eal_version.map | 1 + > > > 4 files changed, 113 insertions(+) > > > > > > diff --git a/lib/librte_eal/common/include/rte_interrupts.h > > > b/lib/librte_eal/common/include/rte_interrupts.h > > > index c1e912c..93b31cd 100644 > > > --- a/lib/librte_eal/common/include/rte_interrupts.h > > > +++ b/lib/librte_eal/common/include/rte_interrupts.h > > > @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle > > > *intr_handle); > > > */ > > > int rte_intr_disable(const struct rte_intr_handle *intr_handle); > > > > > > +/** > > > + * It acks an interrupt raised for the specified handle. > > > + * > > > + * Call this function to ack an interrupt from interrupt > > > + * handler either from application or driver, so that > > > + * new interrupts are raised. > > > + * > > > + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI, > > > + * this function is a no-op and returns success without > > > + * changing anything as kernel doesn't expect > > > + * them to be acked. > > > + * > > [...] > > > > Shouldn't we explain that this really is "unmask" but named "ack" because > > of x and y, and that it is expected at end of INTx handler? Ack does > > not have a well-defined meaning, whereas everyone knows what unmask > > means.. > > > > > Ok. Is the below text fine with you ? Or please suggest. > > @note For interrupt handle types VFIO_MSIX and VFIO_MSI, > this function is a no-op and returns success without > changing anything as kernel doesn't expect > them to be acked. 
> This needs be used atleast for PCI devices with INTx interrupt > as kernel before passing on event for INTx triggered interrupt, > masks the interrupt and expects application to unmask it so that, > further interrupts can be raised/triggered. This is also due to > the fact that INTx is level triggered interrupt where as MSI/MSIx > is not. Ideally this should have been called as intr_unmask() > representing underlying api, but since unmask operation > is not supported and not needed for VFIO MSI/MSIx interrupts > after handling, it is named as ack. > How about this? PMD generally calls this function at the end of its IRQ callback. Internally, it unmasks the interrupt if possible. For INTx, unmasking is required as the interrupt is auto-masked prior to invoking callback. For MSI/MSI-X, unmasking is typically not needed as the interrupt is not auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI, this function is no-op. Thanks for your effort.. -Hyong
On Wed, Jul 17, 2019 at 03:05:47PM +0000, Hyong Youb Kim (hyonkim) wrote: > > -----Original Message----- > > From: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com> > > Sent: Wednesday, July 17, 2019 11:36 PM > [...] > > > > Subject: [PATCH v2 2/3] eal: add ack interrupt API > > > > > > > > Add new ack interrupt API to avoid using > > > > VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for > > > > acking interrupt purpose for VFIO based interrupt handlers. > > > > This implementation is specific to Linux. > > > > > > > > Using rte_intr_enable() for acking interrupt has below issues > > > > > > > > * Time consuming to do for every interrupt received as it will > > > > free_irq() followed by request_irq() and all other initializations > > > > * A race condition because of a window between free_irq() and > > > > request_irq() with packet reception still on and device still > > > > enabled and would throw warning messages like below. > > > > [158764.159833] do_IRQ: 9.34 No irq handler for vector > > > > > > > > In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI > > interrupts > > > > as they are edge triggered and kernel would not mask the interrupt > > before > > > > delivering the event to userspace and we don't need to ack. 
> > > > > > > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > > > > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > > > > --- > > > > v2: > > > > * No change > > > > > > > > lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++ > > > > lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ > > > > lib/librte_eal/linux/eal/eal_interrupts.c | 81 > > > > ++++++++++++++++++++++++++ > > > > lib/librte_eal/rte_eal_version.map | 1 + > > > > 4 files changed, 113 insertions(+) > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_interrupts.h > > > > b/lib/librte_eal/common/include/rte_interrupts.h > > > > index c1e912c..93b31cd 100644 > > > > --- a/lib/librte_eal/common/include/rte_interrupts.h > > > > +++ b/lib/librte_eal/common/include/rte_interrupts.h > > > > @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle > > > > *intr_handle); > > > > */ > > > > int rte_intr_disable(const struct rte_intr_handle *intr_handle); > > > > > > > > +/** > > > > + * It acks an interrupt raised for the specified handle. > > > > + * > > > > + * Call this function to ack an interrupt from interrupt > > > > + * handler either from application or driver, so that > > > > + * new interrupts are raised. > > > > + * > > > > + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI, > > > > + * this function is a no-op and returns success without > > > > + * changing anything as kernel doesn't expect > > > > + * them to be acked. > > > > + * > > > [...] > > > > > > Shouldn't we explain that this really is "unmask" but named "ack" because > > > of x and y, and that it is expected at end of INTx handler? Ack does > > > not have a well-defined meaning, whereas everyone knows what unmask > > > means.. > > > > > > > > > Ok. Is the below text fine with you ? Or please suggest. 
> > > > @note For interrupt handle types VFIO_MSIX and VFIO_MSI, > > this function is a no-op and returns success without > > changing anything as kernel doesn't expect > > them to be acked. > > This needs be used atleast for PCI devices with INTx interrupt > > as kernel before passing on event for INTx triggered interrupt, > > masks the interrupt and expects application to unmask it so that, > > further interrupts can be raised/triggered. This is also due to > > the fact that INTx is level triggered interrupt where as MSI/MSIx > > is not. Ideally this should have been called as intr_unmask() > > representing underlying api, but since unmask operation > > is not supported and not needed for VFIO MSI/MSIx interrupts > > after handling, it is named as ack. > > > > How about this? > > PMD generally calls this function at the end of its IRQ callback. > Internally, it unmasks the interrupt if possible. For INTx, unmasking > is required as the interrupt is auto-masked prior to invoking > callback. For MSI/MSI-X, unmasking is typically not needed as the > interrupt is not auto-masked. In fact, for interrupt handle types > VFIO_MSIX and VFIO_MSI, this function is no-op. > Ok. Thanks. Will add this in next revision. > Thanks for your effort.. > -Hyong >
> -----Original Message----- > From: Nithin Dabilpuram <ndabilpuram@marvell.com> > Sent: Wednesday, July 17, 2019 5:29 PM > To: Hyong Youb Kim <hyonkim@cisco.com>; David Marchand > <david.marchand@redhat.com>; Thomas Monjalon <thomas@monjalon.net>; > Ferruh Yigit <ferruh.yigit@intel.com>; Igor Russkikh > <igor.russkikh@aquantia.com>; Pavel Belous <pavel.belous@aquantia.com>; > Allain Legacy <allain.legacy@windriver.com>; Matt Peters > <matt.peters@windriver.com>; Ravi Kumar <ravi1.kumar@amd.com>; Rasesh > Mody <rmody@marvell.com>; Shahed Shaikh <shshaikh@marvell.com>; > Wenzhuo Lu <wenzhuo.lu@intel.com>; Qi Zhang <qi.z.zhang@intel.com>; Xiao > Wang <xiao.w.wang@intel.com>; Beilei Xing <beilei.xing@intel.com>; Jingjing > Wu <jingjing.wu@intel.com>; Qiming Yang <qiming.yang@intel.com>; > Konstantin Ananyev <konstantin.ananyev@intel.com>; Alejandro Lucero > <alejandro.lucero@netronome.com>; Andrew Rybchenko > <arybchenko@solarflare.com>; Maxime Coquelin > <maxime.coquelin@redhat.com>; Tiwei Bie <tiwei.bie@intel.com>; Zhihong > Wang <zhihong.wang@intel.com>; Yong Wang <yongwang@vmware.com> > Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; John Daley > <johndale@cisco.com>; dev@dpdk.org; Nithin Kumar Dabilpuram > <ndabilpuram@marvell.com> > Subject: [PATCH 3/3] drivers/net: use ack API in interrupt handlers > > Replace rte_intr_enable() with rte_intr_ack() API for acking an interrupt in > interrupt handlers and > rx_queue_intr_enable() callbacks of PMD's. > > This is inline with original intent of this change in PMDs to ack interrupts after > handling is completed if device is backed by UIO, IGB_UIO or VFIO(with INTx). 
> > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > --- > drivers/net/atlantic/Makefile | 1 + > drivers/net/atlantic/atl_ethdev.c | 2 +- > drivers/net/atlantic/meson.build | 2 ++ > drivers/net/avp/avp_ethdev.c | 2 +- > drivers/net/avp/meson.build | 1 + > drivers/net/axgbe/Makefile | 1 + > drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- > drivers/net/axgbe/meson.build | 1 + > drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- > drivers/net/bnx2x/meson.build | 1 + > drivers/net/e1000/em_ethdev.c | 4 ++-- > drivers/net/e1000/igb_ethdev.c | 6 +++--- > drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- > drivers/net/fm10k/meson.build | 1 + > drivers/net/i40e/i40e_ethdev.c | 2 +- > drivers/net/iavf/iavf_ethdev.c | 2 +- > drivers/net/ice/Makefile | 1 + > drivers/net/ice/ice_ethdev.c | 4 ++-- > drivers/net/ice/meson.build | 1 + > drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- > drivers/net/nfp/nfp_net.c | 2 +- > drivers/net/qede/Makefile | 1 + > drivers/net/qede/meson.build | 2 ++ > drivers/net/qede/qede_ethdev.c | 8 ++++---- > drivers/net/sfc/sfc_intr.c | 4 ++-- > drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- > drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- > 27 files changed, 56 insertions(+), 29 deletions(-) > ... Hi Nithin, Jerin and Hyong, I have tested this series with all igb_uio, vfio-pci and uio_pci_generic modules which covers both MSIx and INTx modes and everything is working fine. 
> a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build index > 12388a6..c8f9c6d 100644 > --- a/drivers/net/qede/meson.build > +++ b/drivers/net/qede/meson.build > @@ -10,3 +10,5 @@ sources = files( > 'qede_main.c', > 'qede_rxtx.c', > ) > + > +cflags += '-DALLOW_EXPERIMENTAL_API' > diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c > index 82363e6..9ac9da3 100644 > --- a/drivers/net/qede/qede_ethdev.c > +++ b/drivers/net/qede/qede_ethdev.c > @@ -248,8 +248,8 @@ qede_interrupt_handler_intx(void *param) > if (status & 0x1) { > qede_interrupt_action(ECORE_LEADING_HWFN(edev)); > > - if (rte_intr_enable(eth_dev->intr_handle)) > - DP_ERR(edev, "rte_intr_enable failed\n"); > + if (rte_intr_ack(eth_dev->intr_handle)) > + DP_ERR(edev, "rte_intr_ack failed\n"); > } > } ACK for this change. > > @@ -261,8 +261,8 @@ qede_interrupt_handler(void *param) > struct ecore_dev *edev = &qdev->edev; > > qede_interrupt_action(ECORE_LEADING_HWFN(edev)); > - if (rte_intr_enable(eth_dev->intr_handle)) > - DP_ERR(edev, "rte_intr_enable failed\n"); > + if (rte_intr_ack(eth_dev->intr_handle)) > + DP_ERR(edev, "rte_intr_ack failed\n"); > } > I tried to remove rte_intr_ack() from MSIx interrupt handler in qede and device still generates interrupts. That means in MSIx interrupt handler - qede_interrupt_handler(), we can remove rte_intr_ack()/rte_intr_enable() call. So for qede PMD - Acked-by: Shahed Shaikh <shshaikh@marvell.com> For whole series - Tested-by: Shahed Shaikh <shshaikh@marvell.com> Thanks, Shahed .
A final patch set for the RFC patch discussions mentioned below. [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe http://mails.dpdk.org/archives/dev/2019-July/138358.html [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs http://mails.dpdk.org/archives/dev/2019-July/138357.html [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers http://mails.dpdk.org/archives/dev/2019-July/138359.html Please also refer to the discussions below: http://mails.dpdk.org/archives/dev/2019-July/138184.html http://mails.dpdk.org/archives/dev/2019-July/138113.html This patch set basically reverts the vfio patch that moves eventfd setup to probe time (1/3) because it breaks MSI-X initialization. It adds a new ack interrupt API to provide a simple, lightweight ack method and also to avoid the race conditions that were the root cause of this discussion. Nithin Dabilpuram (3): vfio: revert change that does intr eventfd setup at probe eal: add ack interrupt API drivers/net: use ack API in interrupt handlers drivers/bus/pci/linux/pci_vfio.c | 78 ++++--- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 +- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 +- drivers/net/e1000/igb_ethdev.c | 6 +- drivers/net/fm10k/fm10k_ethdev.c | 6 +- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 +- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 + drivers/net/qede/qede_ethdev.c | 6 +- drivers/net/sfc/sfc_intr.c | 4 +- 
drivers/net/virtio/virtio_ethdev.c | 16 +- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- lib/librte_eal/common/include/rte_interrupts.h | 24 +++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 + lib/librte_eal/linux/eal/eal_interrupts.c | 282 ++++++++++++++++++++----- lib/librte_eal/rte_eal_version.map | 1 + 32 files changed, 360 insertions(+), 117 deletions(-) -- 2.8.4
This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4 ("vfio: fix interrupts race condition"). The above-mentioned commit moves the interrupt's eventfd setup to probe time but only enables one interrupt for all types of interrupt handles, i.e. VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. It works fine in the default case but breaks the cases below, specifically for MSI-X based interrupt handles. * Applications like l3fwd-power that request rxq interrupts during ethdev setup. * Drivers that need > 1 MSI-X interrupts to be configured for functionality to work. VFIO PCI for MSI-X expects all the possible vectors to be set up when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from the kernel PCI subsystem. The only way to increase the number of vectors later is to first free all of them by using VFIO_IRQ_SET_DATA_NONE with action trigger and then enable the new vector count. The above commit changes the behavior of rte_intr_[enable|disable] to only mask and unmask, unlike the earlier behavior, thereby breaking the above two scenarios. 
Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition") Cc: david.marchand@redhat.com Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Stephen Hemminger <stephen@networkplumber.org> Tested-by: Shahed Shaikh <shshaikh@marvell.com> --- v3: * Update Shahed Shaikh's tested-by v2: * Include tested by sign from Stephen drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ lib/librte_eal/linux/eal/eal_interrupts.c | 201 +++++++++++++++++++++++------- 2 files changed, 191 insertions(+), 88 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ee31239..1ceb1c0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,11 +187,8 @@ pci_vfio_set_bus_master(int dev_fd, bool op) static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; - struct vfio_irq_set *irq_set; - enum rte_intr_mode intr_mode; int i, ret, intr_idx; - int fd; + enum rte_intr_mode intr_mode; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -223,6 +220,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -238,51 +236,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } - /* found a usable interrupt mode */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) - break; - /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != 
RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); return -1; } - } - if (i < 0) - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } - irq_set = (struct vfio_irq_set *)irq_set_buf; - irq_set->argsz = sizeof(irq_set_buf); - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = i; - irq_set->start = 0; - irq_set->count = 1; - memcpy(&irq_set->data, &fd, sizeof(int)); - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); - close(fd); - return -1; + return 0; } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - if (i == VFIO_PCI_MSIX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - else if (i == VFIO_PCI_MSI_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - else if (i == VFIO_PCI_INTX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - - return 0; + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; } #ifdef 
HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 27976b3..79ad5e8 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,19 +109,42 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; - - /* unmask INTx */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -133,51 +156,128 @@ vfio_enable_intx(const struct rte_intr_handle *intr_handle) /* disable legacy (INTx) 
interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; - /* mask interrupts */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + len = sizeof(struct vfio_irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } return 0; } +/* enable MSI interrupts */ +static int +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) 
&irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + /* enable MSI-X interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) -{ +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; - int len, ret; - - if (intr_handle->nb_efd == 0) - return 0; + int *fd_ptr; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? 
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set->count = intr_handle->nb_efd; - memcpy(&irq_set->data, intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -189,21 +289,22 @@ vfio_enable_msix(const struct rte_intr_handle *intr_handle) /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set.count = intr_handle->nb_efd; + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -564,7 +665,9 
@@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_enable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -618,7 +721,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_disable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1; -- 2.8.4
Add new ack interrupt API to avoid using VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for acking interrupt purpose for VFIO based interrupt handlers. This implementation is specific to Linux. Using rte_intr_enable() for acking interrupt has below issues * Time consuming to do for every interrupt received as it will free_irq() followed by request_irq() and all other initializations * A race condition because of a window between free_irq() and request_irq() with packet reception still on and device still enabled and would throw warning messages like below. [158764.159833] do_IRQ: 9.34 No irq handler for vector In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts as they are edge triggered and kernel would not mask the interrupt before delivering the event to userspace and we don't need to ack. Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Shahed Shaikh <shshaikh@marvell.com> --- v3: * Update note on new api v2: * No change lib/librte_eal/common/include/rte_interrupts.h | 24 ++++++++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ lib/librte_eal/linux/eal/eal_interrupts.c | 81 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 4 files changed, 115 insertions(+) diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index c1e912c..5811b78 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -118,6 +118,30 @@ int rte_intr_enable(const struct rte_intr_handle *intr_handle); */ int rte_intr_disable(const struct rte_intr_handle *intr_handle); +/** + * It acks an interrupt raised for the specified handle. + * + * Call this function to ack an interrupt from interrupt + * handler either from application or driver, so that + * new interrupts are raised. + * + * @note PMD generally calls this function at the end of its IRQ callback. 
+ * Internally, it unmasks the interrupt if possible. For INTx, unmasking + * is required as the interrupt is auto-masked prior to invoking + * callback. For MSI/MSI-X, unmasking is typically not needed as the + * interrupt is not auto-masked. In fact, for interrupt handle types + * VFIO_MSIX and VFIO_MSI, this function is no-op. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +__rte_experimental +int rte_intr_ack(const struct rte_intr_handle *intr_handle); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/freebsd/eal/eal_interrupts.c b/lib/librte_eal/freebsd/eal/eal_interrupts.c index 10375bd..f6831b7 100644 --- a/lib/librte_eal/freebsd/eal/eal_interrupts.c +++ b/lib/librte_eal/freebsd/eal/eal_interrupts.c @@ -387,6 +387,15 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return 0; } +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + return -1; +} + static void eal_intr_process_interrupts(struct kevent *events, int nfds) { diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 79ad5e8..91e220c 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -197,6 +197,35 @@ vfio_disable_intx(const struct rte_intr_handle *intr_handle) { return 0; } +/* unmask/ack legacy (INTx) interrupts */ +static int +vfio_ack_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* unmask INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + 
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + /* enable MSI interrupts */ static int vfio_enable_msi(const struct rte_intr_handle *intr_handle) { @@ -694,6 +723,58 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) } int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* Both acking and disabling are same for UIO */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + /* Since VFIO_MSIX is implicitly acked + * unlike INTx, we report success + */ + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + return 0; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_ack_intx(intr_handle)) + return -1; + break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + return -1; +#endif +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +int rte_intr_disable(const struct rte_intr_handle *intr_handle) { if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 1892d9e..0537a6d 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -407,4 +407,5 @@ EXPERIMENTAL { rte_lcore_to_cpu_id; rte_mcfg_timer_lock; rte_mcfg_timer_unlock; + 
rte_intr_ack; }; -- 2.8.4
Replace rte_intr_enable() with rte_intr_ack() API for acking an interrupt in interrupt handlers and rx_queue_intr_enable() callbacks of PMD's. This is inline with original intent of this change in PMDs to ack interrupts after handling is completed if device is backed by UIO, IGB_UIO or VFIO(with INTx). Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Acked-by: Shahed Shaikh <shshaikh@marvell.com> Tested-by: Shahed Shaikh <shshaikh@marvell.com> --- v3: * Removed ack call from qede PMD msix handler * Ack from Shahed v2: * No change drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 ++ drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 ++-- drivers/net/e1000/igb_ethdev.c | 6 +++--- drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 ++-- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 ++ drivers/net/qede/qede_ethdev.c | 6 ++---- drivers/net/sfc/sfc_intr.c | 4 ++-- drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- 27 files changed, 54 insertions(+), 29 deletions(-) diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile index 263f12b..fc12e6a 100644 --- a/drivers/net/atlantic/Makefile +++ b/drivers/net/atlantic/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_atlantic.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := 
rte_pmd_atlantic_version.map diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c index fdc0a7f..79001da 100644 --- a/drivers/net/atlantic/atl_ethdev.c +++ b/drivers/net/atlantic/atl_ethdev.c @@ -1394,7 +1394,7 @@ atl_dev_interrupt_action(struct rte_eth_dev *dev, } done: atl_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build index 60b8468..d5602dd 100644 --- a/drivers/net/atlantic/meson.build +++ b/drivers/net/atlantic/meson.build @@ -11,3 +11,5 @@ sources = files( 'hw_atl/hw_atl_utils.c', 'rte_pmd_atlantic.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 47b96ec..504435e 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -713,7 +713,7 @@ avp_dev_interrupt_handler(void *data) status); /* re-enable UIO interrupt handling */ - ret = rte_intr_enable(&pci_dev->intr_handle); + ret = rte_intr_ack(&pci_dev->intr_handle); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", ret); diff --git a/drivers/net/avp/meson.build b/drivers/net/avp/meson.build index a5f63cd..7fb9706 100644 --- a/drivers/net/avp/meson.build +++ b/drivers/net/avp/meson.build @@ -7,3 +7,4 @@ if not is_linux endif sources = files('avp_ethdev.c') install_headers('rte_avp_common.h', 'rte_avp_fifo.h') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile index c2d4336..bcdcd54 100644 --- a/drivers/net/axgbe/Makefile +++ b/drivers/net/axgbe/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_axgbe.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_axgbe_version.map diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index cfb1720..56d8dac 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ 
b/drivers/net/axgbe/axgbe_ethdev.c @@ -136,8 +136,8 @@ axgbe_dev_interrupt_handler(void *param) DMA_CH_SR, dma_ch_isr); } } - /* Enable interrupts since disabled after generation*/ - rte_intr_enable(&pdata->pci_dev->intr_handle); + /* Unmask interrupts since disabled after generation*/ + rte_intr_ack(&pdata->pci_dev->intr_handle); } /* diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build index 86873b7..226d11d 100644 --- a/drivers/net/axgbe/meson.build +++ b/drivers/net/axgbe/meson.build @@ -14,6 +14,7 @@ sources = files('axgbe_ethdev.c', 'axgbe_rxtx.c') cflags += '-Wno-cast-qual' +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('axgbe_rxtx_vec_sse.c') diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index 10b4fdb..191a3ef 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -133,7 +133,7 @@ bnx2x_interrupt_handler(void *param) PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled"); bnx2x_interrupt_action(dev, 1); - rte_intr_enable(&sc->pci_dev->intr_handle); + rte_intr_ack(&sc->pci_dev->intr_handle); } static void bnx2x_periodic_start(void *param) diff --git a/drivers/net/bnx2x/meson.build b/drivers/net/bnx2x/meson.build index 4892bb2..1bc84b7 100644 --- a/drivers/net/bnx2x/meson.build +++ b/drivers/net/bnx2x/meson.build @@ -6,6 +6,7 @@ build = dep.found() reason = 'missing dependency, "zlib"' ext_deps += dep cflags += '-DZLIB_CONST' +cflags += '-DALLOW_EXPERIMENTAL_API' sources = files('bnx2x.c', 'bnx2x_ethdev.c', 'bnx2x_rxtx.c', diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index dc88661..0b7f501 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1001,7 +1001,7 @@ eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); 
return 0; } @@ -1568,7 +1568,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev, return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c index 3ee28cf..793a31c 100644 --- a/drivers/net/e1000/igb_ethdev.c +++ b/drivers/net/e1000/igb_ethdev.c @@ -2876,7 +2876,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev, } igb_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2987,7 +2987,7 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr } igbvf_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5500,7 +5500,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index a1e3836..e8fa8a2 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -2381,7 +2381,7 @@ fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) else FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&pdev->intr_handle); + rte_intr_ack(&pdev->intr_handle); return 0; } @@ -2680,7 +2680,7 @@ fm10k_dev_interrupt_handler_pf(void *param) FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /** @@ -2760,7 +2760,7 @@ fm10k_dev_interrupt_handler_vf(void *param) FM10K_WRITE_REG(hw, FM10K_VFITR(0), 
FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Mailbox message handler in VF */ diff --git a/drivers/net/fm10k/meson.build b/drivers/net/fm10k/meson.build index 2772ea4..b7d34c7 100644 --- a/drivers/net/fm10k/meson.build +++ b/drivers/net/fm10k/meson.build @@ -14,3 +14,4 @@ if arch_subdir == 'x86' endif includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 2b9fc45..5217b9c 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -11646,7 +11646,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); I40E_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 53dc05c..00d6ed5 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1098,7 +1098,7 @@ iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) IAVF_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile index 32abeb6..ae53c26 100644 --- a/drivers/net/ice/Makefile +++ b/drivers/net/ice/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_ice.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_kvargs LDLIBS += -lrte_bus_pci -lrte_mempool diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index 9ce730c..77b5a71 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -1118,7 +1118,7 @@ ice_interrupt_handler(void *param) done: /* Enable interrupt */ ice_pf_enable_irq0(hw); - 
rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Initialize SW parameters of PF */ @@ -3002,7 +3002,7 @@ static int ice_rx_queue_intr_enable(struct rte_eth_dev *dev, val &= ~GLINT_DYN_CTL_WB_ON_ITR_M; ICE_WRITE_REG(hw, GLINT_DYN_CTL(msix_intr), val); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index 7f16647..70f349e 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -13,6 +13,7 @@ sources = files( deps += ['hash'] includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('ice_rxtx_vec_sse.c') diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 22c5b2c..f5920f5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -4502,7 +4502,7 @@ ixgbe_dev_interrupt_delayed_handler(void *param) PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); } /** @@ -5763,7 +5763,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, intr->mask); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5812,7 +5812,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index 1a7aa17..91a226c 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -1412,7 +1412,7 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { /* If MSI-X auto-masking is used, clear the entry */ rte_wmb(); - 
rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); } else { /* Make sure all updates are written before un-masking */ rte_wmb(); diff --git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile index 2ecbd8d..a11d594 100644 --- a/drivers/net/qede/Makefile +++ b/drivers/net/qede/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_qede.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build index 12388a6..c8f9c6d 100644 --- a/drivers/net/qede/meson.build +++ b/drivers/net/qede/meson.build @@ -10,3 +10,5 @@ sources = files( 'qede_main.c', 'qede_rxtx.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 82363e6..a3506ed 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -248,8 +248,8 @@ qede_interrupt_handler_intx(void *param) if (status & 0x1) { qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } } @@ -261,8 +261,6 @@ qede_interrupt_handler(void *param) struct ecore_dev *edev = &qdev->edev; qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); } static void diff --git a/drivers/net/sfc/sfc_intr.c b/drivers/net/sfc/sfc_intr.c index 1f4969b..76cb630 100644 --- a/drivers/net/sfc/sfc_intr.c +++ b/drivers/net/sfc/sfc_intr.c @@ -79,7 +79,7 @@ sfc_intr_line_handler(void *cb_arg) if (qmask & (1 << sa->mgmt_evq_index)) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable 
interrupts"); sfc_log_init(sa, "done"); @@ -123,7 +123,7 @@ sfc_intr_message_handler(void *cb_arg) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 04aecb7..62c8274 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1265,6 +1265,20 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) } static int +virtio_intr_unmask(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (rte_intr_ack(dev->intr_handle) < 0) + return -1; + + if (!hw->virtio_user_dev) + hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); + + return 0; +} + +static int virtio_intr_enable(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; @@ -1457,7 +1471,7 @@ virtio_interrupt_handler(void *param) isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (virtio_intr_enable(dev) < 0) + if (virtio_intr_unmask(dev) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 2b1e915..57feb37 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -1426,7 +1426,7 @@ vmxnet3_interrupt_handler(void *param) vmxnet3_process_events(dev); - if (rte_intr_enable(&pci_dev->intr_handle) < 0) + if (rte_intr_ack(&pci_dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); } -- 2.8.4
On 17-Jul-19 4:05 PM, Hyong Youb Kim (hyonkim) wrote: >> -----Original Message----- >> From: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com> >> Sent: Wednesday, July 17, 2019 11:36 PM > [...] >>>> Subject: [PATCH v2 2/3] eal: add ack interrupt API >>>> >>>> Add new ack interrupt API to avoid using >>>> VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for >>>> acking interrupt purpose for VFIO based interrupt handlers. >>>> This implementation is specific to Linux. >>>> >>>> Using rte_intr_enable() for acking interrupt has below issues >>>> >>>> * Time consuming to do for every interrupt received as it will >>>> free_irq() followed by request_irq() and all other initializations >>>> * A race condition because of a window between free_irq() and >>>> request_irq() with packet reception still on and device still >>>> enabled and would throw warning messages like below. >>>> [158764.159833] do_IRQ: 9.34 No irq handler for vector >>>> >>>> In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI >> interrupts >>>> as they are edge triggered and kernel would not mask the interrupt >> before >>>> delivering the event to userspace and we don't need to ack. 
>>>> >>>> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> >>>> Signed-off-by: Jerin Jacob <jerinj@marvell.com> >>>> --- >>>> v2: >>>> * No change >>>> >>>> lib/librte_eal/common/include/rte_interrupts.h | 22 +++++++ >>>> lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ >>>> lib/librte_eal/linux/eal/eal_interrupts.c | 81 >>>> ++++++++++++++++++++++++++ >>>> lib/librte_eal/rte_eal_version.map | 1 + >>>> 4 files changed, 113 insertions(+) >>>> >>>> diff --git a/lib/librte_eal/common/include/rte_interrupts.h >>>> b/lib/librte_eal/common/include/rte_interrupts.h >>>> index c1e912c..93b31cd 100644 >>>> --- a/lib/librte_eal/common/include/rte_interrupts.h >>>> +++ b/lib/librte_eal/common/include/rte_interrupts.h >>>> @@ -118,6 +118,28 @@ int rte_intr_enable(const struct rte_intr_handle >>>> *intr_handle); >>>> */ >>>> int rte_intr_disable(const struct rte_intr_handle *intr_handle); >>>> >>>> +/** >>>> + * It acks an interrupt raised for the specified handle. >>>> + * >>>> + * Call this function to ack an interrupt from interrupt >>>> + * handler either from application or driver, so that >>>> + * new interrupts are raised. >>>> + * >>>> + * @note For interrupt handle types VFIO_MSIX and VFIO_MSI, >>>> + * this function is a no-op and returns success without >>>> + * changing anything as kernel doesn't expect >>>> + * them to be acked. >>>> + * >>> [...] >>> >>> Shouldn't we explain that this really is "unmask" but named "ack" because >>> of x and y, and that it is expected at end of INTx handler? Ack does >>> not have a well-defined meaning, whereas everyone knows what unmask >>> means.. >>> >> >> >> Ok. Is the below text fine with you ? Or please suggest. >> >> @note For interrupt handle types VFIO_MSIX and VFIO_MSI, >> this function is a no-op and returns success without >> changing anything as kernel doesn't expect >> them to be acked. 
>> This needs to be used at least for PCI devices with INTx interrupt >> as kernel before passing on event for INTx triggered interrupt, >> masks the interrupt and expects application to unmask it so that, >> further interrupts can be raised/triggered. This is also due to >> the fact that INTx is level triggered interrupt whereas MSI/MSIx >> is not. Ideally this should have been called as intr_unmask() >> representing underlying api, but since unmask operation >> is not supported and not needed for VFIO MSI/MSIx interrupts >> after handling, it is named as ack. >> > > How about this? > > PMD generally calls this function at the end of its IRQ callback. > Internally, it unmasks the interrupt if possible. For INTx, unmasking > is required as the interrupt is auto-masked prior to invoking > callback. For MSI/MSI-X, unmasking is typically not needed as the > interrupt is not auto-masked. In fact, for interrupt handle types > VFIO_MSIX and VFIO_MSI, this function is no-op. > Does the user of this API even care about these details? I would think that it would be easier to just mandate calling this function at the end of each interrupt callback period, regardless of which interrupt mode is used. Internal details are better explained in the implementation. > Thanks for your effort.. > -Hyong > > -- Thanks, Anatoly
On 18-Jul-19 9:46 AM, Nithin Dabilpuram wrote: > Add new ack interrupt API to avoid using > VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for > acking interrupt purpose for VFIO based interrupt handlers. > This implementation is specific to Linux. > > Using rte_intr_enable() for acking interrupt has below issues > > * Time consuming to do for every interrupt received as it will > free_irq() followed by request_irq() and all other initializations > * A race condition because of a window between free_irq() and > request_irq() with packet reception still on and device still > enabled and would throw warning messages like below. > [158764.159833] do_IRQ: 9.34 No irq handler for vector > > In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts > as they are edge triggered and kernel would not mask the interrupt before > delivering the event to userspace and we don't need to ack. > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > Tested-by: Shahed Shaikh <shshaikh@marvell.com> > --- <snip> > */ > int rte_intr_disable(const struct rte_intr_handle *intr_handle); > > +/** > + * It acks an interrupt raised for the specified handle. > + * > + * Call this function to ack an interrupt from interrupt > + * handler either from application or driver, so that > + * new interrupts are raised. > + * > + * @note PMD generally calls this function at the end of its IRQ callback. > + * Internally, it unmasks the interrupt if possible. For INTx, unmasking > + * is required as the interrupt is auto-masked prior to invoking > + * callback. For MSI/MSI-X, unmasking is typically not needed as the > + * interrupt is not auto-masked. In fact, for interrupt handle types > + * VFIO_MSIX and VFIO_MSI, this function is no-op. I've added this comment in the previous revision's discussion, but I'll copy it here as well. Does the user of this API even care about these details? 
I would think that it would be easier to just mandate calling this function at the end of each interrupt callback, regardless of which interrupt mode is used. IMO internal details (about no-ops and such) are better explained in the implementation, not in the public-facing API header. -- Thanks, Anatoly
> -----Original Message-----
> From: Burakov, Anatoly <anatoly.burakov@intel.com>
> Sent: Thursday, July 18, 2019 6:43 PM
> To: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Hyong Youb Kim
> <hyonkim@cisco.com>; David Marchand <david.marchand@redhat.com>;
> Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@intel.com>; Bruce Richardson <bruce.richardson@intel.com>
> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; John Daley
> <johndale@cisco.com>; Shahed Shaikh <shshaikh@marvell.com>;
> dev@dpdk.org
> Subject: [EXT] Re: [dpdk-dev] [PATCH v3 2/3] eal: add ack interrupt API
> On 18-Jul-19 9:46 AM, Nithin Dabilpuram wrote:
> > Add new ack interrupt API to avoid using
> > VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for acking interrupt
> > purpose for VFIO based interrupt handlers.
> > This implementation is specific to Linux.
> >
> > Using rte_intr_enable() for acking interrupt has below issues
> >
> > * Time consuming to do for every interrupt received as it will
> > free_irq() followed by request_irq() and all other initializations
> > * A race condition because of a window between free_irq() and
> > request_irq() with packet reception still on and device still
> > enabled and would throw warning messages like below.
> > [158764.159833] do_IRQ: 9.34 No irq handler for vector
> >
> > In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI
> > interrupts as they are edge triggered and kernel would not mask the
> > interrupt before delivering the event to userspace and we don't need to
> ack.
> >
> > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> > Signed-off-by: Jerin Jacob <jerinj@marvell.com>
> > Tested-by: Shahed Shaikh <shshaikh@marvell.com>
> > ---
>
> <snip>
>
> > */
> > int rte_intr_disable(const struct rte_intr_handle *intr_handle);
> >
> > +/**
> > + * It acks an interrupt raised for the specified handle.
> > + *
> > + * Call this function to ack an interrupt from interrupt
> > + * handler either from application or driver, so that
> > + * new interrupts are raised.
> > + *
> > + * @note PMD generally calls this function at the end of its IRQ callback.
> > + * Internally, it unmasks the interrupt if possible. For INTx, unmasking
> > + * is required as the interrupt is auto-masked prior to invoking
> > + * callback. For MSI/MSI-X, unmasking is typically not needed as the
> > + * interrupt is not auto-masked. In fact, for interrupt handle types
> > + * VFIO_MSIX and VFIO_MSI, this function is no-op.
>
> I've added this comment in the previous revision's discussion, but I'll copy it here
> as well.
>
> Does the user of this API even care about these details? I would think that it
> would be easier to just mandate calling this function at the end of each
> interrupt callback, regardless of which interrupt mode is used.
>
> IMO internal details (about no-ops and such) are better explained in the
> implementation, not in the public-facing API header.
+1 to moving the comment under @note to the implementation.
On 7/18/2019 6:57 PM, Jerin Jacob Kollanukkaran wrote: >> -----Original Message----- >> From: Burakov, Anatoly <anatoly.burakov@intel.com> >> Sent: Thursday, July 18, 2019 6:43 PM >> To: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Hyong Youb Kim >> <hyonkim@cisco.com>; David Marchand <david.marchand@redhat.com>; >> Thomas Monjalon <thomas@monjalon.net>; Ferruh Yigit >> <ferruh.yigit@intel.com>; Bruce Richardson <bruce.richardson@intel.com> >> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; John Daley >> <johndale@cisco.com>; Shahed Shaikh <shshaikh@marvell.com>; >> dev@dpdk.org >> Subject: [EXT] Re: [dpdk-dev] [PATCH v3 2/3] eal: add ack interrupt API >> On 18-Jul-19 9:46 AM, Nithin Dabilpuram wrote: >>> Add new ack interrupt API to avoid using >>> VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for acking interrupt >>> purpose for VFIO based interrupt handlers. >>> This implementation is specific to Linux. >>> >>> Using rte_intr_enable() for acking interrupt has below issues >>> >>> * Time consuming to do for every interrupt received as it will >>> free_irq() followed by request_irq() and all other initializations >>> * A race condition because of a window between free_irq() and >>> request_irq() with packet reception still on and device still >>> enabled and would throw warning messages like below. >>> [158764.159833] do_IRQ: 9.34 No irq handler for vector >>> >>> In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI >>> interrupts as they are edge triggered and kernel would not mask the >>> interrupt before delivering the event to userspace and we don't need to >> ack. >>> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> >>> Signed-off-by: Jerin Jacob <jerinj@marvell.com> >>> Tested-by: Shahed Shaikh <shshaikh@marvell.com> >>> --- >> <snip> >> >>> */ >>> int rte_intr_disable(const struct rte_intr_handle *intr_handle); >>> >>> +/** >>> + * It acks an interrupt raised for the specified handle. 
>>> + * >>> + * Call this function to ack an interrupt from interrupt >>> + * handler either from application or driver, so that >>> + * new interrupts are raised. >>> + * >>> + * @note PMD generally calls this function at the end of its IRQ callback. >>> + * Internally, it unmasks the interrupt if possible. For INTx, unmasking >>> + * is required as the interrupt is auto-masked prior to invoking >>> + * callback. For MSI/MSI-X, unmasking is typically not needed as the >>> + * interrupt is not auto-masked. In fact, for interrupt handle types >>> + * VFIO_MSIX and VFIO_MSI, this function is no-op. >> I've added this comment in previous revision's discussion, but i'll copy it here >> as well. >> >> Does the user of this API even cares about these details? I would think that it >> would be easier to just mandate calling this function at the end of each >> interrupt callback, regardless of which interrupt mode is used. >> >> IMO internal details (about no-ops and such) are better explained in the >> implementation, not in the public-facing API header. > +1 to move the comment under @note to implementation. Ok. I'll just move this note alone to implementation of function in next revision. >
This is the final patch set for the RFC patch discussions mentioned below. [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe http://mails.dpdk.org/archives/dev/2019-July/138358.html [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs http://mails.dpdk.org/archives/dev/2019-July/138357.html [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers http://mails.dpdk.org/archives/dev/2019-July/138359.html Please also refer to the discussions below http://mails.dpdk.org/archives/dev/2019-July/138184.html http://mails.dpdk.org/archives/dev/2019-July/138113.html This patch set reverts the vfio patch that moved eventfd setup to probe time (1/3), because that change broke MSI-X initialization. It also adds a new ack interrupt API that provides a simple, lightweight ack method and avoids the race condition that was the root cause of this discussion. Nithin Dabilpuram (3): vfio: revert change that does intr eventfd setup at probe eal: add ack interrupt API drivers/net: use ack API in interrupt handlers drivers/bus/pci/linux/pci_vfio.c | 78 ++++--- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 + drivers/net/avp/Makefile | 1 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 +- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/Makefile | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 +- drivers/net/e1000/igb_ethdev.c | 6 +- drivers/net/fm10k/Makefile | 1 + drivers/net/fm10k/fm10k_ethdev.c | 6 +- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 +- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + 
drivers/net/qede/meson.build | 2 + drivers/net/qede/qede_ethdev.c | 8 +- drivers/net/sfc/sfc_intr.c | 4 +- drivers/net/virtio/virtio_ethdev.c | 16 +- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- lib/librte_eal/common/include/rte_interrupts.h | 18 ++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 + lib/librte_eal/linux/eal/eal_interrupts.c | 291 +++++++++++++++++++++---- lib/librte_eal/rte_eal_version.map | 1 + 35 files changed, 368 insertions(+), 117 deletions(-) -- 2.8.4
This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4. "vfio: fix interrupts race condition" The above-mentioned commit moves the interrupt's eventfd setup to probe time but only enables one interrupt for all types of interrupt handles, i.e. VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. It works fine in the default case but breaks the cases below, specifically for MSI-X based interrupt handles. * Applications like l3fwd-power that request rxq interrupts during ethdev setup. * Drivers that need > 1 MSI-X interrupts to be configured for functionality to work. VFIO PCI for MSI-X expects all the possible vectors to be set up when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from the kernel PCI subsystem. The only way to increase the number of vectors later is to first free all of them by using VFIO_IRQ_SET_DATA_NONE with action trigger and then enable the new vector count. The above commit changes the behavior of rte_intr_[enable|disable] to only mask and unmask, unlike the earlier behavior, thereby breaking the above two scenarios. Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition") Cc: david.marchand@redhat.com Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Stephen Hemminger <stephen@networkplumber.org> Tested-by: Shahed Shaikh <shshaikh@marvell.com> --- v4: * No change. 
v3: * Update Shahed Shaikh's tested-by v2: * Include tested by sign from Stephen drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ lib/librte_eal/linux/eal/eal_interrupts.c | 201 +++++++++++++++++++++++------- 2 files changed, 191 insertions(+), 88 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ee31239..1ceb1c0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,11 +187,8 @@ pci_vfio_set_bus_master(int dev_fd, bool op) static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; - struct vfio_irq_set *irq_set; - enum rte_intr_mode intr_mode; int i, ret, intr_idx; - int fd; + enum rte_intr_mode intr_mode; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -223,6 +220,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -238,51 +236,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } - /* found a usable interrupt mode */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) - break; - /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, 
strerror(errno)); return -1; } - } - if (i < 0) - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } - irq_set = (struct vfio_irq_set *)irq_set_buf; - irq_set->argsz = sizeof(irq_set_buf); - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = i; - irq_set->start = 0; - irq_set->count = 1; - memcpy(&irq_set->data, &fd, sizeof(int)); - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); - close(fd); - return -1; + return 0; } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - if (i == VFIO_PCI_MSIX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - else if (i == VFIO_PCI_MSI_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - else if (i == VFIO_PCI_INTX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - - return 0; + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 27976b3..79ad5e8 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,19 +109,42 @@ static pthread_t intr_thread; /* enable legacy (INTx) 
interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; - - /* unmask INTx */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -133,51 +156,128 @@ vfio_enable_intx(const struct rte_intr_handle *intr_handle) /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; - /* mask interrupts */ - irq_set.argsz = 
sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + len = sizeof(struct vfio_irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } return 0; } +/* enable MSI interrupts */ +static int +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct 
rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + /* enable MSI-X interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) -{ +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; - int len, ret; - - if (intr_handle->nb_efd == 0) - return 0; + int *fd_ptr; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? 
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set->count = intr_handle->nb_efd; - memcpy(&irq_set->data, intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -189,21 +289,22 @@ vfio_enable_msix(const struct rte_intr_handle *intr_handle) /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set.count = intr_handle->nb_efd; + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -564,7 +665,9 
@@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_enable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -618,7 +721,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_disable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1; -- 2.8.4
Add a new ack interrupt API to avoid using VFIO_IRQ_SET_ACTION_TRIGGER (rte_intr_enable()) for acking interrupts in VFIO based interrupt handlers. This implementation is specific to Linux. Using rte_intr_enable() for acking an interrupt has the issues below * It is time consuming to do for every interrupt received, as it will free_irq() followed by request_irq() and all other initializations * There is a race condition because of the window between free_irq() and request_irq() with packet reception still on and the device still enabled, which throws warning messages like the one below. [158764.159833] do_IRQ: 9.34 No irq handler for vector In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts as they are edge triggered and the kernel does not mask the interrupt before delivering the event to userspace, so we don't need to ack. Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Shahed Shaikh <shshaikh@marvell.com> --- v4: * Move note to implementation and change the expectation to "must call" for the new API. v3: * Update note on new api v2: * No change lib/librte_eal/common/include/rte_interrupts.h | 18 ++++++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ lib/librte_eal/linux/eal/eal_interrupts.c | 90 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 4 files changed, 118 insertions(+) diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index c1e912c..c463265 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -118,6 +118,24 @@ int rte_intr_enable(const struct rte_intr_handle *intr_handle); */ int rte_intr_disable(const struct rte_intr_handle *intr_handle); +/** + * It acks an interrupt raised for the specified handle. 
+ * + * This function should be called at the end of each interrupt + * handler either from application or driver, so that + * currently raised interrupt is acked and further + * new interrupts are raised. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +__rte_experimental +int rte_intr_ack(const struct rte_intr_handle *intr_handle); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/freebsd/eal/eal_interrupts.c b/lib/librte_eal/freebsd/eal/eal_interrupts.c index 10375bd..f6831b7 100644 --- a/lib/librte_eal/freebsd/eal/eal_interrupts.c +++ b/lib/librte_eal/freebsd/eal/eal_interrupts.c @@ -387,6 +387,15 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) return 0; } +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + return -1; +} + static void eal_intr_process_interrupts(struct kevent *events, int nfds) { diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 79ad5e8..794374e 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -197,6 +197,35 @@ vfio_disable_intx(const struct rte_intr_handle *intr_handle) { return 0; } +/* unmask/ack legacy (INTx) interrupts */ +static int +vfio_ack_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* unmask INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx 
interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + /* enable MSI interrupts */ static int vfio_enable_msi(const struct rte_intr_handle *intr_handle) { @@ -693,6 +722,67 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) return 0; } + +/** + * PMD generally calls this function at the end of its IRQ callback. + * Internally, it unmasks the interrupt if possible. For INTx, unmasking + * is required as the interrupt is auto-masked prior to invoking + * callback. For MSI/MSI-X, unmasking is typically not needed as the + * interrupt is not auto-masked. In fact, for interrupt handle types + * VFIO_MSIX and VFIO_MSI, this function is no-op. + */ +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* Both acking and enabling are same for UIO */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + /* Since VFIO_MSIX is implicitly acked + * unlike INTx, we report success + */ + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + return 0; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_ack_intx(intr_handle)) + return -1; + break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + return -1; +#endif +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + int rte_intr_disable(const struct rte_intr_handle *intr_handle) { diff --git a/lib/librte_eal/rte_eal_version.map 
b/lib/librte_eal/rte_eal_version.map index 1892d9e..0537a6d 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -407,4 +407,5 @@ EXPERIMENTAL { rte_lcore_to_cpu_id; rte_mcfg_timer_lock; rte_mcfg_timer_unlock; + rte_intr_ack; }; -- 2.8.4
Replace rte_intr_enable() with the rte_intr_ack() API for acking an interrupt in interrupt handlers and rx_queue_intr_enable() callbacks of PMDs. This is in line with the original intent of this change in PMDs to ack interrupts after handling is completed if the device is backed by UIO, IGB_UIO or VFIO (with INTx). Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Acked-by: Shahed Shaikh <shshaikh@marvell.com> Tested-by: Shahed Shaikh <shshaikh@marvell.com> --- v4: * Undo qede PMD change that was done in v3 to adhere to updated api doc. * Leaving qede PMD maintainer ack as v2 was tested and acked as well. * Updated avp, bnx2x, fm10k Makefiles to allow the experimental API, which was mistakenly missed earlier though meson.build was updated. v3: * Removed ack call from qede PMD msix handler * Ack from Shahed v2: * No change drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 ++ drivers/net/avp/Makefile | 1 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- drivers/net/axgbe/meson.build | 1 + drivers/net/bnx2x/Makefile | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 ++-- drivers/net/e1000/igb_ethdev.c | 6 +++--- drivers/net/fm10k/Makefile | 1 + drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- drivers/net/fm10k/meson.build | 1 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 ++-- drivers/net/ice/meson.build | 1 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 ++ drivers/net/qede/qede_ethdev.c | 8 ++++---- drivers/net/sfc/sfc_intr.c | 4 ++-- drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- drivers/net/vmxnet3/vmxnet3_ethdev.c 
| 2 +- 30 files changed, 59 insertions(+), 29 deletions(-) diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile index 263f12b..fc12e6a 100644 --- a/drivers/net/atlantic/Makefile +++ b/drivers/net/atlantic/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_atlantic.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_atlantic_version.map diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c index fdc0a7f..79001da 100644 --- a/drivers/net/atlantic/atl_ethdev.c +++ b/drivers/net/atlantic/atl_ethdev.c @@ -1394,7 +1394,7 @@ atl_dev_interrupt_action(struct rte_eth_dev *dev, } done: atl_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build index 60b8468..d5602dd 100644 --- a/drivers/net/atlantic/meson.build +++ b/drivers/net/atlantic/meson.build @@ -11,3 +11,5 @@ sources = files( 'hw_atl/hw_atl_utils.c', 'rte_pmd_atlantic.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/avp/Makefile b/drivers/net/avp/Makefile index c9db667..a753765 100644 --- a/drivers/net/avp/Makefile +++ b/drivers/net/avp/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_avp.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 47b96ec..504435e 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -713,7 +713,7 @@ avp_dev_interrupt_handler(void *data) status); /* re-enable UIO interrupt handling */ - ret = rte_intr_enable(&pci_dev->intr_handle); + ret = rte_intr_ack(&pci_dev->intr_handle); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", ret); diff --git a/drivers/net/avp/meson.build 
b/drivers/net/avp/meson.build index a5f63cd..7fb9706 100644 --- a/drivers/net/avp/meson.build +++ b/drivers/net/avp/meson.build @@ -7,3 +7,4 @@ if not is_linux endif sources = files('avp_ethdev.c') install_headers('rte_avp_common.h', 'rte_avp_fifo.h') +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile index c2d4336..bcdcd54 100644 --- a/drivers/net/axgbe/Makefile +++ b/drivers/net/axgbe/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_axgbe.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_axgbe_version.map diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index cfb1720..56d8dac 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ b/drivers/net/axgbe/axgbe_ethdev.c @@ -136,8 +136,8 @@ axgbe_dev_interrupt_handler(void *param) DMA_CH_SR, dma_ch_isr); } } - /* Enable interrupts since disabled after generation*/ - rte_intr_enable(&pdata->pci_dev->intr_handle); + /* Unmask interrupts since disabled after generation*/ + rte_intr_ack(&pdata->pci_dev->intr_handle); } /* diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build index 86873b7..226d11d 100644 --- a/drivers/net/axgbe/meson.build +++ b/drivers/net/axgbe/meson.build @@ -14,6 +14,7 @@ sources = files('axgbe_ethdev.c', 'axgbe_rxtx.c') cflags += '-Wno-cast-qual' +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('axgbe_rxtx_vec_sse.c') diff --git a/drivers/net/bnx2x/Makefile b/drivers/net/bnx2x/Makefile index 55d1ad6..adead9d 100644 --- a/drivers/net/bnx2x/Makefile +++ b/drivers/net/bnx2x/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_bnx2x.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) CFLAGS += -DZLIB_CONST +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lz LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index 
10b4fdb..191a3ef 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -133,7 +133,7 @@ bnx2x_interrupt_handler(void *param) PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled"); bnx2x_interrupt_action(dev, 1); - rte_intr_enable(&sc->pci_dev->intr_handle); + rte_intr_ack(&sc->pci_dev->intr_handle); } static void bnx2x_periodic_start(void *param) diff --git a/drivers/net/bnx2x/meson.build b/drivers/net/bnx2x/meson.build index 4892bb2..1bc84b7 100644 --- a/drivers/net/bnx2x/meson.build +++ b/drivers/net/bnx2x/meson.build @@ -6,6 +6,7 @@ build = dep.found() reason = 'missing dependency, "zlib"' ext_deps += dep cflags += '-DZLIB_CONST' +cflags += '-DALLOW_EXPERIMENTAL_API' sources = files('bnx2x.c', 'bnx2x_ethdev.c', 'bnx2x_rxtx.c', diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index dc88661..0b7f501 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1001,7 +1001,7 @@ eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -1568,7 +1568,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev, return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c index 3ee28cf..793a31c 100644 --- a/drivers/net/e1000/igb_ethdev.c +++ b/drivers/net/e1000/igb_ethdev.c @@ -2876,7 +2876,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev, } igb_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2987,7 +2987,7 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct 
rte_intr_handle *intr } igbvf_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5500,7 +5500,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/fm10k/Makefile b/drivers/net/fm10k/Makefile index d657dff..55e9cd5 100644 --- a/drivers/net/fm10k/Makefile +++ b/drivers/net/fm10k/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_fm10k.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_fm10k_version.map diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index a1e3836..e8fa8a2 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -2381,7 +2381,7 @@ fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) else FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&pdev->intr_handle); + rte_intr_ack(&pdev->intr_handle); return 0; } @@ -2680,7 +2680,7 @@ fm10k_dev_interrupt_handler_pf(void *param) FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /** @@ -2760,7 +2760,7 @@ fm10k_dev_interrupt_handler_vf(void *param) FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Mailbox message handler in VF */ diff --git a/drivers/net/fm10k/meson.build b/drivers/net/fm10k/meson.build index 2772ea4..b7d34c7 100644 --- a/drivers/net/fm10k/meson.build +++ b/drivers/net/fm10k/meson.build @@ -14,3 +14,4 @@ if arch_subdir == 'x86' endif includes += include_directories('base') +cflags += 
'-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 2b9fc45..5217b9c 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -11646,7 +11646,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); I40E_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 53dc05c..00d6ed5 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1098,7 +1098,7 @@ iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) IAVF_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile index 32abeb6..ae53c26 100644 --- a/drivers/net/ice/Makefile +++ b/drivers/net/ice/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_ice.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_kvargs LDLIBS += -lrte_bus_pci -lrte_mempool diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index 9ce730c..77b5a71 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -1118,7 +1118,7 @@ ice_interrupt_handler(void *param) done: /* Enable interrupt */ ice_pf_enable_irq0(hw); - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Initialize SW parameters of PF */ @@ -3002,7 +3002,7 @@ static int ice_rx_queue_intr_enable(struct rte_eth_dev *dev, val &= ~GLINT_DYN_CTL_WB_ON_ITR_M; ICE_WRITE_REG(hw, GLINT_DYN_CTL(msix_intr), val); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index 7f16647..70f349e 100644 --- 
a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -13,6 +13,7 @@ sources = files( deps += ['hash'] includes += include_directories('base') +cflags += '-DALLOW_EXPERIMENTAL_API' if arch_subdir == 'x86' sources += files('ice_rxtx_vec_sse.c') diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 22c5b2c..f5920f5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -4502,7 +4502,7 @@ ixgbe_dev_interrupt_delayed_handler(void *param) PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); } /** @@ -5763,7 +5763,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, intr->mask); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5812,7 +5812,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index 1a7aa17..91a226c 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -1412,7 +1412,7 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { /* If MSI-X auto-masking is used, clear the entry */ rte_wmb(); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); } else { /* Make sure all updates are written before un-masking */ rte_wmb(); diff --git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile index 2ecbd8d..a11d594 100644 --- a/drivers/net/qede/Makefile +++ b/drivers/net/qede/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_qede.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring 
LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build index 12388a6..c8f9c6d 100644 --- a/drivers/net/qede/meson.build +++ b/drivers/net/qede/meson.build @@ -10,3 +10,5 @@ sources = files( 'qede_main.c', 'qede_rxtx.c', ) + +cflags += '-DALLOW_EXPERIMENTAL_API' diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 82363e6..9ac9da3 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -248,8 +248,8 @@ qede_interrupt_handler_intx(void *param) if (status & 0x1) { qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } } @@ -261,8 +261,8 @@ qede_interrupt_handler(void *param) struct ecore_dev *edev = &qdev->edev; qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } static void diff --git a/drivers/net/sfc/sfc_intr.c b/drivers/net/sfc/sfc_intr.c index 1f4969b..76cb630 100644 --- a/drivers/net/sfc/sfc_intr.c +++ b/drivers/net/sfc/sfc_intr.c @@ -79,7 +79,7 @@ sfc_intr_line_handler(void *cb_arg) if (qmask & (1 << sa->mgmt_evq_index)) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); @@ -123,7 +123,7 @@ sfc_intr_message_handler(void *cb_arg) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 04aecb7..62c8274 100644 --- 
a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1265,6 +1265,20 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) } static int +virtio_intr_unmask(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (rte_intr_ack(dev->intr_handle) < 0) + return -1; + + if (!hw->virtio_user_dev) + hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); + + return 0; +} + +static int virtio_intr_enable(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; @@ -1457,7 +1471,7 @@ virtio_interrupt_handler(void *param) isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (virtio_intr_enable(dev) < 0) + if (virtio_intr_unmask(dev) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 2b1e915..57feb37 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -1426,7 +1426,7 @@ vmxnet3_interrupt_handler(void *param) vmxnet3_process_events(dev); - if (rte_intr_enable(&pci_dev->intr_handle) < 0) + if (rte_intr_ack(&pci_dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); } -- 2.8.4
> -----Original Message----- > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Nithin Dabilpuram > Sent: Thursday, July 18, 2019 10:36 PM > To: Hyong Youb Kim <hyonkim@cisco.com>; David Marchand > <david.marchand@redhat.com>; Thomas Monjalon > <thomas@monjalon.net>; Yigit, Ferruh <ferruh.yigit@intel.com>; Burakov, > Anatoly <anatoly.burakov@intel.com> > Cc: jerinj@marvell.com; John Daley <johndale@cisco.com>; Shahed Shaikh > <shshaikh@marvell.com>; dev@dpdk.org; Nithin Dabilpuram > <ndabilpuram@marvell.com> > Subject: [dpdk-dev] [PATCH v4 1/3] vfio: revert change that does intr > eventfd setup at probe > > This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4. > "vfio: fix interrupts race condition" > > The above mentioned commit moves the interrupt's eventfd setup > to probe time but only enables one interrupt for all types of > interrupt handles i.e VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. > It works fine with default case but breaks below cases specifically > for MSIX based interrupt handles. > > * Applications like l3fwd-power that request rxq interrupts > while ethdev setup. > * Drivers that need > 1 MSIx interrupts to be configured for > functionality to work. > > VFIO PCI for MSIx expects all the possible vectors to be setup up > when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be > allocated from kernel pci subsystem. Only way to increase the number > of vectors later is first free all by using VFIO_IRQ_SET_DATA_NONE > with action trigger and then enable new vector count. > > Above commit changes the behavior of rte_intr_[enable|disable] to > only mask and unmask unlike earlier behavior and thereby > breaking above two scenarios. 
> > Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition") > Cc: david.marchand@redhat.com > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > Tested-by: Stephen Hemminger <stephen@networkplumber.org> > Tested-by: Shahed Shaikh <shshaikh@marvell.com> Tested-by: Lei Yao <lei.a.yao@Intel.com> This patch set pass the interrupt test with ixgbe, i40e and virtio. > --- > v4: > * No change. > v3: > * Update Shahed Shaikh's tested-by > v2: > * Include tested by sign from Stephen > > drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ > lib/librte_eal/linux/eal/eal_interrupts.c | 201 +++++++++++++++++++++++- > ------ > 2 files changed, 191 insertions(+), 88 deletions(-) > > diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c > index ee31239..1ceb1c0 100644 > --- a/drivers/bus/pci/linux/pci_vfio.c > +++ b/drivers/bus/pci/linux/pci_vfio.c > @@ -187,11 +187,8 @@ pci_vfio_set_bus_master(int dev_fd, bool op) > static int > pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) > { > - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; > - struct vfio_irq_set *irq_set; > - enum rte_intr_mode intr_mode; > int i, ret, intr_idx; > - int fd; > + enum rte_intr_mode intr_mode; > > /* default to invalid index */ > intr_idx = VFIO_PCI_NUM_IRQS; > @@ -223,6 +220,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, > int vfio_dev_fd) > /* start from MSI-X interrupt type */ > for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { > struct vfio_irq_info irq = { .argsz = sizeof(irq) }; > + int fd = -1; > > /* skip interrupt modes we don't want */ > if (intr_mode != RTE_INTR_MODE_NONE && > @@ -238,51 +236,51 @@ pci_vfio_setup_interrupts(struct rte_pci_device > *dev, int vfio_dev_fd) > return -1; > } > > - /* found a usable interrupt mode */ > - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) > - break; > - > /* if this vector cannot be used with eventfd, fail if we > 
explicitly > * specified interrupt type, otherwise continue */ > - if (intr_mode != RTE_INTR_MODE_NONE) { > - RTE_LOG(ERR, EAL, " interrupt vector does not > support eventfd!\n"); > + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { > + if (intr_mode != RTE_INTR_MODE_NONE) { > + RTE_LOG(ERR, EAL, > + " interrupt vector does not > support eventfd!\n"); > + return -1; > + } else > + continue; > + } > + > + /* set up an eventfd for interrupts */ > + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (fd < 0) { > + RTE_LOG(ERR, EAL, " cannot set up eventfd, " > + "error %i (%s)\n", errno, > strerror(errno)); > return -1; > } > - } > > - if (i < 0) > - return -1; > + dev->intr_handle.fd = fd; > + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; > > - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > - if (fd < 0) { > - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", > - errno, strerror(errno)); > - return -1; > - } > + switch (i) { > + case VFIO_PCI_MSIX_IRQ_INDEX: > + intr_mode = RTE_INTR_MODE_MSIX; > + dev->intr_handle.type = > RTE_INTR_HANDLE_VFIO_MSIX; > + break; > + case VFIO_PCI_MSI_IRQ_INDEX: > + intr_mode = RTE_INTR_MODE_MSI; > + dev->intr_handle.type = > RTE_INTR_HANDLE_VFIO_MSI; > + break; > + case VFIO_PCI_INTX_IRQ_INDEX: > + intr_mode = RTE_INTR_MODE_LEGACY; > + dev->intr_handle.type = > RTE_INTR_HANDLE_VFIO_LEGACY; > + break; > + default: > + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); > + return -1; > + } > > - irq_set = (struct vfio_irq_set *)irq_set_buf; > - irq_set->argsz = sizeof(irq_set_buf); > - irq_set->flags = > VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set->index = i; > - irq_set->start = 0; > - irq_set->count = 1; > - memcpy(&irq_set->data, &fd, sizeof(int)); > - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { > - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); > - close(fd); > - return -1; > + return 0; > } > > - dev->intr_handle.fd = fd; > - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; > - if (i 
== VFIO_PCI_MSIX_IRQ_INDEX) > - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; > - else if (i == VFIO_PCI_MSI_IRQ_INDEX) > - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; > - else if (i == VFIO_PCI_INTX_IRQ_INDEX) > - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; > - > - return 0; > + /* if we're here, we haven't found a suitable interrupt vector */ > + return -1; > } > > #ifdef HAVE_VFIO_DEV_REQ_INTERFACE > diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c > b/lib/librte_eal/linux/eal/eal_interrupts.c > index 27976b3..79ad5e8 100644 > --- a/lib/librte_eal/linux/eal/eal_interrupts.c > +++ b/lib/librte_eal/linux/eal/eal_interrupts.c > @@ -109,19 +109,42 @@ static pthread_t intr_thread; > > /* enable legacy (INTx) interrupts */ > static int > -vfio_enable_intx(const struct rte_intr_handle *intr_handle) > -{ > - struct vfio_irq_set irq_set; > - int ret; > - > - /* unmask INTx */ > - irq_set.argsz = sizeof(irq_set); > - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_UNMASK; > - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; > - irq_set.start = 0; > - irq_set.count = 1; > - > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > &irq_set); > +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set *irq_set; > + char irq_set_buf[IRQ_SET_BUF_LEN]; > + int len, ret; > + int *fd_ptr; > + > + len = sizeof(irq_set_buf); > + > + /* enable INTx */ > + irq_set = (struct vfio_irq_set *) irq_set_buf; > + irq_set->argsz = len; > + irq_set->count = 1; > + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | > VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set->start = 0; > + fd_ptr = (int *) &irq_set->data; > + *fd_ptr = intr_handle->fd; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > + > + if (ret) { > + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for > fd %d\n", > + intr_handle->fd); > + return -1; > + } > + > + /* unmask INTx after enabling */ 
> + memset(irq_set, 0, len); > + len = sizeof(struct vfio_irq_set); > + irq_set->argsz = len; > + irq_set->count = 1; > + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_UNMASK; > + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set->start = 0; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > > if (ret) { > RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for > fd %d\n", > @@ -133,51 +156,128 @@ vfio_enable_intx(const struct rte_intr_handle > *intr_handle) > > /* disable legacy (INTx) interrupts */ > static int > -vfio_disable_intx(const struct rte_intr_handle *intr_handle) > -{ > - struct vfio_irq_set irq_set; > - int ret; > +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set *irq_set; > + char irq_set_buf[IRQ_SET_BUF_LEN]; > + int len, ret; > > - /* mask interrupts */ > - irq_set.argsz = sizeof(irq_set); > - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_MASK; > - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; > - irq_set.start = 0; > - irq_set.count = 1; > + len = sizeof(struct vfio_irq_set); > > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > &irq_set); > + /* mask interrupts before disabling */ > + irq_set = (struct vfio_irq_set *) irq_set_buf; > + irq_set->argsz = len; > + irq_set->count = 1; > + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_MASK; > + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set->start = 0; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > > if (ret) { > RTE_LOG(ERR, EAL, "Error masking INTx interrupts for > fd %d\n", > intr_handle->fd); > return -1; > } > + > + /* disable INTx*/ > + memset(irq_set, 0, len); > + irq_set->argsz = len; > + irq_set->count = 0; > + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set->start = 0; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > 
irq_set); > + > + if (ret) { > + RTE_LOG(ERR, EAL, > + "Error disabling INTx interrupts for fd %d\n", > intr_handle->fd); > + return -1; > + } > return 0; > } > > +/* enable MSI interrupts */ > +static int > +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { > + int len, ret; > + char irq_set_buf[IRQ_SET_BUF_LEN]; > + struct vfio_irq_set *irq_set; > + int *fd_ptr; > + > + len = sizeof(irq_set_buf); > + > + irq_set = (struct vfio_irq_set *) irq_set_buf; > + irq_set->argsz = len; > + irq_set->count = 1; > + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | > VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; > + irq_set->start = 0; > + fd_ptr = (int *) &irq_set->data; > + *fd_ptr = intr_handle->fd; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > + > + if (ret) { > + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for > fd %d\n", > + intr_handle->fd); > + return -1; > + } > + return 0; > +} > + > +/* disable MSI interrupts */ > +static int > +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set *irq_set; > + char irq_set_buf[IRQ_SET_BUF_LEN]; > + int len, ret; > + > + len = sizeof(struct vfio_irq_set); > + > + irq_set = (struct vfio_irq_set *) irq_set_buf; > + irq_set->argsz = len; > + irq_set->count = 0; > + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; > + irq_set->start = 0; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > + > + if (ret) > + RTE_LOG(ERR, EAL, > + "Error disabling MSI interrupts for fd %d\n", > intr_handle->fd); > + > + return ret; > +} > + > /* enable MSI-X interrupts */ > static int > -vfio_enable_msix(const struct rte_intr_handle *intr_handle) > -{ > +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { > + int len, ret; > char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; > struct vfio_irq_set *irq_set; > - int len, ret; > - > - if 
(intr_handle->nb_efd == 0) > - return 0; > + int *fd_ptr; > > len = sizeof(irq_set_buf); > > irq_set = (struct vfio_irq_set *) irq_set_buf; > irq_set->argsz = len; > + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ > + irq_set->count = intr_handle->max_intr ? > + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? > + RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : > 1; > irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | > VFIO_IRQ_SET_ACTION_TRIGGER; > irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; > - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; > - irq_set->count = intr_handle->nb_efd; > - memcpy(&irq_set->data, intr_handle->efds, > - sizeof(*intr_handle->efds) * intr_handle->nb_efd); > + irq_set->start = 0; > + fd_ptr = (int *) &irq_set->data; > + /* INTR vector offset 0 reserve for non-efds mapping */ > + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; > + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, > + sizeof(*intr_handle->efds) * intr_handle->nb_efd); > > ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > + > if (ret) { > RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for > fd %d\n", > intr_handle->fd); > @@ -189,21 +289,22 @@ vfio_enable_msix(const struct rte_intr_handle > *intr_handle) > > /* disable MSI-X interrupts */ > static int > -vfio_disable_msix(const struct rte_intr_handle *intr_handle) > -{ > - struct vfio_irq_set irq_set; > - int ret; > +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set *irq_set; > + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; > + int len, ret; > > - if (intr_handle->nb_efd == 0) > - return 0; > + len = sizeof(struct vfio_irq_set); > > - irq_set.argsz = sizeof(irq_set); > - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; > - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; > - irq_set.count = intr_handle->nb_efd; > + irq_set = (struct vfio_irq_set *) irq_set_buf; > + 
irq_set->argsz = len; > + irq_set->count = 0; > + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; > + irq_set->start = 0; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > irq_set); > > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, > &irq_set); > if (ret) > RTE_LOG(ERR, EAL, > "Error disabling MSI-X interrupts for fd %d\n", > intr_handle->fd); > @@ -564,7 +665,9 @@ rte_intr_enable(const struct rte_intr_handle > *intr_handle) > return -1; > break; > case RTE_INTR_HANDLE_VFIO_MSI: > - return 0; > + if (vfio_enable_msi(intr_handle)) > + return -1; > + break; > case RTE_INTR_HANDLE_VFIO_LEGACY: > if (vfio_enable_intx(intr_handle)) > return -1; > @@ -618,7 +721,9 @@ rte_intr_disable(const struct rte_intr_handle > *intr_handle) > return -1; > break; > case RTE_INTR_HANDLE_VFIO_MSI: > - return 0; > + if (vfio_disable_msi(intr_handle)) > + return -1; > + break; > case RTE_INTR_HANDLE_VFIO_LEGACY: > if (vfio_disable_intx(intr_handle)) > return -1; > -- > 2.8.4
Hi,
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Nithin Dabilpuram
> Sent: Wednesday, July 17, 2019 13:59
> To: Hyong Youb Kim <hyonkim@cisco.com>; David Marchand
> <david.marchand@redhat.com>; Thomas Monjalon
> <thomas@monjalon.net>; Yigit, Ferruh <ferruh.yigit@intel.com>; Burakov,
> Anatoly <anatoly.burakov@intel.com>
> Cc: jerinj@marvell.com; John Daley <johndale@cisco.com>; Shahed Shaikh
> <shshaikh@marvell.com>; dev@dpdk.org; Nithin Dabilpuram
> <ndabilpuram@marvell.com>
> Subject: [dpdk-dev] [PATCH 1/3] vfio: revert change that does intr eventfd
> setup at probe
>
> This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4.
> "vfio: fix interrupts race condition"
>
> The above-mentioned commit moves the interrupt's eventfd setup to probe
> time but only enables one interrupt for all types of interrupt handles, i.e.
> VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO.
> It works fine in the default case but breaks the cases below, specifically
> for MSIX based interrupt handles.
>
> * Applications like l3fwd-power that request rxq interrupts
> during ethdev setup.
> * Drivers that need > 1 MSIx interrupts to be configured for
> functionality to work.
>
> VFIO PCI for MSIx expects all the possible vectors to be set up when using
> VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from the kernel
> PCI subsystem. The only way to increase the number of vectors later is to
> first free all of them by using VFIO_IRQ_SET_DATA_NONE with action trigger
> and then enable the new vector count.
>
> The above commit changes the behavior of rte_intr_[enable|disable] to only
> mask and unmask, unlike the earlier behavior, thereby breaking the above two
> scenarios.
>
> Fixes: 89aac60e0be9 ("vfio: fix interrupts race condition")
> Cc: david.marchand@redhat.com
>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
> Signed-off-by: Jerin Jacob <jerinj@marvell.com>
> ---
> drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------
> lib/librte_eal/linux/eal/eal_interrupts.c | 201 +++++++++++++++++++++++-
> ------
I can confirm this patch fixes the "EAL: error configuring interrupt" issue on my C6XX board.
Damian
On Thu, Jul 18, 2019 at 4:36 PM Nithin Dabilpuram
<ndabilpuram@marvell.com> wrote:
>
> This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4.
> "vfio: fix interrupts race condition"
>
> The above-mentioned commit moves the interrupt's eventfd setup
> to probe time but only enables one interrupt for all types of
> interrupt handles, i.e. VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO.
> It works fine in the default case but breaks the cases below, specifically
> for MSIX based interrupt handles.
>
> * Applications like l3fwd-power that request rxq interrupts
> during ethdev setup.
> * Drivers that need > 1 MSIx interrupts to be configured for
> functionality to work.
>
> VFIO PCI for MSIx expects all the possible vectors to be set up
> when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be
> allocated from the kernel PCI subsystem. The only way to increase the
> number of vectors later is to first free all of them by using
> VFIO_IRQ_SET_DATA_NONE with action trigger and then enable the new vector count.
Indeed, this is the part that I had missed.
Ok for this revert.
A bit sad to see the coding style issues back :-).
--
David Marchand
On Thu, Jul 18, 2019 at 4:36 PM Nithin Dabilpuram <ndabilpuram@marvell.com> wrote: > > Add new ack interrupt API to avoid using > VFIO_IRQ_SET_ACTION_TRIGGER(rte_intr_enable()) for > acking interrupt purpose for VFIO based interrupt handlers. > This implementation is specific to Linux. > > Using rte_intr_enable() for acking interrupt has below issues > > * Time consuming to do for every interrupt received as it will > free_irq() followed by request_irq() and all other initializations > * A race condition because of a window between free_irq() and > request_irq() with packet reception still on and device still > enabled and would throw warning messages like below. > [158764.159833] do_IRQ: 9.34 No irq handler for vector > > In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts > as they are edge triggered and kernel would not mask the interrupt before > delivering the event to userspace and we don't need to ack. > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > Tested-by: Shahed Shaikh <shshaikh@marvell.com> > --- > v4: > * Move note to implementation and change > the expectation to must call for new api. > v3: > * Update note on new api > v2: > * No change > > lib/librte_eal/common/include/rte_interrupts.h | 18 ++++++ > lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ > lib/librte_eal/linux/eal/eal_interrupts.c | 90 ++++++++++++++++++++++++++ > lib/librte_eal/rte_eal_version.map | 1 + > 4 files changed, 118 insertions(+) > > diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h > index c1e912c..c463265 100644 > --- a/lib/librte_eal/common/include/rte_interrupts.h > +++ b/lib/librte_eal/common/include/rte_interrupts.h > @@ -118,6 +118,24 @@ int rte_intr_enable(const struct rte_intr_handle *intr_handle); > */ > int rte_intr_disable(const struct rte_intr_handle *intr_handle); > > +/** Missing a banner. 
* @warning * @b EXPERIMENTAL: this API may change without prior notice > + * It acks an interrupt raised for the specified handle. acknowledges > + * > + * This function should be called at the end of each interrupt > + * handler either from application or driver, so that > + * currently raised interrupt is acked and further > + * new interrupts are raised. > + * > + * @param intr_handle > + * pointer to the interrupt handle. > + * > + * @return > + * - On success, zero. > + * - On failure, a negative value. > + */ > +__rte_experimental > +int rte_intr_ack(const struct rte_intr_handle *intr_handle); > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/freebsd/eal/eal_interrupts.c b/lib/librte_eal/freebsd/eal/eal_interrupts.c > index 10375bd..f6831b7 100644 > --- a/lib/librte_eal/freebsd/eal/eal_interrupts.c > +++ b/lib/librte_eal/freebsd/eal/eal_interrupts.c > @@ -387,6 +387,15 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) > return 0; > } > > +int > +rte_intr_ack(const struct rte_intr_handle *intr_handle) > +{ > + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) > + return 0; > + > + return -1; > +} > + > static void > eal_intr_process_interrupts(struct kevent *events, int nfds) > { > diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c > index 79ad5e8..794374e 100644 > --- a/lib/librte_eal/linux/eal/eal_interrupts.c > +++ b/lib/librte_eal/linux/eal/eal_interrupts.c > @@ -197,6 +197,35 @@ vfio_disable_intx(const struct rte_intr_handle *intr_handle) { > return 0; > } > > +/* unmask/ack legacy (INTx) interrupts */ > +static int > +vfio_ack_intx(const struct rte_intr_handle *intr_handle) > +{ > + struct vfio_irq_set *irq_set; > + char irq_set_buf[IRQ_SET_BUF_LEN]; > + int len, ret; > + > + len = sizeof(struct vfio_irq_set); > + > + /* unmask INTx */ > + irq_set = (struct vfio_irq_set *) irq_set_buf; > + memset(irq_set, 0, len); > + irq_set->argsz = len; > + irq_set->count = 1; > 
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; > + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set->start = 0; > + > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > + > + if (ret) { > + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", > + intr_handle->fd); > + return -1; > + } > + return 0; > +} > + We are not setting any fd. No need to have the irq_set_buf[] array, you can directly declare irq_set on the stack and remove the len and ret variables. Something like: /* unmask/ack legacy (INTx) interrupts */ static int vfio_ack_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set irq_set; /* unmask INTx */ memset(&irq_set, 0, sizeof(irq_set)); irq_set.argsz = sizeof(irq_set); irq_set.count = 1; irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; irq_set.start = 0; if (ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } return 0; } > /* enable MSI interrupts */ > static int > vfio_enable_msi(const struct rte_intr_handle *intr_handle) { > @@ -693,6 +722,67 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) > return 0; > } > > + Double empty line. > +/** > + * PMD generally calls this function at the end of its IRQ callback. > + * Internally, it unmasks the interrupt if possible. For INTx, unmasking > + * is required as the interrupt is auto-masked prior to invoking > + * callback. For MSI/MSI-X, unmasking is typically not needed as the > + * interrupt is not auto-masked. In fact, for interrupt handle types > + * VFIO_MSIX and VFIO_MSI, this function is no-op. No double space at the beginning of the lines. How about separating the cases, like: /** * PMD generally calls this function at the end of its IRQ callback. * Internally, it unmasks the interrupt if possible. 
* * For INTx, unmasking is required as the interrupt is auto-masked prior to * invoking callback. * * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI, * this function is no-op. */ > + */ > +int > +rte_intr_ack(const struct rte_intr_handle *intr_handle) > +{ > + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) > + return 0; > + > + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) > + return -1; > + > + switch (intr_handle->type) { > + /* Both acking and disabling are same for UIO */ enabling? > + case RTE_INTR_HANDLE_UIO: > + if (uio_intr_enable(intr_handle)) > + return -1; > + break; > + case RTE_INTR_HANDLE_UIO_INTX: > + if (uio_intx_intr_enable(intr_handle)) > + return -1; > + break; > + /* not used at this moment */ > + case RTE_INTR_HANDLE_ALARM: > + return -1; > +#ifdef VFIO_PRESENT > + /* Since VFIO_MSIX is implicitly acked > + * unlike INTx, we report success > + */ Indent is weird. 
How about: /* VFIO MSI* is implicitly acked unlike INTx, nothing to do */ > + case RTE_INTR_HANDLE_VFIO_MSIX: > + case RTE_INTR_HANDLE_VFIO_MSI: > + return 0; > + case RTE_INTR_HANDLE_VFIO_LEGACY: > + if (vfio_ack_intx(intr_handle)) > + return -1; > + break; > +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE > + case RTE_INTR_HANDLE_VFIO_REQ: > + return -1; > +#endif > +#endif > + /* not used at this moment */ > + case RTE_INTR_HANDLE_DEV_EVENT: > + return -1; > + /* unknown handle type */ > + default: > + RTE_LOG(ERR, EAL, > + "Unknown handle type of fd %d\n", > + intr_handle->fd); > + return -1; > + } > + > + return 0; > +} > + > int > rte_intr_disable(const struct rte_intr_handle *intr_handle) > { > diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map > index 1892d9e..0537a6d 100644 > --- a/lib/librte_eal/rte_eal_version.map > +++ b/lib/librte_eal/rte_eal_version.map > @@ -407,4 +407,5 @@ EXPERIMENTAL { > rte_lcore_to_cpu_id; > rte_mcfg_timer_lock; > rte_mcfg_timer_unlock; > + rte_intr_ack; This should be alphabetically sorted. > }; > -- > 2.8.4 > -- David Marchand
On Thu, Jul 18, 2019 at 4:37 PM Nithin Dabilpuram <ndabilpuram@marvell.com> wrote: > > Replace rte_intr_enable() with rte_intr_ack() API > for acking an interrupt in interrupt handlers and > rx_queue_intr_enable() callbacks of PMD's. > > This is inline with original intent of this change in PMDs > to ack interrupts after handling is completed if > device is backed by UIO, IGB_UIO or VFIO(with INTx). > > Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> > Signed-off-by: Jerin Jacob <jerinj@marvell.com> > Acked-by: Shahed Shaikh <shshaikh@marvell.com> > Tested-by: Shahed Shaikh <shshaikh@marvell.com> > --- > v4: > * Undo qede PMD change that was done in v3 > to adhere to updated api doc. > * Leaving qede PMD maintainer ack as v2 > was tested and acked as well. > * Updated avp, bnx2x, fm10k Makefiles for > allow experimental which was mistakenly > missed earlier though meson.build was updated. > v3: > * Removed ack call from qede PMD msix handler > * Ack from Shahed > v2: > * No change > > drivers/net/atlantic/Makefile | 1 + > drivers/net/atlantic/atl_ethdev.c | 2 +- > drivers/net/atlantic/meson.build | 2 ++ > drivers/net/avp/Makefile | 1 + > drivers/net/avp/avp_ethdev.c | 2 +- > drivers/net/avp/meson.build | 1 + > drivers/net/axgbe/Makefile | 1 + > drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- > drivers/net/axgbe/meson.build | 1 + > drivers/net/bnx2x/Makefile | 1 + > drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- > drivers/net/bnx2x/meson.build | 1 + > drivers/net/e1000/em_ethdev.c | 4 ++-- > drivers/net/e1000/igb_ethdev.c | 6 +++--- > drivers/net/fm10k/Makefile | 1 + > drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- > drivers/net/fm10k/meson.build | 1 + > drivers/net/i40e/i40e_ethdev.c | 2 +- > drivers/net/iavf/iavf_ethdev.c | 2 +- > drivers/net/ice/Makefile | 1 + > drivers/net/ice/ice_ethdev.c | 4 ++-- > drivers/net/ice/meson.build | 1 + > drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- > drivers/net/nfp/nfp_net.c | 2 +- > drivers/net/qede/Makefile | 1 + > 
drivers/net/qede/meson.build | 2 ++ > drivers/net/qede/qede_ethdev.c | 8 ++++---- > drivers/net/sfc/sfc_intr.c | 4 ++-- > drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- > drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- > 30 files changed, 59 insertions(+), 29 deletions(-) > > diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile > index 263f12b..fc12e6a 100644 > --- a/drivers/net/atlantic/Makefile > +++ b/drivers/net/atlantic/Makefile > @@ -10,6 +10,7 @@ LIB = librte_pmd_atlantic.a > > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) > +CFLAGS += -DALLOW_EXPERIMENTAL_API > > EXPORT_MAP := rte_pmd_atlantic_version.map > > diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c > index fdc0a7f..79001da 100644 > --- a/drivers/net/atlantic/atl_ethdev.c > +++ b/drivers/net/atlantic/atl_ethdev.c > @@ -1394,7 +1394,7 @@ atl_dev_interrupt_action(struct rte_eth_dev *dev, > } > done: > atl_enable_intr(dev); > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > return 0; > } > diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build > index 60b8468..d5602dd 100644 > --- a/drivers/net/atlantic/meson.build > +++ b/drivers/net/atlantic/meson.build > @@ -11,3 +11,5 @@ sources = files( > 'hw_atl/hw_atl_utils.c', > 'rte_pmd_atlantic.c', > ) > + > +cflags += '-DALLOW_EXPERIMENTAL_API' You should use allow_experimental_apis = true (idem in the rest of the patch). 
> diff --git a/drivers/net/avp/Makefile b/drivers/net/avp/Makefile > index c9db667..a753765 100644 > --- a/drivers/net/avp/Makefile > +++ b/drivers/net/avp/Makefile > @@ -10,6 +10,7 @@ LIB = librte_pmd_avp.a > > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) > +CFLAGS += -DALLOW_EXPERIMENTAL_API > LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring > LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs > LDLIBS += -lrte_bus_pci > diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c > index 47b96ec..504435e 100644 > --- a/drivers/net/avp/avp_ethdev.c > +++ b/drivers/net/avp/avp_ethdev.c > @@ -713,7 +713,7 @@ avp_dev_interrupt_handler(void *data) > status); > > /* re-enable UIO interrupt handling */ > - ret = rte_intr_enable(&pci_dev->intr_handle); > + ret = rte_intr_ack(&pci_dev->intr_handle); > if (ret < 0) { > PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", > ret); > diff --git a/drivers/net/avp/meson.build b/drivers/net/avp/meson.build > index a5f63cd..7fb9706 100644 > --- a/drivers/net/avp/meson.build > +++ b/drivers/net/avp/meson.build > @@ -7,3 +7,4 @@ if not is_linux > endif > sources = files('avp_ethdev.c') > install_headers('rte_avp_common.h', 'rte_avp_fifo.h') > +cflags += '-DALLOW_EXPERIMENTAL_API' > diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile > index c2d4336..bcdcd54 100644 > --- a/drivers/net/axgbe/Makefile > +++ b/drivers/net/axgbe/Makefile > @@ -10,6 +10,7 @@ LIB = librte_pmd_axgbe.a > > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) > +CFLAGS += -DALLOW_EXPERIMENTAL_API > > EXPORT_MAP := rte_pmd_axgbe_version.map > > diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c > index cfb1720..56d8dac 100644 > --- a/drivers/net/axgbe/axgbe_ethdev.c > +++ b/drivers/net/axgbe/axgbe_ethdev.c > @@ -136,8 +136,8 @@ axgbe_dev_interrupt_handler(void *param) > DMA_CH_SR, dma_ch_isr); > } > } > - /* Enable interrupts since disabled after generation*/ > - 
rte_intr_enable(&pdata->pci_dev->intr_handle); > + /* Unmask interrupts since disabled after generation*/ missing space at the end of the comment. > + rte_intr_ack(&pdata->pci_dev->intr_handle); > } > > /* > diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build > index 86873b7..226d11d 100644 > --- a/drivers/net/axgbe/meson.build > +++ b/drivers/net/axgbe/meson.build > @@ -14,6 +14,7 @@ sources = files('axgbe_ethdev.c', > 'axgbe_rxtx.c') > > cflags += '-Wno-cast-qual' > +cflags += '-DALLOW_EXPERIMENTAL_API' > > if arch_subdir == 'x86' > sources += files('axgbe_rxtx_vec_sse.c') > diff --git a/drivers/net/bnx2x/Makefile b/drivers/net/bnx2x/Makefile > index 55d1ad6..adead9d 100644 > --- a/drivers/net/bnx2x/Makefile > +++ b/drivers/net/bnx2x/Makefile > @@ -12,6 +12,7 @@ LIB = librte_pmd_bnx2x.a > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) > CFLAGS += -DZLIB_CONST > +CFLAGS += -DALLOW_EXPERIMENTAL_API > LDLIBS += -lz > LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring > LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs > diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c > index 10b4fdb..191a3ef 100644 > --- a/drivers/net/bnx2x/bnx2x_ethdev.c > +++ b/drivers/net/bnx2x/bnx2x_ethdev.c > @@ -133,7 +133,7 @@ bnx2x_interrupt_handler(void *param) > PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled"); > > bnx2x_interrupt_action(dev, 1); > - rte_intr_enable(&sc->pci_dev->intr_handle); > + rte_intr_ack(&sc->pci_dev->intr_handle); > } > > static void bnx2x_periodic_start(void *param) > diff --git a/drivers/net/bnx2x/meson.build b/drivers/net/bnx2x/meson.build > index 4892bb2..1bc84b7 100644 > --- a/drivers/net/bnx2x/meson.build > +++ b/drivers/net/bnx2x/meson.build > @@ -6,6 +6,7 @@ build = dep.found() > reason = 'missing dependency, "zlib"' > ext_deps += dep > cflags += '-DZLIB_CONST' > +cflags += '-DALLOW_EXPERIMENTAL_API' > sources = files('bnx2x.c', > 'bnx2x_ethdev.c', > 'bnx2x_rxtx.c', > diff --git 
a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c > index dc88661..0b7f501 100644 > --- a/drivers/net/e1000/em_ethdev.c > +++ b/drivers/net/e1000/em_ethdev.c > @@ -1001,7 +1001,7 @@ eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue > struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; > > em_rxq_intr_enable(hw); > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > return 0; > } > @@ -1568,7 +1568,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev, > return -1; > > intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > /* set get_link_status to check register later */ > hw->mac.get_link_status = 1; > diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c > index 3ee28cf..793a31c 100644 > --- a/drivers/net/e1000/igb_ethdev.c > +++ b/drivers/net/e1000/igb_ethdev.c > @@ -2876,7 +2876,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev, > } > > igb_intr_enable(dev); > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { > intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; > @@ -2987,7 +2987,7 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr > } > > igbvf_intr_enable(dev); > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > return 0; > } > @@ -5500,7 +5500,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) > E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); > E1000_WRITE_FLUSH(hw); > > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > return 0; > } > diff --git a/drivers/net/fm10k/Makefile b/drivers/net/fm10k/Makefile > index d657dff..55e9cd5 100644 > --- a/drivers/net/fm10k/Makefile > +++ b/drivers/net/fm10k/Makefile > @@ -10,6 +10,7 @@ LIB = librte_pmd_fm10k.a > > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) > +CFLAGS += -DALLOW_EXPERIMENTAL_API > > EXPORT_MAP := 
rte_pmd_fm10k_version.map > > diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c > index a1e3836..e8fa8a2 100644 > --- a/drivers/net/fm10k/fm10k_ethdev.c > +++ b/drivers/net/fm10k/fm10k_ethdev.c > @@ -2381,7 +2381,7 @@ fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) > else > FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), > FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); > - rte_intr_enable(&pdev->intr_handle); > + rte_intr_ack(&pdev->intr_handle); > return 0; > } > > @@ -2680,7 +2680,7 @@ fm10k_dev_interrupt_handler_pf(void *param) > FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | > FM10K_ITR_MASK_CLEAR); > /* Re-enable interrupt from host side */ > - rte_intr_enable(dev->intr_handle); > + rte_intr_ack(dev->intr_handle); > } > > /** > @@ -2760,7 +2760,7 @@ fm10k_dev_interrupt_handler_vf(void *param) > FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | > FM10K_ITR_MASK_CLEAR); > /* Re-enable interrupt from host side */ > - rte_intr_enable(dev->intr_handle); > + rte_intr_ack(dev->intr_handle); > } > > /* Mailbox message handler in VF */ > diff --git a/drivers/net/fm10k/meson.build b/drivers/net/fm10k/meson.build > index 2772ea4..b7d34c7 100644 > --- a/drivers/net/fm10k/meson.build > +++ b/drivers/net/fm10k/meson.build > @@ -14,3 +14,4 @@ if arch_subdir == 'x86' > endif > > includes += include_directories('base') > +cflags += '-DALLOW_EXPERIMENTAL_API' > diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c > index 2b9fc45..5217b9c 100644 > --- a/drivers/net/i40e/i40e_ethdev.c > +++ b/drivers/net/i40e/i40e_ethdev.c > @@ -11646,7 +11646,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) > I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); > > I40E_WRITE_FLUSH(hw); > - rte_intr_enable(&pci_dev->intr_handle); > + rte_intr_ack(&pci_dev->intr_handle); > > return 0; > } > diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c > index 
53dc05c..00d6ed5 100644 > --- a/drivers/net/iavf/iavf_ethdev.c > +++ b/drivers/net/iavf/iavf_ethdev.c > @@ -1098,7 +1098,7 @@ iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) > > IAVF_WRITE_FLUSH(hw); > > - rte_intr_enable(&pci_dev->intr_handle); > + rte_intr_ack(&pci_dev->intr_handle); > > return 0; > } > diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile > index 32abeb6..ae53c26 100644 > --- a/drivers/net/ice/Makefile > +++ b/drivers/net/ice/Makefile > @@ -10,6 +10,7 @@ LIB = librte_pmd_ice.a > > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) > +CFLAGS += -DALLOW_EXPERIMENTAL_API > > LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_kvargs > LDLIBS += -lrte_bus_pci -lrte_mempool > diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c > index 9ce730c..77b5a71 100644 > --- a/drivers/net/ice/ice_ethdev.c > +++ b/drivers/net/ice/ice_ethdev.c > @@ -1118,7 +1118,7 @@ ice_interrupt_handler(void *param) > done: > /* Enable interrupt */ > ice_pf_enable_irq0(hw); > - rte_intr_enable(dev->intr_handle); > + rte_intr_ack(dev->intr_handle); > } > > /* Initialize SW parameters of PF */ > @@ -3002,7 +3002,7 @@ static int ice_rx_queue_intr_enable(struct rte_eth_dev *dev, > val &= ~GLINT_DYN_CTL_WB_ON_ITR_M; > > ICE_WRITE_REG(hw, GLINT_DYN_CTL(msix_intr), val); > - rte_intr_enable(&pci_dev->intr_handle); > + rte_intr_ack(&pci_dev->intr_handle); > > return 0; > } > diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build > index 7f16647..70f349e 100644 > --- a/drivers/net/ice/meson.build > +++ b/drivers/net/ice/meson.build > @@ -13,6 +13,7 @@ sources = files( > > deps += ['hash'] > includes += include_directories('base') > +cflags += '-DALLOW_EXPERIMENTAL_API' > > if arch_subdir == 'x86' > sources += files('ice_rxtx_vec_sse.c') > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c > index 22c5b2c..f5920f5 100644 > --- a/drivers/net/ixgbe/ixgbe_ethdev.c > +++ 
b/drivers/net/ixgbe/ixgbe_ethdev.c > @@ -4502,7 +4502,7 @@ ixgbe_dev_interrupt_delayed_handler(void *param) > > PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); > ixgbe_enable_intr(dev); > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > } > > /** > @@ -5763,7 +5763,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) > RTE_SET_USED(queue_id); > IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, intr->mask); > > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > return 0; > } > @@ -5812,7 +5812,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) > mask &= (1 << (queue_id - 32)); > IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); > } > - rte_intr_enable(intr_handle); > + rte_intr_ack(intr_handle); > > return 0; > } > diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c > index 1a7aa17..91a226c 100644 > --- a/drivers/net/nfp/nfp_net.c > +++ b/drivers/net/nfp/nfp_net.c > @@ -1412,7 +1412,7 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) > if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { > /* If MSI-X auto-masking is used, clear the entry */ > rte_wmb(); > - rte_intr_enable(&pci_dev->intr_handle); > + rte_intr_ack(&pci_dev->intr_handle); > } else { > /* Make sure all updates are written before un-masking */ > rte_wmb(); > diff --git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile > index 2ecbd8d..a11d594 100644 > --- a/drivers/net/qede/Makefile > +++ b/drivers/net/qede/Makefile > @@ -12,6 +12,7 @@ LIB = librte_pmd_qede.a > > CFLAGS += -O3 > CFLAGS += $(WERROR_FLAGS) > +CFLAGS += -DALLOW_EXPERIMENTAL_API > LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring > LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs > LDLIBS += -lrte_bus_pci > diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build > index 12388a6..c8f9c6d 100644 > --- a/drivers/net/qede/meson.build > +++ b/drivers/net/qede/meson.build > @@ -10,3 +10,5 @@ sources = files( > 'qede_main.c', > 
'qede_rxtx.c', > ) > + > +cflags += '-DALLOW_EXPERIMENTAL_API' > diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c > index 82363e6..9ac9da3 100644 > --- a/drivers/net/qede/qede_ethdev.c > +++ b/drivers/net/qede/qede_ethdev.c > @@ -248,8 +248,8 @@ qede_interrupt_handler_intx(void *param) > if (status & 0x1) { > qede_interrupt_action(ECORE_LEADING_HWFN(edev)); > > - if (rte_intr_enable(eth_dev->intr_handle)) > - DP_ERR(edev, "rte_intr_enable failed\n"); > + if (rte_intr_ack(eth_dev->intr_handle)) > + DP_ERR(edev, "rte_intr_ack failed\n"); > } > } > > @@ -261,8 +261,8 @@ qede_interrupt_handler(void *param) > struct ecore_dev *edev = &qdev->edev; > > qede_interrupt_action(ECORE_LEADING_HWFN(edev)); > - if (rte_intr_enable(eth_dev->intr_handle)) > - DP_ERR(edev, "rte_intr_enable failed\n"); > + if (rte_intr_ack(eth_dev->intr_handle)) > + DP_ERR(edev, "rte_intr_ack failed\n"); > } > > static void > diff --git a/drivers/net/sfc/sfc_intr.c b/drivers/net/sfc/sfc_intr.c > index 1f4969b..76cb630 100644 > --- a/drivers/net/sfc/sfc_intr.c > +++ b/drivers/net/sfc/sfc_intr.c > @@ -79,7 +79,7 @@ sfc_intr_line_handler(void *cb_arg) > if (qmask & (1 << sa->mgmt_evq_index)) > sfc_intr_handle_mgmt_evq(sa); > > - if (rte_intr_enable(&pci_dev->intr_handle) != 0) > + if (rte_intr_ack(&pci_dev->intr_handle) != 0) > sfc_err(sa, "cannot reenable interrupts"); > > sfc_log_init(sa, "done"); > @@ -123,7 +123,7 @@ sfc_intr_message_handler(void *cb_arg) > > sfc_intr_handle_mgmt_evq(sa); > > - if (rte_intr_enable(&pci_dev->intr_handle) != 0) > + if (rte_intr_ack(&pci_dev->intr_handle) != 0) > sfc_err(sa, "cannot reenable interrupts"); > > sfc_log_init(sa, "done"); > diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c > index 04aecb7..62c8274 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -1265,6 +1265,20 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) > } > > 
static int > +virtio_intr_unmask(struct rte_eth_dev *dev) > +{ > + struct virtio_hw *hw = dev->data->dev_private; > + > + if (rte_intr_ack(dev->intr_handle) < 0) > + return -1; > + > + if (!hw->virtio_user_dev) > + hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); This is equivalent to what we had before in virtio_intr_enable, but I suspect the part on detecting msix again is unneeded. Given the timing, let's keep it safe and leave as is. -- David Marchand
On Thu, Jul 18, 2019 at 4:36 PM Nithin Dabilpuram <ndabilpuram@marvell.com> wrote: > > A final patch for below mentioned RFC patch discussions. > > [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe > http://mails.dpdk.org/archives/dev/2019-July/138358.html > [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs > http://mails.dpdk.org/archives/dev/2019-July/138357.html > [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers > http://mails.dpdk.org/archives/dev/2019-July/138359.html > > Please also refer to below discussions > http://mails.dpdk.org/archives/dev/2019-July/138184.html > http://mails.dpdk.org/archives/dev/2019-July/138113.html > > This patch set basically reverts vfio patch that moves eventfd setup > to probe time (1/3) because msix initialization is broken. > It adds new ack interrupt api for purposes of simple and light > weight ack method and also to avoid race conditions > that was the root cause of this discussion. Thanks a lot for working on this. I only have small comments (sent in separate mails), you already did all the work. I prepared incremental patches if you agree with those comments: - patch 2 https://github.com/david-marchand/dpdk/commit/374acb3409c206ffcb1bf6b5a7e8bb0bfca4e2b9 - patch 3 https://github.com/david-marchand/dpdk/commit/71102b9301b2a55a057a605f8e48e4a24f9d74a3 And a squashed branch: https://github.com/david-marchand/dpdk/commits/interrupt_fixes -- David Marchand
On 7/23/2019 1:08 AM, David Marchand wrote: > External Email > > ---------------------------------------------------------------------- > On Thu, Jul 18, 2019 at 4:36 PM Nithin Dabilpuram > <ndabilpuram@marvell.com> wrote: >> A final patch for below mentioned RFC patch discussions. >> >> [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe >> http://mails.dpdk.org/archives/dev/2019-July/138358.html >> [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs >> http://mails.dpdk.org/archives/dev/2019-July/138357.html >> [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers >> http://mails.dpdk.org/archives/dev/2019-July/138359.html >> >> Please also refer to below discussions >> http://mails.dpdk.org/archives/dev/2019-July/138184.html >> http://mails.dpdk.org/archives/dev/2019-July/138113.html >> >> This patch set basically reverts vfio patch that moves eventfd setup >> to probe time (1/3) because msix initialization is broken. >> It adds new ack interrupt api for purposes of simple and light >> weight ack method and also to avoid race conditions >> that was the root cause of this discussion. > Thanks a lot for working on this. > > I only have small comments (sent in separate mails), you already did > all the work. > I prepared incremental patches if you agree with those comments: > - patch 2 https://github.com/david-marchand/dpdk/commit/374acb3409c206ffcb1bf6b5a7e8bb0bfca4e2b9 > - patch 3 https://github.com/david-marchand/dpdk/commit/71102b9301b2a55a057a605f8e48e4a24f9d74a3 > > And a squashed branch: > https://github.com/david-marchand/dpdk/commits/interrupt_fixes Thanks David. I'm fine with the comments and your incremental change. You can send a v3 or Thomas can pick from your repo, anything should be fine. > >
From Nithin's original cover letter: A final patch for the below-mentioned RFC patch discussions. [RFC PATCH v3 1/3] vfio: revert change that does intr eventfd setup at probe http://mails.dpdk.org/archives/dev/2019-July/138358.html [RFC PATCH v3 2/3] eal: add mask and unmask interrupt APIs http://mails.dpdk.org/archives/dev/2019-July/138357.html [RFC PATCH v3 3/3] drivers/net: use unmask API in interrupt handlers http://mails.dpdk.org/archives/dev/2019-July/138359.html Please also refer to the discussions below: http://mails.dpdk.org/archives/dev/2019-July/138184.html http://mails.dpdk.org/archives/dev/2019-July/138113.html This patch set basically reverts the vfio patch that moves eventfd setup to probe time (1/3) because MSI-X initialization is broken. It adds a new ack interrupt API to provide a simple and lightweight ack method and also to avoid the race conditions that were the root cause of this discussion. Changelog since v4: - -- David Marchand Nithin Dabilpuram (3): vfio: revert change that does intr eventfd setup at probe eal: add ack interrupt API drivers/net: use ack API in interrupt handlers drivers/bus/pci/linux/pci_vfio.c | 78 ++++--- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 + drivers/net/avp/Makefile | 1 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 +- drivers/net/axgbe/meson.build | 2 + drivers/net/bnx2x/Makefile | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 +- drivers/net/e1000/igb_ethdev.c | 6 +- drivers/net/fm10k/Makefile | 1 + drivers/net/fm10k/fm10k_ethdev.c | 6 +- drivers/net/fm10k/meson.build | 2 + drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 +- drivers/net/ice/meson.build | 2 + drivers/net/ixgbe/ixgbe_ethdev.c | 6 +- 
drivers/net/nfp/nfp_net.c | 2 +- drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 + drivers/net/qede/qede_ethdev.c | 8 +- drivers/net/sfc/sfc_intr.c | 4 +- drivers/net/virtio/virtio_ethdev.c | 16 +- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- lib/librte_eal/common/include/rte_interrupts.h | 20 ++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 + lib/librte_eal/linux/eal/eal_interrupts.c | 271 +++++++++++++++++++++---- lib/librte_eal/rte_eal_version.map | 1 + 35 files changed, 359 insertions(+), 111 deletions(-) -- 1.8.3.1
From: Nithin Dabilpuram <ndabilpuram@marvell.com> This reverts commit 89aac60e0be9ed95a87b16e3595f102f9faaffb4. "vfio: fix interrupts race condition" The above-mentioned commit moves the interrupt's eventfd setup to probe time but only enables one interrupt for all types of interrupt handles, i.e. VFIO_MSI, VFIO_LEGACY, VFIO_MSIX, UIO. It works fine in the default case but breaks the cases below, specifically for MSI-X based interrupt handles. * Applications like l3fwd-power that request rxq interrupts during ethdev setup. * Drivers that need > 1 MSI-X interrupts to be configured for functionality to work. VFIO PCI for MSI-X expects all the possible vectors to be set up when using VFIO_IRQ_SET_ACTION_TRIGGER so that they can be allocated from the kernel PCI subsystem. The only way to increase the number of vectors later is to first free all of them by using VFIO_IRQ_SET_DATA_NONE with action trigger and then enable the new vector count. The above commit changes the behavior of rte_intr_[enable|disable] to only mask and unmask, unlike the earlier behavior, thereby breaking the above two scenarios. 
Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Stephen Hemminger <stephen@networkplumber.org> Tested-by: Shahed Shaikh <shshaikh@marvell.com> Tested-by: Lei Yao <lei.a.yao@Intel.com> Acked-by: David Marchand <david.marchand@redhat.com> --- Changelog since v2: * Update Shahed Shaikh's tested-by Changelog since v1: * Include tested by sign from Stephen --- drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ lib/librte_eal/linux/eal/eal_interrupts.c | 197 +++++++++++++++++++++++------- 2 files changed, 189 insertions(+), 86 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ee31239..1ceb1c0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -187,11 +187,8 @@ static int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { - char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; - struct vfio_irq_set *irq_set; - enum rte_intr_mode intr_mode; int i, ret, intr_idx; - int fd; + enum rte_intr_mode intr_mode; /* default to invalid index */ intr_idx = VFIO_PCI_NUM_IRQS; @@ -223,6 +220,7 @@ /* start from MSI-X interrupt type */ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; /* skip interrupt modes we don't want */ if (intr_mode != RTE_INTR_MODE_NONE && @@ -238,51 +236,51 @@ return -1; } - /* found a usable interrupt mode */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) - break; - /* if this vector cannot be used with eventfd, fail if we explicitly * specified interrupt type, otherwise continue */ - if (intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, " interrupt vector does not support eventfd!\n"); + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd 
for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); return -1; } - } - if (i < 0) - return -1; + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } - irq_set = (struct vfio_irq_set *)irq_set_buf; - irq_set->argsz = sizeof(irq_set_buf); - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = i; - irq_set->start = 0; - irq_set->count = 1; - memcpy(&irq_set->data, &fd, sizeof(int)); - if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { - RTE_LOG(ERR, EAL, " error configuring interrupt\n"); - close(fd); - return -1; + return 0; } - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - if (i == VFIO_PCI_MSIX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - else if (i == VFIO_PCI_MSI_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - else if (i == VFIO_PCI_INTX_IRQ_INDEX) - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - - return 0; + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 27976b3..79ad5e8 100644 --- 
a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -109,19 +109,42 @@ struct rte_intr_source { /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; - /* unmask INTx */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", @@ -133,51 +156,128 @@ struct rte_intr_source { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { + struct 
vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); - /* mask interrupts */ - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; - irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set.start = 0; - irq_set.count = 1; + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } return 0; } -/* enable MSI-X interrupts */ +/* enable MSI interrupts */ static int -vfio_enable_msix(const struct rte_intr_handle *intr_handle) -{ - char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = 
ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + +/* enable MSI-X interrupts */ +static int +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; len = sizeof(irq_set_buf); irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? 
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set->count = intr_handle->nb_efd; - memcpy(&irq_set->data, intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) { RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -189,21 +289,22 @@ struct rte_intr_source { /* disable MSI-X interrupts */ static int -vfio_disable_msix(const struct rte_intr_handle *intr_handle) -{ - struct vfio_irq_set irq_set; - int ret; +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; - if (intr_handle->nb_efd == 0) - return 0; + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; - irq_set.argsz = sizeof(irq_set); - irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; - irq_set.count = intr_handle->nb_efd; + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); if (ret) RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); @@ -564,7 +665,9 @@ struct rte_intr_source { return 
-1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_enable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_enable_intx(intr_handle)) return -1; @@ -618,7 +721,9 @@ struct rte_intr_source { return -1; break; case RTE_INTR_HANDLE_VFIO_MSI: - return 0; + if (vfio_disable_msi(intr_handle)) + return -1; + break; case RTE_INTR_HANDLE_VFIO_LEGACY: if (vfio_disable_intx(intr_handle)) return -1; -- 1.8.3.1
From: Nithin Dabilpuram <ndabilpuram@marvell.com> Add a new ack interrupt API to avoid using VFIO_IRQ_SET_ACTION_TRIGGER (rte_intr_enable()) for the purpose of acking interrupts in VFIO based interrupt handlers. This implementation is specific to Linux. Using rte_intr_enable() for acking an interrupt has the issues below: * It is time consuming to do for every interrupt received, as it will do free_irq() followed by request_irq() and all other initializations. * There is a race condition because of the window between free_irq() and request_irq(), with packet reception still on and the device still enabled, which would throw warning messages like the one below. [158764.159833] do_IRQ: 9.34 No irq handler for vector In this patch, rte_intr_ack() is a no-op for VFIO_MSIX/VFIO_MSI interrupts, as they are edge triggered and the kernel would not mask the interrupt before delivering the event to userspace, so we don't need to ack. Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Tested-by: Shahed Shaikh <shshaikh@marvell.com> Signed-off-by: David Marchand <david.marchand@redhat.com> --- Changelog since v4: - added EXPERIMENTAL banner + a little rewording in header - simplified vfio ioctl code by shrinking the passed buffer - fixed comments - sorted new symbol in map file Changelog since v3: - Moved note to implementation and changed the expectation to "must call" for the new API. 
Changelog since v2: - Update note on new api --- lib/librte_eal/common/include/rte_interrupts.h | 20 +++++++ lib/librte_eal/freebsd/eal/eal_interrupts.c | 9 +++ lib/librte_eal/linux/eal/eal_interrupts.c | 82 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 4 files changed, 112 insertions(+) diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index c1e912c..e3b406a 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -118,6 +118,26 @@ int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, */ int rte_intr_disable(const struct rte_intr_handle *intr_handle); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * It acknowledges an interrupt raised for the specified handle. + * + * This function should be called at the end of each interrupt handler either + * from application or driver, so that currently raised interrupt is acked and + * further new interrupts are raised. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. 
+ */ +__rte_experimental +int rte_intr_ack(const struct rte_intr_handle *intr_handle); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/freebsd/eal/eal_interrupts.c b/lib/librte_eal/freebsd/eal/eal_interrupts.c index 10375bd..f6831b7 100644 --- a/lib/librte_eal/freebsd/eal/eal_interrupts.c +++ b/lib/librte_eal/freebsd/eal/eal_interrupts.c @@ -387,6 +387,15 @@ struct rte_intr_source { return 0; } +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + return -1; +} + static void eal_intr_process_interrupts(struct kevent *events, int nfds) { diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c index 79ad5e8..1955324 100644 --- a/lib/librte_eal/linux/eal/eal_interrupts.c +++ b/lib/librte_eal/linux/eal/eal_interrupts.c @@ -197,6 +197,28 @@ struct rte_intr_source { return 0; } +/* unmask/ack legacy (INTx) interrupts */ +static int +vfio_ack_intx(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set irq_set; + + /* unmask INTx */ + memset(&irq_set, 0, sizeof(irq_set)); + irq_set.argsz = sizeof(irq_set); + irq_set.count = 1; + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set.start = 0; + + if (ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + /* enable MSI interrupts */ static int vfio_enable_msi(const struct rte_intr_handle *intr_handle) { @@ -693,6 +715,66 @@ struct rte_intr_source { return 0; } +/** + * PMD generally calls this function at the end of its IRQ callback. + * Internally, it unmasks the interrupt if possible. + * + * For INTx, unmasking is required as the interrupt is auto-masked prior to + * invoking callback. 
+ * + * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not + * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI, + * this function is no-op. + */ +int +rte_intr_ack(const struct rte_intr_handle *intr_handle) +{ + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* Both acking and enabling are same for UIO */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + /* VFIO MSI* is implicitly acked unlike INTx, nothing to do */ + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + return 0; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_ack_intx(intr_handle)) + return -1; + break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + return -1; +#endif +#endif + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + int rte_intr_disable(const struct rte_intr_handle *intr_handle) { diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 1892d9e..2344877 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -403,6 +403,7 @@ EXPERIMENTAL { rte_realloc_socket; # added in 19.08 + rte_intr_ack; rte_lcore_cpuset; rte_lcore_to_cpu_id; rte_mcfg_timer_lock; -- 1.8.3.1
From: Nithin Dabilpuram <ndabilpuram@marvell.com> Replace rte_intr_enable() with the rte_intr_ack() API for acking an interrupt in interrupt handlers and rx_queue_intr_enable() callbacks of PMDs. This is in line with the original intent of this change in PMDs: to ack interrupts after handling is completed if the device is backed by UIO, IGB_UIO or VFIO (with INTx). Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Jerin Jacob <jerinj@marvell.com> Acked-by: Shahed Shaikh <shshaikh@marvell.com> Tested-by: Shahed Shaikh <shshaikh@marvell.com> Signed-off-by: David Marchand <david.marchand@redhat.com> --- Changelog since v4: - s/-DALLOW_EXPERIMENTAL/allow_experimental_apis/ in all meson updates Changelog since v3: - Undo the qede PMD change that was done in v3, to adhere to the updated API doc. - Leaving the qede PMD maintainer ack, as v2 was tested and acked as well. - Updated avp, bnx2x, fm10k Makefiles to allow experimental APIs, which was mistakenly missed earlier though meson.build was updated. Changelog since v2: - Removed ack call from qede PMD msix handler - Ack from Shahed --- drivers/net/atlantic/Makefile | 1 + drivers/net/atlantic/atl_ethdev.c | 2 +- drivers/net/atlantic/meson.build | 2 ++ drivers/net/avp/Makefile | 1 + drivers/net/avp/avp_ethdev.c | 2 +- drivers/net/avp/meson.build | 1 + drivers/net/axgbe/Makefile | 1 + drivers/net/axgbe/axgbe_ethdev.c | 4 ++-- drivers/net/axgbe/meson.build | 2 ++ drivers/net/bnx2x/Makefile | 1 + drivers/net/bnx2x/bnx2x_ethdev.c | 2 +- drivers/net/bnx2x/meson.build | 1 + drivers/net/e1000/em_ethdev.c | 4 ++-- drivers/net/e1000/igb_ethdev.c | 6 +++--- drivers/net/fm10k/Makefile | 1 + drivers/net/fm10k/fm10k_ethdev.c | 6 +++--- drivers/net/fm10k/meson.build | 2 ++ drivers/net/i40e/i40e_ethdev.c | 2 +- drivers/net/iavf/iavf_ethdev.c | 2 +- drivers/net/ice/Makefile | 1 + drivers/net/ice/ice_ethdev.c | 4 ++-- drivers/net/ice/meson.build | 2 ++ drivers/net/ixgbe/ixgbe_ethdev.c | 6 +++--- drivers/net/nfp/nfp_net.c | 2 +- 
drivers/net/qede/Makefile | 1 + drivers/net/qede/meson.build | 2 ++ drivers/net/qede/qede_ethdev.c | 8 ++++---- drivers/net/sfc/sfc_intr.c | 4 ++-- drivers/net/virtio/virtio_ethdev.c | 16 +++++++++++++++- drivers/net/vmxnet3/vmxnet3_ethdev.c | 2 +- 30 files changed, 62 insertions(+), 29 deletions(-) diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile index 263f12b..fc12e6a 100644 --- a/drivers/net/atlantic/Makefile +++ b/drivers/net/atlantic/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_atlantic.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_atlantic_version.map diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c index fa89ae7..3c1b349 100644 --- a/drivers/net/atlantic/atl_ethdev.c +++ b/drivers/net/atlantic/atl_ethdev.c @@ -1393,7 +1393,7 @@ int atl_macsec_select_rxsa(struct rte_eth_dev *dev, } done: atl_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build index 60b8468..919d741 100644 --- a/drivers/net/atlantic/meson.build +++ b/drivers/net/atlantic/meson.build @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Aquantia Corporation +allow_experimental_apis = true + sources = files( 'atl_rxtx.c', 'atl_ethdev.c', diff --git a/drivers/net/avp/Makefile b/drivers/net/avp/Makefile index c9db667..a753765 100644 --- a/drivers/net/avp/Makefile +++ b/drivers/net/avp/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_avp.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 47b96ec..504435e 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -713,7 +713,7 @@ struct 
avp_queue { status); /* re-enable UIO interrupt handling */ - ret = rte_intr_enable(&pci_dev->intr_handle); + ret = rte_intr_ack(&pci_dev->intr_handle); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", ret); diff --git a/drivers/net/avp/meson.build b/drivers/net/avp/meson.build index a5f63cd..793bb18 100644 --- a/drivers/net/avp/meson.build +++ b/drivers/net/avp/meson.build @@ -5,5 +5,6 @@ if not is_linux build = false reason = 'only supported on linux' endif +allow_experimental_apis = true sources = files('avp_ethdev.c') install_headers('rte_avp_common.h', 'rte_avp_fifo.h') diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile index c2d4336..bcdcd54 100644 --- a/drivers/net/axgbe/Makefile +++ b/drivers/net/axgbe/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_axgbe.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_axgbe_version.map diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index cfb1720..4fcede8 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ b/drivers/net/axgbe/axgbe_ethdev.c @@ -136,8 +136,8 @@ static int axgbe_phy_reset(struct axgbe_port *pdata) DMA_CH_SR, dma_ch_isr); } } - /* Enable interrupts since disabled after generation*/ - rte_intr_enable(&pdata->pci_dev->intr_handle); + /* Unmask interrupts since disabled after generation */ + rte_intr_ack(&pdata->pci_dev->intr_handle); } /* diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build index 86873b7..366b289 100644 --- a/drivers/net/axgbe/meson.build +++ b/drivers/net/axgbe/meson.build @@ -6,6 +6,8 @@ if not is_linux reason = 'only supported on linux' endif +allow_experimental_apis = true + sources = files('axgbe_ethdev.c', 'axgbe_dev.c', 'axgbe_mdio.c', diff --git a/drivers/net/bnx2x/Makefile b/drivers/net/bnx2x/Makefile index 55d1ad6..adead9d 100644 --- a/drivers/net/bnx2x/Makefile +++ b/drivers/net/bnx2x/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_bnx2x.a 
CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) CFLAGS += -DZLIB_CONST +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lz LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index 10b4fdb..191a3ef 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -133,7 +133,7 @@ struct rte_bnx2x_xstats_name_off { PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled"); bnx2x_interrupt_action(dev, 1); - rte_intr_enable(&sc->pci_dev->intr_handle); + rte_intr_ack(&sc->pci_dev->intr_handle); } static void bnx2x_periodic_start(void *param) diff --git a/drivers/net/bnx2x/meson.build b/drivers/net/bnx2x/meson.build index 4892bb2..6787089 100644 --- a/drivers/net/bnx2x/meson.build +++ b/drivers/net/bnx2x/meson.build @@ -5,6 +5,7 @@ dep = dependency('zlib', required: false) build = dep.found() reason = 'missing dependency, "zlib"' ext_deps += dep +allow_experimental_apis = true cflags += '-DZLIB_CONST' sources = files('bnx2x.c', 'bnx2x_ethdev.c', diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 0c859e5..fc01378 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1000,7 +1000,7 @@ static int eth_em_pci_remove(struct rte_pci_device *pci_dev) struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -1567,7 +1567,7 @@ static int eth_em_pci_remove(struct rte_pci_device *pci_dev) return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c index e784eeb..fec2b42 100644 --- a/drivers/net/e1000/igb_ethdev.c +++ b/drivers/net/e1000/igb_ethdev.c @@ -2875,7 +2875,7 @@ 
static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev) } igb_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2986,7 +2986,7 @@ void igbvf_mbx_process(struct rte_eth_dev *dev) } igbvf_intr_enable(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5499,7 +5499,7 @@ static void igbvf_set_vfta_all(struct rte_eth_dev *dev, bool on) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/fm10k/Makefile b/drivers/net/fm10k/Makefile index d657dff..55e9cd5 100644 --- a/drivers/net/fm10k/Makefile +++ b/drivers/net/fm10k/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_fm10k.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API EXPORT_MAP := rte_pmd_fm10k_version.map diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index 2d3c477..db4d721 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -2381,7 +2381,7 @@ static uint64_t fm10k_get_tx_port_offloads_capa(struct rte_eth_dev *dev) else FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&pdev->intr_handle); + rte_intr_ack(&pdev->intr_handle); return 0; } @@ -2680,7 +2680,7 @@ static uint64_t fm10k_get_tx_port_offloads_capa(struct rte_eth_dev *dev) FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /** @@ -2760,7 +2760,7 @@ static uint64_t fm10k_get_tx_port_offloads_capa(struct rte_eth_dev *dev) FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(dev->intr_handle); + 
rte_intr_ack(dev->intr_handle); } /* Mailbox message handler in VF */ diff --git a/drivers/net/fm10k/meson.build b/drivers/net/fm10k/meson.build index 2772ea4..8f6e423 100644 --- a/drivers/net/fm10k/meson.build +++ b/drivers/net/fm10k/meson.build @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +allow_experimental_apis = true + subdir('base') objs = [base_objs] diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index dd46d4d..8c91517 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -11645,7 +11645,7 @@ enum i40e_filter_pctype I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); I40E_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index a97cd76..4836333 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1098,7 +1098,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, IAVF_WRITE_FLUSH(hw); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile index 32abeb6..ae53c26 100644 --- a/drivers/net/ice/Makefile +++ b/drivers/net/ice/Makefile @@ -10,6 +10,7 @@ LIB = librte_pmd_ice.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_kvargs LDLIBS += -lrte_bus_pci -lrte_mempool diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index f05b48c..2ddf9d3 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -1118,7 +1118,7 @@ struct ice_xstats_name_off { done: /* Enable interrupt */ ice_pf_enable_irq0(hw); - rte_intr_enable(dev->intr_handle); + rte_intr_ack(dev->intr_handle); } /* Initialize SW parameters of PF */ @@ -3002,7 +3002,7 @@ static int ice_rx_queue_intr_enable(struct 
rte_eth_dev *dev, val &= ~GLINT_DYN_CTL_WB_ON_ITR_M; ICE_WRITE_REG(hw, GLINT_DYN_CTL(msix_intr), val); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); return 0; } diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index 7f16647..36b4b3c 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Intel Corporation +allow_experimental_apis = true + subdir('base') objs = [base_objs] diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 4a6e5c3..68b1e4f 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -4501,7 +4501,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); } /** @@ -5762,7 +5762,7 @@ static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, intr->mask); - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } @@ -5811,7 +5811,7 @@ static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(intr_handle); + rte_intr_ack(intr_handle); return 0; } diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index f5d33ef..f1a3ef2 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -1412,7 +1412,7 @@ enum nfp_qcp_ptr { if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { /* If MSI-X auto-masking is used, clear the entry */ rte_wmb(); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_ack(&pci_dev->intr_handle); } else { /* Make sure all updates are written before un-masking */ rte_wmb(); diff --git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile index 
2ecbd8d..a11d594 100644 --- a/drivers/net/qede/Makefile +++ b/drivers/net/qede/Makefile @@ -12,6 +12,7 @@ LIB = librte_pmd_qede.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build index 12388a6..1755719 100644 --- a/drivers/net/qede/meson.build +++ b/drivers/net/qede/meson.build @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Luca Boccassi <bluca@debian.org> +allow_experimental_apis = true + subdir('base') objs = [base_objs] diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c index 0b3046a..528b33e 100644 --- a/drivers/net/qede/qede_ethdev.c +++ b/drivers/net/qede/qede_ethdev.c @@ -248,8 +248,8 @@ static void qede_interrupt_action(struct ecore_hwfn *p_hwfn) if (status & 0x1) { qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } } @@ -261,8 +261,8 @@ static void qede_interrupt_action(struct ecore_hwfn *p_hwfn) struct ecore_dev *edev = &qdev->edev; qede_interrupt_action(ECORE_LEADING_HWFN(edev)); - if (rte_intr_enable(eth_dev->intr_handle)) - DP_ERR(edev, "rte_intr_enable failed\n"); + if (rte_intr_ack(eth_dev->intr_handle)) + DP_ERR(edev, "rte_intr_ack failed\n"); } static void diff --git a/drivers/net/sfc/sfc_intr.c b/drivers/net/sfc/sfc_intr.c index 1f4969b..76cb630 100644 --- a/drivers/net/sfc/sfc_intr.c +++ b/drivers/net/sfc/sfc_intr.c @@ -79,7 +79,7 @@ if (qmask & (1 << sa->mgmt_evq_index)) sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); @@ -123,7 +123,7 @@ 
sfc_intr_handle_mgmt_evq(sa); - if (rte_intr_enable(&pci_dev->intr_handle) != 0) + if (rte_intr_ack(&pci_dev->intr_handle) != 0) sfc_err(sa, "cannot reenable interrupts"); sfc_log_init(sa, "done"); diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 04aecb7..62c8274 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1265,6 +1265,20 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, } static int +virtio_intr_unmask(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (rte_intr_ack(dev->intr_handle) < 0) + return -1; + + if (!hw->virtio_user_dev) + hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); + + return 0; +} + +static int virtio_intr_enable(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; @@ -1457,7 +1471,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (virtio_intr_enable(dev) < 0) + if (virtio_intr_unmask(dev) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 2b1e915..57feb37 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -1426,7 +1426,7 @@ static int eth_vmxnet3_pci_remove(struct rte_pci_device *pci_dev) vmxnet3_process_events(dev); - if (rte_intr_enable(&pci_dev->intr_handle) < 0) + if (rte_intr_ack(&pci_dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); } -- 1.8.3.1
23/07/2019 10:04, David Marchand:
> From Nithin's original cover letter:
> This patch set basically reverts the vfio patch that moves eventfd setup
> to probe time (1/3) because msix initialization is broken.
> It adds a new ack interrupt API to provide a simple and lightweight
> ack method and also to avoid the race conditions
> that were the root cause of this discussion.
Applied, thanks