From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jianfeng Tan Subject: [PATCH] igb_uio: stop device when closing /dev/uioX Date: Fri, 2 Dec 2016 16:45:46 +0000 Message-ID: <1480697146-118038-1-git-send-email-jianfeng.tan@intel.com> References: <1480696111-116651-1-git-send-email-jianfeng.tan@intel.com> Cc: thomas.monjalon@6wind.com, david.marchand@6wind.com, ferruh.yigit@intel.com, stephen@networkplumber.org, jing.d.chen@intel.com, Jianfeng Tan To: dev@dpdk.org Return-path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 03A1458CF for ; Fri, 2 Dec 2016 17:45:11 +0100 (CET) In-Reply-To: <1480696111-116651-1-git-send-email-jianfeng.tan@intel.com> List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Depends-on: http://dpdk.org/dev/patchwork/patch/17493/ When a DPDK application grab a PCI device, device and driver work together to Rx/Tx packets. If the DPDK app crashes or gets killed, there's no chance for DPDK driver to stop the device, which means rte_eth_dev_stop() will not be called. This could result in problems. In virtio's case, the device (the vhost backend), will keep working. If packets come, the vhost will copy them into the vring, which is negotiated with the previous DPDK app. But the resources, especially hugepages, are recycled by VM kernel. What's worse, the memory could be allocated for other usage, and re-written. This leads to an uncertain situation. Like this post has reported, https://bugs.launchpad.net/qemu/+bug/1558175, QEMU's vhost finds out wrong value, and exits the whole QEMU process. To make it right, we need to restart (or reset) the device and make the device go into the initial state, when uio-generic or igb_uio recycles the PCI device. There's a chance in uio framework to disable devices when /dev/uioX gets closed. Here we enable the pci device in open() hook and disable it in release() hook. However, if device is not enabled in probe() phase any more, we can not register irq in probe() through uio_register_device(). To address that, we invoke request_irq() to register callback directly. Similar change needs to be done in uio-generic driver. And vfio-pci seems to have done that already. Signed-off-by: Jianfeng Tan --- lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 171 +++++++++++++++++++----------- 1 file changed, 108 insertions(+), 63 deletions(-) diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index e9d78fb..048390d 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -157,8 +157,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) * If yes, disable it here and will be enable later. */ static irqreturn_t -igbuio_pci_irqhandler(int irq, struct uio_info *info) +igbuio_pci_irqhandler(int irq, void *dev_id) { + struct uio_device *idev = (struct uio_device *)dev_id; + struct uio_info *info = idev->info; struct rte_uio_pci_dev *udev = info->priv; /* Legacy mode need to mask in hardware */ @@ -166,6 +168,8 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info) !pci_check_and_mask_intx(udev->pdev)) return IRQ_NONE; + uio_event_notify(info); + /* Message signal mode, no share IRQ and automasked */ return IRQ_HANDLED; } @@ -216,20 +220,58 @@ igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) } #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -static int __devinit -#else static int -#endif -igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) +igbuio_intr_init(struct uio_info *info) { - struct rte_uio_pci_dev *udev; + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *dev = udev->pdev; struct msix_entry msix_entry; - int err; + int ret = 0; - udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); - if (!udev) - return -ENOMEM; + switch (igbuio_intr_mode_preferred) { + case RTE_INTR_MODE_MSIX: + /* Only 1 msi-x vector needed */ + msix_entry.entry = 0; + if (pci_enable_msix(dev, &msix_entry, 1) == 0) { + dev_dbg(&dev->dev, "using MSI-X"); + info->irq = msix_entry.vector; + udev->mode = RTE_INTR_MODE_MSIX; + break; + } + /* fall back to INTX */ + case RTE_INTR_MODE_LEGACY: + if (pci_intx_mask_supported(dev)) { + dev_dbg(&dev->dev, "using INTX"); + info->irq_flags = IRQF_SHARED; + info->irq = dev->irq; + udev->mode = RTE_INTR_MODE_LEGACY; + break; + } + dev_notice(&dev->dev, "PCI INTX mask not supported\n"); + ret = -EOPNOTSUPP; + /* fall back to no IRQ */ + default: + udev->mode = RTE_INTR_MODE_NONE; + info->irq = 0; + } + + if (info->irq) { + ret = request_irq(info->irq, igbuio_pci_irqhandler, + info->irq_flags, info->name, + info->uio_dev); + if (ret && udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(udev->pdev); + } + + return ret; +} + +static int +igbuio_pci_open(struct uio_info *info, struct inode *inode) +{ + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *dev = udev->pdev; + int err; /* * enable device: ask low-level code to enable I/O and @@ -238,30 +280,77 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) err = pci_enable_device(dev); if (err != 0) { dev_err(&dev->dev, "Cannot enable PCI device\n"); - goto fail_free; + return err; } /* enable bus mastering on the device */ pci_set_master(dev); /* set 64-bit DMA mask */ - err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); - if (err != 0) { + err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (err) { dev_err(&dev->dev, "Cannot set DMA mask\n"); - goto fail_disable_dev; + goto error; } err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64)); - if (err != 0) { + if (err) { dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); - goto fail_disable_dev; + goto error; + } + + err = igbuio_intr_init(info); + if (err) { + dev_err(&dev->dev, "Enable interrupt fails\n"); + goto error; + } else { + dev_info(&dev->dev, "uio device registered with irq %lx\n", + udev->info.irq); + } + + return 0; +error: + pci_disable_device(dev); + return err; +} + +static int +igbuio_pci_release(struct uio_info *info, struct inode *inode) +{ + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *dev = udev->pdev; + + dev_info(&udev->pdev->dev, "will be disable\n"); + if (info->irq) { + free_irq(info->irq, info->uio_dev); + info->irq = 0; } + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(dev); + pci_disable_device(udev->pdev); + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +static int __devinit +#else +static int +#endif +igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct rte_uio_pci_dev *udev; + int err; + + udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); + if (!udev) + return -ENOMEM; /* fill uio infos */ udev->info.name = "igb_uio"; udev->info.version = "0.1"; - udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol; + udev->info.release = igbuio_pci_release; + udev->info.open = igbuio_pci_open; #ifdef CONFIG_XEN_DOM0 /* check if the driver run on Xen Dom0 */ if (xen_initial_domain()) @@ -270,42 +359,9 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) udev->info.priv = udev; udev->pdev = dev; - switch (igbuio_intr_mode_preferred) { - case RTE_INTR_MODE_MSIX: - /* Only 1 msi-x vector needed */ - msix_entry.entry = 0; - if (pci_enable_msix(dev, &msix_entry, 1) == 0) { - dev_dbg(&dev->dev, "using MSI-X"); - udev->info.irq = msix_entry.vector; - udev->mode = RTE_INTR_MODE_MSIX; - break; - } - /* fall back to INTX */ - case RTE_INTR_MODE_LEGACY: - if (pci_intx_mask_supported(dev)) { - dev_dbg(&dev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED; - udev->info.irq = dev->irq; - udev->mode = RTE_INTR_MODE_LEGACY; - break; - } - dev_notice(&dev->dev, "PCI INTX mask not supported\n"); - /* fall back to no IRQ */ - case RTE_INTR_MODE_NONE: - udev->mode = RTE_INTR_MODE_NONE; - udev->info.irq = 0; - break; - - default: - dev_err(&dev->dev, "invalid IRQ mode %u", - igbuio_intr_mode_preferred); - err = -EINVAL; - goto fail_disable_dev; - } - err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); if (err != 0) - goto fail_disable_irq; + goto fail_free; /* register uio driver */ err = uio_register_device(&dev->dev, &udev->info); @@ -314,18 +370,10 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_drvdata(dev, udev); - dev_info(&dev->dev, "uio device registered with irq %lx\n", - udev->info.irq); - return 0; fail_remove_group: sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); -fail_disable_irq: - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(udev->pdev); -fail_disable_dev: - pci_disable_device(dev); fail_free: kfree(udev); @@ -339,9 +387,6 @@ igbuio_pci_remove(struct pci_dev *dev) sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); uio_unregister_device(&udev->info); - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(dev); - pci_disable_device(dev); pci_set_drvdata(dev, NULL); kfree(udev); } -- 2.7.4