From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jeff Guo Subject: [PATCH V9 4/5] pci_uio: add uevent hotplug failure handler in pci Date: Wed, 10 Jan 2018 17:12:23 +0800 Message-ID: <1515575544-2141-5-git-send-email-jia.guo@intel.com> References: <1515555037-9419-4-git-send-email-jia.guo@intel.com> <1515575544-2141-1-git-send-email-jia.guo@intel.com> Cc: konstantin.ananyev@intel.com, jblunck@infradead.org, shreyansh.jain@nxp.com, jingjing.wu@intel.com, dev@dpdk.org, jia.guo@intel.com, thomas@monjalon.net, helin.zhang@intel.com, motih@mellanox.com To: stephen@networkplumber.org, bruce.richardson@intel.com, ferruh.yigit@intel.com, gaetan.rivet@6wind.com Return-path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 959CA1B1B7 for ; Wed, 10 Jan 2018 10:13:42 +0100 (CET) In-Reply-To: <1515575544-2141-1-git-send-email-jia.guo@intel.com> List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" when detect hot removal uevent of device, the device resource become invalid, in order to avoid unexpected usage of this resource, remap the device resource to be a fake memory, that would lead the application keep running well but not encounter system core dump. Signed-off-by: Jeff Guo --- v9->v8: split the patch set into small and explicit patch --- drivers/bus/pci/bsd/pci.c | 23 ++++++++++++++++++++ drivers/bus/pci/linux/pci.c | 34 ++++++++++++++++++++++++++++++ drivers/bus/pci/pci_common.c | 22 +++++++++++++++++++ drivers/bus/pci/pci_common_uio.c | 28 +++++++++++++++++++++++++ drivers/bus/pci/private.h | 12 +++++++++++ drivers/bus/pci/rte_bus_pci.h | 11 ++++++++++ drivers/bus/vdev/vdev.c | 9 +++++++- lib/librte_eal/common/eal_common_bus.c | 1 + lib/librte_eal/common/include/rte_bus.h | 17 +++++++++++++++ lib/librte_eal/linuxapp/eal/eal_dev.c | 35 ++++++++++++++++++++++++++++--- lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 6 ++++++ lib/librte_pci/rte_pci.c | 20 ++++++++++++++++++ lib/librte_pci/rte_pci.h | 17 +++++++++++++++ 13 files changed, 231 insertions(+), 4 deletions(-) diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c index 655b34b..d7165b9 100644 --- a/drivers/bus/pci/bsd/pci.c +++ b/drivers/bus/pci/bsd/pci.c @@ -97,6 +97,29 @@ rte_pci_unmap_device(struct rte_pci_device *dev) } } +/* re-map pci device */ +int +rte_pci_remap_device(struct rte_pci_device *dev) +{ + int ret; + + if (dev == NULL) + return -EINVAL; + + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + ret = pci_uio_remap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + void pci_uio_free_resource(struct rte_pci_device *dev, struct mapped_pci_resource *uio_res) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 25f907e..7aa3079 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -116,6 +116,38 @@ rte_pci_unmap_device(struct rte_pci_device *dev) } } +/* Map pci device */ +int +rte_pci_remap_device(struct rte_pci_device *dev) +{ + int ret = -1; + + if (dev == NULL) + return -EINVAL; + + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + /* no thing to do */ +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_remap_resource(dev); + } + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + void * pci_find_max_end_va(void) { @@ -357,6 +389,8 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) rte_pci_add_device(dev); } + dev->device.state = RTE_DEV_PARSED; + TAILQ_INIT(&(dev->device.uev_cbs)); return 0; } diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index c4415a0..3fbe9d7 100644 --- a/drivers/bus/pci/pci_common.c +++ b/drivers/bus/pci/pci_common.c @@ -282,6 +282,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev) if (rc > 0) /* positive value means driver doesn't support it */ continue; + dev->device.state = RTE_DEV_PROBED; return 0; } return 1; @@ -481,6 +482,7 @@ rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, void rte_pci_remove_device(struct rte_pci_device *pci_dev) { + RTE_LOG(DEBUG, EAL, " rte_pci_remove_device for device list\n"); TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); } @@ -522,6 +524,25 @@ pci_find_device_by_name(const struct rte_device *start, } static int +pci_remap_device(struct rte_device *dev) +{ + struct rte_pci_device *pdev; + int ret; + + if (dev == NULL) + return -EINVAL; + + pdev = RTE_DEV_TO_PCI(dev); + + /* remap resources for devices that use igb_uio */ + ret = rte_pci_remap_device(pdev); + if (ret != 0) + RTE_LOG(ERR, EAL, "failed to remap device %s", + dev->name); + return ret; +} + +static int pci_plug(struct rte_device *dev) { return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); @@ -552,6 +573,7 @@ struct rte_pci_bus rte_pci_bus = { .unplug = pci_unplug, .parse = pci_parse, .get_iommu_class = rte_pci_get_iommu_class, + .remap_device = pci_remap_device, }, .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c index dd84ec8..a4bc473 100644 --- a/drivers/bus/pci/pci_common_uio.c +++ b/drivers/bus/pci/pci_common_uio.c @@ -147,6 +147,34 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res) } } +/* remap the PCI resource of a PCI device in private virtual memory */ +int +pci_uio_remap_resource(struct rte_pci_device *dev) +{ + int i; + uint64_t phaddr; + void *map_address; + + /* Map all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + phaddr = dev->mem_resource[i].phys_addr; + if (phaddr == 0) + continue; + map_address = pci_map_private_resource( + dev->mem_resource[i].addr, 0, + (size_t)dev->mem_resource[i].len); + if (map_address == MAP_FAILED) + goto error; + memset(map_address, 0xFF, (size_t)dev->mem_resource[i].len); + dev->mem_resource[i].addr = map_address; + } + + return 0; +error: + return -1; +} + static struct mapped_pci_resource * pci_uio_find_resource(struct rte_pci_device *dev) { diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index 2283f09..10baa1a 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -202,6 +202,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev, struct mapped_pci_resource *uio_res); /** + * remap the pci uio resource.. + * + * @param dev + * Point to the struct rte pci device. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +pci_uio_remap_resource(struct rte_pci_device *dev); + +/** * Map device memory to uio resource * * This function is private to EAL. diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index d4a2996..65337eb 100644 --- a/drivers/bus/pci/rte_bus_pci.h +++ b/drivers/bus/pci/rte_bus_pci.h @@ -52,6 +52,8 @@ extern "C" { #include #include #include +#include +#include #include #include @@ -197,6 +199,15 @@ int rte_pci_map_device(struct rte_pci_device *dev); void rte_pci_unmap_device(struct rte_pci_device *dev); /** + * Remap this device + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +int rte_pci_remap_device(struct rte_pci_device *dev); + +/** * Dump the content of the PCI bus. * * @param f diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c index cac2aa0..c9cd369 100644 --- a/drivers/bus/vdev/vdev.c +++ b/drivers/bus/vdev/vdev.c @@ -323,7 +323,6 @@ vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, return NULL; } - static struct rte_device * vdev_find_device_by_name(const struct rte_device *start, rte_dev_cmp_name_t cmp_name, @@ -343,6 +342,13 @@ vdev_find_device_by_name(const struct rte_device *start, } static int +vdev_remap_device(struct rte_device *dev) +{ + RTE_SET_USED(dev); + return 0; +} + +static int vdev_plug(struct rte_device *dev) { return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev)); @@ -362,6 +368,7 @@ static struct rte_bus rte_vdev_bus = { .plug = vdev_plug, .unplug = vdev_unplug, .parse = vdev_parse, + .remap_device = vdev_remap_device, }; RTE_REGISTER_BUS(vdev, rte_vdev_bus); diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c index efd5539..bdb0e54 100644 --- a/lib/librte_eal/common/eal_common_bus.c +++ b/lib/librte_eal/common/eal_common_bus.c @@ -54,6 +54,7 @@ rte_bus_register(struct rte_bus *bus) RTE_VERIFY(bus->find_device_by_name); /* Buses supporting driver plug also require unplug. */ RTE_VERIFY(!bus->plug || bus->unplug); + RTE_VERIFY(bus->remap_device); TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index 6dcfdb3..78990bc 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -196,6 +196,22 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev); typedef int (*rte_bus_parse_t)(const char *name, void *addr); /** + * Implementation specific remap function which is responsible for + * remmaping devices on that bus from original share memory resource + * to a private memory resource for the sake of device has been removal, + * when detect the device removal event invoke from the kernel side, + * prior to call this function before any operation for device hw. + * + * @param dev + * Device pointer that was returned by a previous call to find_device. + * + * @return + * 0 on success. + * !0 on error. + */ +typedef int (*rte_bus_remap_device_t)(struct rte_device *dev); + +/** * Bus scan policies */ enum rte_bus_scan_mode { @@ -239,6 +255,7 @@ struct rte_bus { rte_bus_plug_t plug; /**< Probe single device for drivers */ rte_bus_unplug_t unplug; /**< Remove single device from driver */ rte_bus_parse_t parse; /**< Parse a device name */ + rte_bus_remap_device_t remap_device; /**< remap a device */ struct rte_bus_conf conf; /**< Bus configuration */ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ }; diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c index 9d347f2..d68ef9b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_dev.c +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c @@ -32,6 +32,12 @@ bool service_no_init = true; #define DEV_EV_MNT_SERVICE_NAME "device_event_monitor_service" +static void sig_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) + rte_dev_monitor_stop(); +} + static int dev_monitor_fd_new(void) { @@ -168,6 +174,7 @@ dev_uev_process(struct epoll_event *events, int nfds) struct rte_bus *bus; struct rte_device *dev; struct rte_eal_uevent uevent; + int ret; int i; for (i = 0; i < nfds; i++) { @@ -187,9 +194,17 @@ dev_uev_process(struct epoll_event *events, int nfds) if ((!dev) || dev->state == RTE_DEV_UNDEFINED) return 0; - return(_rte_dev_callback_process(dev, - RTE_DEV_EVENT_REMOVE, - NULL)); + dev->state = RTE_DEV_FAULT; + + /** + * remap the resource to be fake + * before user's removal processing + */ + ret = bus->remap_device(dev); + if (!ret) + return(_rte_dev_callback_process(dev, + RTE_DEV_EVENT_REMOVE, + NULL)); } else if (uevent.type == RTE_DEV_EVENT_ADD) { if (dev == NULL) { return(_rte_dev_callback_process(NULL, @@ -215,12 +230,26 @@ dev_uev_process(struct epoll_event *events, int nfds) */ static int32_t dev_uev_monitoring(__rte_unused void *arg) { + struct sigaction act; + sigset_t mask; int netlink_fd = -1; struct epoll_event ep_kernel; int fd_ep = -1; service_exit = false; + /* set signal handlers */ + memset(&act, 0x00, sizeof(struct sigaction)); + act.sa_handler = sig_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_RESTART; + sigaction(SIGINT, &act, NULL); + sigaction(SIGTERM, &act, NULL); + sigemptyset(&mask); + sigaddset(&mask, SIGINT); + sigaddset(&mask, SIGTERM); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + fd_ep = epoll_create1(EPOLL_CLOEXEC); if (fd_ep < 0) { RTE_LOG(ERR, EAL, "error creating epoll fd: %m\n"); diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index a3a98c1..d0e07b4 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -354,6 +354,12 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode) struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; + /* check if device have been remove before release */ + if ((&dev->dev.kobj)->state_remove_uevent_sent == 1) { + pr_info("The device have been removed\n"); + return -1; + } + /* disable interrupts */ igbuio_pci_disable_interrupts(udev); diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c index 0160fc1..feb5fd7 100644 --- a/lib/librte_pci/rte_pci.c +++ b/lib/librte_pci/rte_pci.c @@ -172,6 +172,26 @@ rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr) return -1; } +/* map a private resource from an address*/ +void * +pci_map_private_resource(void *requested_addr, off_t offset, size_t size) +{ + void *mapaddr; + + mapaddr = mmap(requested_addr, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (mapaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): cannot mmap(%p, 0x%lx, 0x%lx): " + "%s (%p)\n", + __func__, requested_addr, + (unsigned long)size, (unsigned long)offset, + strerror(errno), mapaddr); + } else + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); + + return mapaddr; +} /* map a particular resource from a file */ void * diff --git a/lib/librte_pci/rte_pci.h b/lib/librte_pci/rte_pci.h index 4f2cd18..f6091a6 100644 --- a/lib/librte_pci/rte_pci.h +++ b/lib/librte_pci/rte_pci.h @@ -227,6 +227,23 @@ int rte_pci_addr_cmp(const struct rte_pci_addr *addr, int rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr); /** + * @internal + * Map to a particular private resource. + * + * @param requested_addr + * The starting address for the new mapping range. + * @param offset + * The offset for the mapping range. + * @param size + * The size for the mapping range. + * @return + * - On success, the function returns a pointer to the mapped area. + * - On error, the value MAP_FAILED is returned. + */ +void *pci_map_private_resource(void *requested_addr, off_t offset, + size_t size); + +/** * Map a particular resource from a file. * * @param requested_addr -- 2.7.4