From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752008AbbJDUnx (ORCPT ); Sun, 4 Oct 2015 16:43:53 -0400 Received: from mail-wi0-f179.google.com ([209.85.212.179]:34997 "EHLO mail-wi0-f179.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751911AbbJDUn3 (ORCPT ); Sun, 4 Oct 2015 16:43:29 -0400 From: Vlad Zolotarov To: linux-kernel@vger.kernel.org, mst@redhat.com, hjk@hansjkoch.de, corbet@lwn.net, gregkh@linuxfoundation.org Cc: bruce.richardson@intel.com, avi@cloudius-systems.com, gleb@cloudius-systems.com, stephen@networkplumber.org, alexander.duyck@gmail.com, Vlad Zolotarov Subject: [PATCH v3 2/3] uio_pci_generic: add MSI/MSI-X support Date: Sun, 4 Oct 2015 23:43:17 +0300 Message-Id: <1443991398-23761-3-git-send-email-vladz@cloudius-systems.com> X-Mailer: git-send-email 2.1.0 In-Reply-To: <1443991398-23761-1-git-send-email-vladz@cloudius-systems.com> References: <1443991398-23761-1-git-send-email-vladz@cloudius-systems.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Add support for MSI and MSI-X interrupt modes: - The interrupt mode is selected in this order: INTx (tried first for backward compatibility) -> MSI-X -> MSI. - Add ioctl() commands: - UIO_PCI_GENERIC_INT_MODE_GET: query the current interrupt mode. - UIO_PCI_GENERIC_IRQ_NUM_GET: query the number of IRQs available in the current mode (0 if there is none, 1 for INTx and MSI, the number of vectors for MSI-X). - UIO_PCI_GENERIC_IRQ_SET: bind an MSI-X vector to an eventfd, or unbind it by passing fd = -1 (similar to vfio). Only supported in MSI-X mode. - Add mappings for all BARs (memory and port I/O): some devices have registers related to MSI/MSI-X handling outside BAR0. Signed-off-by: Vlad Zolotarov --- New in v3: - Add __iomem qualifier to temp buffer receiving ioremap value. 
New in v2:
   - Added #include to uio_pci_generic.c

Signed-off-by: Vlad Zolotarov
---
 drivers/uio/uio_pci_generic.c   | 410 +++++++++++++++++++++++++++++++++++++---
 include/linux/uio_pci_generic.h |  36 ++++
 2 files changed, 423 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/uio_pci_generic.h

diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index d0b508b..6b8b1789 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -22,16 +22,32 @@
 #include
 #include
 #include
+#include
 #include
 #include
+#include
+#include
+#include
 
 #define DRIVER_VERSION	"0.01.0"
 #define DRIVER_AUTHOR	"Michael S. Tsirkin "
 #define DRIVER_DESC	"Generic UIO driver for PCI 2.3 devices"
 
+struct msix_info {
+	int num_irqs;			/* number of enabled MSI-X vectors */
+	struct msix_entry *table;	/* num_irqs entries */
+	struct uio_msix_irq_ctx {
+		struct eventfd_ctx *trigger; /* MSI-X vector to eventfd */
+		char *name; /* name in /proc/interrupts */
+	} *ctx;				/* num_irqs entries */
+};
+
 struct uio_pci_generic_dev {
 	struct uio_info info;
 	struct pci_dev *pdev;
+	struct mutex msix_state_lock; /* serializes IRQ_SET ioctl()s */
+	enum uio_int_mode int_mode;
+	struct msix_info msix;
 };
 
 static inline struct uio_pci_generic_dev *
@@ -40,9 +56,177 @@ to_uio_pci_generic_dev(struct uio_info *info)
 	return container_of(info, struct uio_pci_generic_dev, info);
 }
 
-/* Interrupt handler. Read/modify/write the command register to disable
- * the interrupt. */
-static irqreturn_t irqhandler(int irq, struct uio_info *info)
+/* Unmap every memory BAR that setup_maps() ioremap'd; safe to call twice. */
+static void release_iomaps(struct uio_pci_generic_dev *gdev)
+{
+	int i;
+	struct uio_mem *mem = gdev->info.mem;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++, mem++) {
+		if (mem->internal_addr) {
+			iounmap(mem->internal_addr);
+			mem->internal_addr = NULL;
+		}
+	}
+}
+
+static int setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+	int i, m = 0, p = 0, err;
+	static const char * const bar_names[] = {
+		"BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
+	};
+
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+		resource_size_t start = pci_resource_start(pdev, i);
+		unsigned long flags = pci_resource_flags(pdev, i);
+		resource_size_t len = pci_resource_len(pdev, i);
+
+		if (start == 0 || len == 0)
+			continue;
+
+		if (flags & IORESOURCE_MEM) {
+			void __iomem *addr;
+
+			if (m >= MAX_UIO_MAPS)
+				continue;
+
+			addr = ioremap(start, len);
+			if (addr == NULL) {
+				err = -ENOMEM;
+				goto fail;
+			}
+
+			info->mem[m].name = bar_names[i];
+			info->mem[m].addr = start;
+			info->mem[m].internal_addr = addr;
+			info->mem[m].size = len;
+			info->mem[m].memtype = UIO_MEM_PHYS;
+			++m;
+		} else if (flags & IORESOURCE_IO) {
+			if (p >= MAX_UIO_PORT_REGIONS)
+				continue;
+
+			info->port[p].name = bar_names[i];
+			info->port[p].start = start;
+			info->port[p].size = len;
+			info->port[p].porttype = UIO_PORT_X86;
+			++p;
+		}
+	}
+
+	return 0;
+fail:
+	for (i = 0; i < m; i++) {
+		iounmap(info->mem[i].internal_addr);
+		info->mem[i].internal_addr = NULL;
+	}
+
+	return err;
+}
+
+static irqreturn_t msix_irqhandler(int irq, void *arg);
+
+/* Bind MSI-X vector @vec to eventfd @fd; fd < 0 only drops the binding. */
+static int set_irq_eventfd(struct uio_pci_generic_dev *gdev, int vec, int fd)
+{
+	struct uio_msix_irq_ctx *ctx;
+	struct eventfd_ctx *trigger;
+	struct pci_dev *pdev = gdev->pdev;
+	int irq, err;
+
+	if (vec < 0 || vec >= gdev->msix.num_irqs) {
+		dev_notice(&gdev->pdev->dev, "vec %d out of range [0, %d)\n",
+			   vec, gdev->msix.num_irqs);
+		return -ERANGE;
+	}
+
+	irq = gdev->msix.table[vec].vector;
+
+	/* Cleanup existing irq mapping */
+	ctx = &gdev->msix.ctx[vec];
+	if (ctx->trigger) {
+		free_irq(irq, ctx->trigger);
+		eventfd_ctx_put(ctx->trigger);
+		ctx->trigger = NULL;
+	}
+
+	/* Passing -1 is used to disable interrupt */
+	if (fd < 0)
+		return 0;
+
+	/* Take an eventfd reference; it is dropped on unbind or on error. */
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		err = PTR_ERR(trigger);
+		dev_notice(&gdev->pdev->dev,
+			   "eventfd ctx get failed: %d\n", err);
+		return err;
+	}
+
+	err = request_irq(irq, msix_irqhandler, 0, ctx->name, trigger);
+	if (err) {
+		dev_notice(&pdev->dev, "request irq failed: %d\n", err);
+		eventfd_ctx_put(trigger);
+		return err;
+	}
+
+	dev_dbg(&pdev->dev, "map vector %d to fd %d trigger %p\n",
+		vec, fd, trigger);
+	ctx->trigger = trigger;
+
+	return 0;
+}
+
+static int uio_pci_generic_ioctl(struct uio_info *info, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
+	struct uio_pci_generic_irq_set hdr;
+	int err;
+
+	switch (cmd) {
+	case UIO_PCI_GENERIC_IRQ_SET:
+		if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
+			return -EFAULT;
+
+		/* Locking is needed to ensure two things:
+		 * 1) Two IRQ_SET ioctl()'s are not running in parallel.
+		 * 2) IRQ_SET ioctl() is not running in parallel with remove().
+		 */
+		mutex_lock(&gdev->msix_state_lock);
+		if (gdev->int_mode != UIO_INT_MODE_MSIX) {
+			mutex_unlock(&gdev->msix_state_lock);
+			return -EOPNOTSUPP;
+		}
+
+		err = set_irq_eventfd(gdev, hdr.vec, hdr.fd);
+		mutex_unlock(&gdev->msix_state_lock);
+
+		break;
+	case UIO_PCI_GENERIC_IRQ_NUM_GET:
+		if (gdev->int_mode == UIO_INT_MODE_NONE)
+			err = put_user(0, (u32 __user *)arg);
+		else if (gdev->int_mode != UIO_INT_MODE_MSIX)
+			err = put_user(1, (u32 __user *)arg);
+		else
+			err = put_user(gdev->msix.num_irqs,
+				       (u32 __user *)arg);
+
+		break;
+	case UIO_PCI_GENERIC_INT_MODE_GET:
+		err = put_user(gdev->int_mode, (u32 __user *)arg);
+
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+/* INTx interrupt handler. */
+static irqreturn_t intx_irqhandler(int irq, struct uio_info *info)
 {
 	struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
 
@@ -53,8 +237,162 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
 	return IRQ_HANDLED;
 }
 
-static int probe(struct pci_dev *pdev,
-		 const struct pci_device_id *id)
+/* MSI interrupt handler. */
+static irqreturn_t msi_irqhandler(int irq, struct uio_info *info)
+{
+	/* UIO core will signal the user process. */
+	return IRQ_HANDLED;
+}
+
+/* MSI-X interrupt handler: @arg is the eventfd bound by set_irq_eventfd(). */
+static irqreturn_t msix_irqhandler(int irq, void *arg)
+{
+	struct eventfd_ctx *trigger = arg;
+
+	pr_devel("irq %d trigger %p\n", irq, trigger);
+
+	eventfd_signal(trigger, 1);
+	return IRQ_HANDLED;
+}
+
+static bool enable_intx(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+
+	if (!pdev->irq || !pci_intx_mask_supported(pdev))
+		return false;
+
+	gdev->int_mode = UIO_INT_MODE_INTX;
+	gdev->info.irq = pdev->irq;
+	gdev->info.irq_flags = IRQF_SHARED;
+	gdev->info.handler = intx_irqhandler;
+
+	return true;
+}
+
+static void set_pci_master(struct pci_dev *pdev)
+{
+	pci_set_master(pdev);
+	dev_warn(&pdev->dev, "Enabling PCI bus mastering. Bogus userspace application is able to trash kernel memory using DMA\n");
+	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+}
+
+static bool enable_msi(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+
+	if (pci_enable_msi(pdev))
+		return false;
+
+	/* Enable DMA (and taint) only once MSI is known to be usable. */
+	set_pci_master(pdev);
+	gdev->int_mode = UIO_INT_MODE_MSI;
+	gdev->info.irq = pdev->irq;
+	gdev->info.irq_flags = 0;
+	gdev->info.handler = msi_irqhandler;
+
+	return true;
+}
+
+static bool enable_msix(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	int i, vectors = pci_msix_vec_count(pdev);
+
+	if (vectors <= 0)
+		return false;
+
+	gdev->msix.table = kcalloc(vectors, sizeof(struct msix_entry),
+				   GFP_KERNEL);
+	if (!gdev->msix.table) {
+		dev_err(&pdev->dev, "Failed to allocate memory for MSI-X table\n");
+		return false;
+	}
+
+	gdev->msix.ctx = kcalloc(vectors, sizeof(struct uio_msix_irq_ctx),
+				 GFP_KERNEL);
+	if (!gdev->msix.ctx) {
+		dev_err(&pdev->dev, "Failed to allocate memory for MSI-X contexts\n");
+		goto err_ctx_alloc;
+	}
+
+	for (i = 0; i < vectors; i++) {
+		gdev->msix.table[i].entry = i;
+		gdev->msix.ctx[i].name = kasprintf(GFP_KERNEL,
+						   KBUILD_MODNAME "[%d](%s)",
+						   i, pci_name(pdev));
+		if (!gdev->msix.ctx[i].name)
+			goto err_name_alloc;
+	}
+
+	set_pci_master(pdev);
+
+	if (pci_enable_msix(pdev, gdev->msix.table, vectors))
+		goto err_msix_enable;
+
+	gdev->int_mode = UIO_INT_MODE_MSIX;
+	gdev->info.irq = UIO_IRQ_CUSTOM;
+	gdev->msix.num_irqs = vectors;
+
+	return true;
+
+err_msix_enable:
+	pci_clear_master(pdev);
+err_name_alloc:
+	for (i = 0; i < vectors; i++)
+		kfree(gdev->msix.ctx[i].name);
+
+	kfree(gdev->msix.ctx);
+err_ctx_alloc:
+	kfree(gdev->msix.table);
+
+	return false;
+}
+
+/**
+ * disable_intr() - Disable interrupts and free related resources.
+ * @gdev: device handle
+ *
+ * This function should be called after the corresponding UIO device has been
+ * unregistered. This will ensure that there are no currently running ioctl()s
+ * and there won't be any new ones until next probe() call, which is why the
+ * MSI-X state is torn down here without taking msix_state_lock.
+ */
+static void disable_intr(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	int i;
+
+	switch (gdev->int_mode) {
+	case UIO_INT_MODE_MSI:
+		pci_disable_msi(pdev);
+		pci_clear_master(pdev);
+
+		break;
+	case UIO_INT_MODE_MSIX:
+		/* No need for locking here since there shouldn't be any
+		 * ioctl()s running by now.
+		 */
+		for (i = 0; i < gdev->msix.num_irqs; i++) {
+			if (gdev->msix.ctx[i].trigger)
+				set_irq_eventfd(gdev, i, -1);
+
+			kfree(gdev->msix.ctx[i].name);
+		}
+
+		pci_disable_msix(pdev);
+		pci_clear_master(pdev);
+		kfree(gdev->msix.ctx);
+		kfree(gdev->msix.table);
+
+		break;
+	default:
+		break;
+	}
+}
+
+/* Claim the regions, pick an interrupt mode, map the BARs, register UIO. */
+static int probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct uio_pci_generic_dev *gdev;
 	int err;
@@ -66,42 +404,64 @@ static int probe(struct pci_dev *pdev,
 		return err;
 	}
 
-	if (!pdev->irq) {
-		dev_warn(&pdev->dev, "No IRQ assigned to device: "
-			 "no support for interrupts?\n");
-		pci_disable_device(pdev);
-		return -ENODEV;
-	}
-
-	if (!pci_intx_mask_supported(pdev)) {
-		err = -ENODEV;
-		goto err_verify;
-	}
-
 	gdev = kzalloc(sizeof(struct uio_pci_generic_dev), GFP_KERNEL);
 	if (!gdev) {
 		err = -ENOMEM;
 		goto err_alloc;
 	}
 
+	gdev->pdev = pdev;
 	gdev->info.name = "uio_pci_generic";
 	gdev->info.version = DRIVER_VERSION;
-	gdev->info.irq = pdev->irq;
-	gdev->info.irq_flags = IRQF_SHARED;
-	gdev->info.handler = irqhandler;
-	gdev->pdev = pdev;
+	gdev->info.ioctl = uio_pci_generic_ioctl;
+	mutex_init(&gdev->msix_state_lock);
+
+	err = pci_request_regions(pdev, "uio_pci_generic");
+	if (err != 0) {
+		dev_err(&pdev->dev, "Cannot request regions\n");
+		goto err_request_regions;
+	}
+
+	/* Enable the corresponding interrupt mode. Try to enable INTx first
+	 * for backward compatibility, then MSI-X, then MSI.
+	 */
+	if (enable_intx(gdev))
+		dev_info(&pdev->dev, "Using INT#x mode: IRQ %ld\n",
+			 gdev->info.irq);
+	else if (enable_msix(gdev))
+		dev_info(&pdev->dev, "Using MSI-X mode: number of IRQs %d\n",
+			 gdev->msix.num_irqs);
+	else if (enable_msi(gdev))
+		dev_info(&pdev->dev, "Using MSI mode: IRQ %ld\n", gdev->info.irq);
+	else {
+		err = -ENODEV;
+		goto err_verify;
+	}
+
+	/* remap resources */
+	err = setup_maps(pdev, &gdev->info);
+	if (err)
+		goto err_maps;
 
 	err = uio_register_device(&pdev->dev, &gdev->info);
 	if (err)
 		goto err_register;
+
 	pci_set_drvdata(pdev, gdev);
 
 	return 0;
+
 err_register:
+	release_iomaps(gdev);
+err_maps:
+	disable_intr(gdev);
+err_verify:
+	pci_release_regions(pdev);
+err_request_regions:
 	kfree(gdev);
 err_alloc:
-err_verify:
 	pci_disable_device(pdev);
+
 	return err;
 }
 
@@ -110,8 +470,12 @@ static void remove(struct pci_dev *pdev)
 	struct uio_pci_generic_dev *gdev = pci_get_drvdata(pdev);
 
 	uio_unregister_device(&gdev->info);
-	pci_disable_device(pdev);
+	disable_intr(gdev);
+	release_iomaps(gdev);
+	pci_release_regions(pdev);
 	kfree(gdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
 }
 
 static struct pci_driver uio_pci_driver = {
diff --git a/include/linux/uio_pci_generic.h b/include/linux/uio_pci_generic.h
new file mode 100644
index 0000000..10716fc
--- /dev/null
+++ b/include/linux/uio_pci_generic.h
@@ -0,0 +1,36 @@
+/*
+ * include/linux/uio_pci_generic.h
+ *
+ * Userspace generic PCI IO driver.
+ *
+ * Licensed under the GPLv2 only.
+ */
+
+#ifndef _UIO_PCI_GENERIC_H_
+#define _UIO_PCI_GENERIC_H_
+
+#include
+
+enum uio_int_mode {
+	UIO_INT_MODE_NONE,
+	UIO_INT_MODE_INTX,
+	UIO_INT_MODE_MSI,
+	UIO_INT_MODE_MSIX
+};
+
+/* bind the requested IRQ to the given eventfd (MSI-X mode only) */
+struct uio_pci_generic_irq_set {
+	int vec; /* index of the IRQ to connect to starting from 0 */
+	int fd; /* eventfd to signal; a negative value unbinds the IRQ */
+};
+
+#define UIO_PCI_GENERIC_BASE	0x86
+
+#define UIO_PCI_GENERIC_IRQ_SET		_IOW('I', UIO_PCI_GENERIC_BASE + 1, \
+				struct uio_pci_generic_irq_set)
+#define UIO_PCI_GENERIC_IRQ_NUM_GET	_IOR('I', UIO_PCI_GENERIC_BASE + 2, \
+				uint32_t)
+#define UIO_PCI_GENERIC_INT_MODE_GET	_IOR('I', UIO_PCI_GENERIC_BASE + 3, \
+				uint32_t)
+
+#endif /* _UIO_PCI_GENERIC_H_ */
-- 
2.1.0