From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexey Kardashevskiy Subject: Re: [PATCH 16/33] powerpc/powernv: remove dead npu-dma code Date: Mon, 15 Oct 2018 12:34:02 +1100 Message-ID: <7709932d-efb8-2c9b-5128-99cc491c302b@ozlabs.ru> References: <20181009132500.17643-1-hch@lst.de> <20181009132500.17643-17-hch@lst.de> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20181009132500.17643-17-hch-jcswGhMUV9g@public.gmane.org> Content-Language: en-US List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: iommu-bounces-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org Errors-To: iommu-bounces-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org To: Christoph Hellwig , Benjamin Herrenschmidt , Paul Mackerras , Michael Ellerman Cc: linux-arch-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org, linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ@public.gmane.org, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org List-Id: linux-arch.vger.kernel.org On 10/10/2018 00:24, Christoph Hellwig wrote: > This code has been unused since it was merged and is in the way of > cleaning up the DMA code, thus remove it. > > This effectively reverts commit 5d2aa710 ("powerpc/powernv: Add support > for Nvlink NPUs"). This code is heavily used by the NVIDIA GPU driver. > > Signed-off-by: Christoph Hellwig > --- > arch/powerpc/include/asm/pci.h | 3 - > arch/powerpc/include/asm/powernv.h | 23 - > arch/powerpc/platforms/powernv/Makefile | 2 +- > arch/powerpc/platforms/powernv/npu-dma.c | 999 ---------------------- > arch/powerpc/platforms/powernv/pci-ioda.c | 243 ------ > arch/powerpc/platforms/powernv/pci.h | 11 - > 6 files changed, 1 insertion(+), 1280 deletions(-) > delete mode 100644 arch/powerpc/platforms/powernv/npu-dma.c > > diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h > index 2af9ded80540..a01d2e3d6ff9 100644 > --- a/arch/powerpc/include/asm/pci.h > +++ b/arch/powerpc/include/asm/pci.h > @@ -127,7 +127,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose); > > #endif /* __KERNEL__ */ > > -extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); > -extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); > - > #endif /* __ASM_POWERPC_PCI_H */ > diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h > index 2f3ff7a27881..4848a6b3c6b2 100644 > --- a/arch/powerpc/include/asm/powernv.h > +++ b/arch/powerpc/include/asm/powernv.h > @@ -11,33 +11,10 @@ > #define _ASM_POWERNV_H > > #ifdef CONFIG_PPC_POWERNV > -#define NPU2_WRITE 1 > extern void powernv_set_nmmu_ptcr(unsigned long ptcr); > -extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - void (*cb)(struct npu_context *, void *), > - void *priv); > -extern void pnv_npu2_destroy_context(struct npu_context *context, > - struct pci_dev *gpdev); > -extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, > - unsigned long *flags, unsigned long *status, > - int count); > - > void pnv_tm_init(void); > #else > static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } > -static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - struct npu_context *(*cb)(struct npu_context *, void *), > - void *priv) { return ERR_PTR(-ENODEV); } > -static inline void pnv_npu2_destroy_context(struct npu_context *context, > - struct pci_dev *gpdev) { } > - > -static inline int pnv_npu2_handle_fault(struct npu_context *context, > - uintptr_t *ea, unsigned long *flags, > - unsigned long *status, int count) { > - return -ENODEV; > -} > > static inline void pnv_tm_init(void) { } > static inline void pnv_power9_force_smt4(void) { } > diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile > index b540ce8eec55..2b13e9dd137c 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o > obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o > > obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o > -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o > +obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o > obj-$(CONFIG_CXL_BASE) += pci-cxl.o > obj-$(CONFIG_EEH) += eeh-powernv.o > obj-$(CONFIG_PPC_SCOM) += opal-xscom.o > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c > deleted file mode 100644 > index 8006c54a91e3..000000000000 > --- a/arch/powerpc/platforms/powernv/npu-dma.c > +++ /dev/null > @@ -1,999 +0,0 @@ > -/* > - * This file implements the DMA operations for NVLink devices. The NPU > - * devices all point to the same iommu table as the parent PCI device. > - * > - * Copyright Alistair Popple, IBM Corporation 2015. > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of version 2 of the GNU General Public > - * License as published by the Free Software Foundation. > - */ > - > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -#include "powernv.h" > -#include "pci.h" > - > -#define npu_to_phb(x) container_of(x, struct pnv_phb, npu) > - > -/* > - * spinlock to protect initialisation of an npu_context for a particular > - * mm_struct. > - */ > -static DEFINE_SPINLOCK(npu_context_lock); > - > -/* > - * When an address shootdown range exceeds this threshold we invalidate the > - * entire TLB on the GPU for the given PID rather than each specific address in > - * the range. > - */ > -static uint64_t atsd_threshold = 2 * 1024 * 1024; > -static struct dentry *atsd_threshold_dentry; > - > -/* > - * Other types of TCE cache invalidation are not functional in the > - * hardware. > - */ > -static struct pci_dev *get_pci_dev(struct device_node *dn) > -{ > - struct pci_dn *pdn = PCI_DN(dn); > - > - return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), > - pdn->busno, pdn->devfn); > -} > - > -/* Given a NPU device get the associated PCI device. */ > -struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) > -{ > - struct device_node *dn; > - struct pci_dev *gpdev; > - > - if (WARN_ON(!npdev)) > - return NULL; > - > - if (WARN_ON(!npdev->dev.of_node)) > - return NULL; > - > - /* Get assoicated PCI device */ > - dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); > - if (!dn) > - return NULL; > - > - gpdev = get_pci_dev(dn); > - of_node_put(dn); > - > - return gpdev; > -} > -EXPORT_SYMBOL(pnv_pci_get_gpu_dev); > - > -/* Given the real PCI device get a linked NPU device. */ > -struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) > -{ > - struct device_node *dn; > - struct pci_dev *npdev; > - > - if (WARN_ON(!gpdev)) > - return NULL; > - > - /* Not all PCI devices have device-tree nodes */ > - if (!gpdev->dev.of_node) > - return NULL; > - > - /* Get assoicated PCI device */ > - dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); > - if (!dn) > - return NULL; > - > - npdev = get_pci_dev(dn); > - of_node_put(dn); > - > - return npdev; > -} > -EXPORT_SYMBOL(pnv_pci_get_npu_dev); > - > -#define NPU_DMA_OP_UNSUPPORTED() \ > - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ > - __func__) > - > -static void *dma_npu_alloc(struct device *dev, size_t size, > - dma_addr_t *dma_handle, gfp_t flag, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return NULL; > -} > - > -static void dma_npu_free(struct device *dev, size_t size, > - void *vaddr, dma_addr_t dma_handle, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > -} > - > -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, > - unsigned long offset, size_t size, > - enum dma_data_direction direction, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, > - int nelems, enum dma_data_direction direction, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static int dma_npu_dma_supported(struct device *dev, u64 mask) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static u64 dma_npu_get_required_mask(struct device *dev) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static const struct dma_map_ops dma_npu_ops = { > - .map_page = dma_npu_map_page, > - .map_sg = dma_npu_map_sg, > - .alloc = dma_npu_alloc, > - .free = dma_npu_free, > - .dma_supported = dma_npu_dma_supported, > - .get_required_mask = dma_npu_get_required_mask, > -}; > - > -/* > - * Returns the PE assoicated with the PCI device of the given > - * NPU. Returns the linked pci device if pci_dev != NULL. > - */ > -static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, > - struct pci_dev **gpdev) > -{ > - struct pnv_phb *phb; > - struct pci_controller *hose; > - struct pci_dev *pdev; > - struct pnv_ioda_pe *pe; > - struct pci_dn *pdn; > - > - pdev = pnv_pci_get_gpu_dev(npe->pdev); > - if (!pdev) > - return NULL; > - > - pdn = pci_get_pdn(pdev); > - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) > - return NULL; > - > - hose = pci_bus_to_host(pdev->bus); > - phb = hose->private_data; > - pe = &phb->ioda.pe_array[pdn->pe_number]; > - > - if (gpdev) > - *gpdev = pdev; > - > - return pe; > -} > - > -long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, > - struct iommu_table *tbl) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - const unsigned long size = tbl->it_indirect_levels ? > - tbl->it_level_size : tbl->it_size; > - const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; > - const __u64 win_size = tbl->it_size << tbl->it_page_shift; > - > - pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", > - start_addr, start_addr + win_size - 1, > - IOMMU_PAGE_SIZE(tbl)); > - > - rc = opal_pci_map_pe_dma_window(phb->opal_id, > - npe->pe_number, > - npe->pe_number, > - tbl->it_indirect_levels + 1, > - __pa(tbl->it_base), > - size << 3, > - IOMMU_PAGE_SIZE(tbl)); > - if (rc) { > - pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); > - return rc; > - } > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - /* Add the table to the list so its TCE cache will get invalidated */ > - pnv_pci_link_table_and_group(phb->hose->node, num, > - tbl, &npe->table_group); > - > - return 0; > -} > - > -long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - > - pe_info(npe, "Removing DMA window\n"); > - > - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, > - npe->pe_number, > - 0/* levels */, 0/* table address */, > - 0/* table size */, 0/* page size */); > - if (rc) { > - pe_err(npe, "Unmapping failed, ret = %lld\n", rc); > - return rc; > - } > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - pnv_pci_unlink_table_and_group(npe->table_group.tables[num], > - &npe->table_group); > - > - return 0; > -} > - > -/* > - * Enables 32 bit DMA on NPU. > - */ > -static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) > -{ > - struct pci_dev *gpdev; > - struct pnv_ioda_pe *gpe; > - int64_t rc; > - > - /* > - * Find the assoicated PCI devices and get the dma window > - * information from there. > - */ > - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) > - return; > - > - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); > - if (!gpe) > - return; > - > - rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); > - > - /* > - * We don't initialise npu_pe->tce32_table as we always use > - * dma_npu_ops which are nops. > - */ > - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); > -} > - > -/* > - * Enables bypass mode on the NPU. The NPU only supports one > - * window per link, so bypass needs to be explicitly enabled or > - * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be > - * active at the same time. > - */ > -static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc = 0; > - phys_addr_t top = memblock_end_of_DRAM(); > - > - if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) > - return -EINVAL; > - > - rc = pnv_npu_unset_window(npe, 0); > - if (rc != OPAL_SUCCESS) > - return rc; > - > - /* Enable the bypass window */ > - > - top = roundup_pow_of_two(top); > - dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", > - npe->pe_number); > - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, > - npe->pe_number, npe->pe_number, > - 0 /* bypass base */, top); > - > - if (rc == OPAL_SUCCESS) > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - return rc; > -} > - > -void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) > -{ > - int i; > - struct pnv_phb *phb; > - struct pci_dn *pdn; > - struct pnv_ioda_pe *npe; > - struct pci_dev *npdev; > - > - for (i = 0; ; ++i) { > - npdev = pnv_pci_get_npu_dev(gpdev, i); > - > - if (!npdev) > - break; > - > - pdn = pci_get_pdn(npdev); > - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) > - return; > - > - phb = pci_bus_to_host(npdev->bus)->private_data; > - > - /* We only do bypass if it's enabled on the linked device */ > - npe = &phb->ioda.pe_array[pdn->pe_number]; > - > - if (bypass) { > - dev_info(&npdev->dev, > - "Using 64-bit DMA iommu bypass\n"); > - pnv_npu_dma_set_bypass(npe); > - } else { > - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); > - pnv_npu_dma_set_32(npe); > - } > - } > -} > - > -/* Switch ownership from platform code to external user (e.g. VFIO) */ > -void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - > - /* > - * Note: NPU has just a single TVE in the hardware which means that > - * while used by the kernel, it can have either 32bit window or > - * DMA bypass but never both. So we deconfigure 32bit window only > - * if it was enabled at the moment of ownership change. > - */ > - if (npe->table_group.tables[0]) { > - pnv_npu_unset_window(npe, 0); > - return; > - } > - > - /* Disable bypass */ > - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, > - npe->pe_number, npe->pe_number, > - 0 /* bypass base */, 0); > - if (rc) { > - pe_err(npe, "Failed to disable bypass, err %lld\n", rc); > - return; > - } > - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); > -} > - > -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - struct pci_bus *pbus = phb->hose->bus; > - struct pci_dev *npdev, *gpdev = NULL, *gptmp; > - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); > - > - if (!gpe || !gpdev) > - return NULL; > - > - list_for_each_entry(npdev, &pbus->devices, bus_list) { > - gptmp = pnv_pci_get_gpu_dev(npdev); > - > - if (gptmp != gpdev) > - continue; > - > - pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); > - iommu_group_add_device(gpe->table_group.group, &npdev->dev); > - } > - > - return gpe; > -} > - > -/* Maximum number of nvlinks per npu */ > -#define NV_MAX_LINKS 6 > - > -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ > -static int max_npu2_index; > - > -struct npu_context { > - struct mm_struct *mm; > - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; > - struct mmu_notifier mn; > - struct kref kref; > - bool nmmu_flush; > - > - /* Callback to stop translation requests on a given GPU */ > - void (*release_cb)(struct npu_context *context, void *priv); > - > - /* > - * Private pointer passed to the above callback for usage by > - * device drivers. > - */ > - void *priv; > -}; > - > -struct mmio_atsd_reg { > - struct npu *npu; > - int reg; > -}; > - > -/* > - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC > - * if none are available. > - */ > -static int get_mmio_atsd_reg(struct npu *npu) > -{ > - int i; > - > - for (i = 0; i < npu->mmio_atsd_count; i++) { > - if (!test_bit(i, &npu->mmio_atsd_usage)) > - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) > - return i; > - } > - > - return -ENOSPC; > -} > - > -static void put_mmio_atsd_reg(struct npu *npu, int reg) > -{ > - clear_bit_unlock(reg, &npu->mmio_atsd_usage); > -} > - > -/* MMIO ATSD register offsets */ > -#define XTS_ATSD_AVA 1 > -#define XTS_ATSD_STAT 2 > - > -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, > - unsigned long launch, unsigned long va) > -{ > - struct npu *npu = mmio_atsd_reg->npu; > - int reg = mmio_atsd_reg->reg; > - > - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); > - eieio(); > - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]); > -} > - > -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > - unsigned long pid, bool flush) > -{ > - int i; > - unsigned long launch; > - > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* IS set to invalidate matching PID */ > - launch = PPC_BIT(12); > - > - /* PRS set to process-scoped */ > - launch |= PPC_BIT(13); > - > - /* AP */ > - launch |= (u64) > - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > - > - /* PID */ > - launch |= pid << PPC_BITLSHIFT(38); > - > - /* No flush */ > - launch |= !flush << PPC_BITLSHIFT(39); > - > - /* Invalidating the entire process doesn't use a va */ > - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); > - } > -} > - > -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > - unsigned long va, unsigned long pid, bool flush) > -{ > - int i; > - unsigned long launch; > - > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* IS set to invalidate target VA */ > - launch = 0; > - > - /* PRS set to process scoped */ > - launch |= PPC_BIT(13); > - > - /* AP */ > - launch |= (u64) > - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > - > - /* PID */ > - launch |= pid << PPC_BITLSHIFT(38); > - > - /* No flush */ > - launch |= !flush << PPC_BITLSHIFT(39); > - > - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); > - } > -} > - > -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) > - > -static void mmio_invalidate_wait( > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - struct npu *npu; > - int i, reg; > - > - /* Wait for all invalidations to complete */ > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* Wait for completion */ > - npu = mmio_atsd_reg[i].npu; > - reg = mmio_atsd_reg[i].reg; > - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) > - cpu_relax(); > - } > -} > - > -/* > - * Acquires all the address translation shootdown (ATSD) registers required to > - * launch an ATSD on all links this npu_context is active on. > - */ > -static void acquire_atsd_reg(struct npu_context *npu_context, > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - int i, j; > - struct npu *npu; > - struct pci_dev *npdev; > - struct pnv_phb *nphb; > - > - for (i = 0; i <= max_npu2_index; i++) { > - mmio_atsd_reg[i].reg = -1; > - for (j = 0; j < NV_MAX_LINKS; j++) { > - /* > - * There are no ordering requirements with respect to > - * the setup of struct npu_context, but to ensure > - * consistent behaviour we need to ensure npdev[][] is > - * only read once. > - */ > - npdev = READ_ONCE(npu_context->npdev[i][j]); > - if (!npdev) > - continue; > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - mmio_atsd_reg[i].npu = npu; > - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); > - while (mmio_atsd_reg[i].reg < 0) { > - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); > - cpu_relax(); > - } > - break; > - } > - } > -} > - > -/* > - * Release previously acquired ATSD registers. To avoid deadlocks the registers > - * must be released in the same order they were acquired above in > - * acquire_atsd_reg. > - */ > -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - int i; > - > - for (i = 0; i <= max_npu2_index; i++) { > - /* > - * We can't rely on npu_context->npdev[][] being the same here > - * as when acquire_atsd_reg() was called, hence we use the > - * values stored in mmio_atsd_reg during the acquire phase > - * rather than re-reading npdev[][]. > - */ > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); > - } > -} > - > -/* > - * Invalidate either a single address or an entire PID depending on > - * the value of va. > - */ > -static void mmio_invalidate(struct npu_context *npu_context, int va, > - unsigned long address, bool flush) > -{ > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; > - unsigned long pid = npu_context->mm->context.id; > - > - if (npu_context->nmmu_flush) > - /* > - * Unfortunately the nest mmu does not support flushing specific > - * addresses so we have to flush the whole mm once before > - * shooting down the GPU translation. > - */ > - flush_all_mm(npu_context->mm); > - > - /* > - * Loop over all the NPUs this process is active on and launch > - * an invalidate. > - */ > - acquire_atsd_reg(npu_context, mmio_atsd_reg); > - if (va) > - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); > - else > - mmio_invalidate_pid(mmio_atsd_reg, pid, flush); > - > - mmio_invalidate_wait(mmio_atsd_reg); > - if (flush) { > - /* > - * The GPU requires two flush ATSDs to ensure all entries have > - * been flushed. We use PID 0 as it will never be used for a > - * process on the GPU. > - */ > - mmio_invalidate_pid(mmio_atsd_reg, 0, true); > - mmio_invalidate_wait(mmio_atsd_reg); > - mmio_invalidate_pid(mmio_atsd_reg, 0, true); > - mmio_invalidate_wait(mmio_atsd_reg); > - } > - release_atsd_reg(mmio_atsd_reg); > -} > - > -static void pnv_npu2_mn_release(struct mmu_notifier *mn, > - struct mm_struct *mm) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - > - /* Call into device driver to stop requests to the NMMU */ > - if (npu_context->release_cb) > - npu_context->release_cb(npu_context, npu_context->priv); > - > - /* > - * There should be no more translation requests for this PID, but we > - * need to ensure any entries for it are removed from the TLB. > - */ > - mmio_invalidate(npu_context, 0, 0, true); > -} > - > -static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > - struct mm_struct *mm, > - unsigned long address, > - pte_t pte) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - > - mmio_invalidate(npu_context, 1, address, true); > -} > - > -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > - struct mm_struct *mm, > - unsigned long start, unsigned long end) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - unsigned long address; > - > - if (end - start > atsd_threshold) { > - /* > - * Just invalidate the entire PID if the address range is too > - * large. > - */ > - mmio_invalidate(npu_context, 0, 0, true); > - } else { > - for (address = start; address < end; address += PAGE_SIZE) > - mmio_invalidate(npu_context, 1, address, false); > - > - /* Do the flush only on the final addess == end */ > - mmio_invalidate(npu_context, 1, address, true); > - } > -} > - > -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { > - .release = pnv_npu2_mn_release, > - .change_pte = pnv_npu2_mn_change_pte, > - .invalidate_range = pnv_npu2_mn_invalidate_range, > -}; > - > -/* > - * Call into OPAL to setup the nmmu context for the current task in > - * the NPU. This must be called to setup the context tables before the > - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. > - * > - * A release callback should be registered to allow a device driver to > - * be notified that it should not launch any new translation requests > - * as the final TLB invalidate is about to occur. > - * > - * Returns an error if there no contexts are currently available or a > - * npu_context which should be passed to pnv_npu2_handle_fault(). > - * > - * mmap_sem must be held in write mode and must not be called from interrupt > - * context. > - */ > -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - void (*cb)(struct npu_context *, void *), > - void *priv) > -{ > - int rc; > - u32 nvlink_index; > - struct device_node *nvlink_dn; > - struct mm_struct *mm = current->mm; > - struct pnv_phb *nphb; > - struct npu *npu; > - struct npu_context *npu_context; > - > - /* > - * At present we don't support GPUs connected to multiple NPUs and I'm > - * not sure the hardware does either. > - */ > - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return ERR_PTR(-ENODEV); > - > - if (!npdev) > - /* No nvlink associated with this GPU device */ > - return ERR_PTR(-ENODEV); > - > - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); > - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", > - &nvlink_index))) > - return ERR_PTR(-ENODEV); > - > - if (!mm || mm->context.id == 0) { > - /* > - * Kernel thread contexts are not supported and context id 0 is > - * reserved on the GPU. > - */ > - return ERR_PTR(-EINVAL); > - } > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - > - /* > - * Setup the NPU context table for a particular GPU. These need to be > - * per-GPU as we need the tables to filter ATSDs when there are no > - * active contexts on a particular GPU. It is safe for these to be > - * called concurrently with destroy as the OPAL call takes appropriate > - * locks and refcounts on init/destroy. > - */ > - rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - if (rc < 0) > - return ERR_PTR(-ENOSPC); > - > - /* > - * We store the npu pci device so we can more easily get at the > - * associated npus. > - */ > - spin_lock(&npu_context_lock); > - npu_context = mm->context.npu_context; > - if (npu_context) { > - if (npu_context->release_cb != cb || > - npu_context->priv != priv) { > - spin_unlock(&npu_context_lock); > - opal_npu_destroy_context(nphb->opal_id, mm->context.id, > - PCI_DEVID(gpdev->bus->number, > - gpdev->devfn)); > - return ERR_PTR(-EINVAL); > - } > - > - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); > - } > - spin_unlock(&npu_context_lock); > - > - if (!npu_context) { > - /* > - * We can set up these fields without holding the > - * npu_context_lock as the npu_context hasn't been returned to > - * the caller meaning it can't be destroyed. Parallel allocation > - * is protected against by mmap_sem. > - */ > - rc = -ENOMEM; > - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); > - if (npu_context) { > - kref_init(&npu_context->kref); > - npu_context->mm = mm; > - npu_context->mn.ops = &nv_nmmu_notifier_ops; > - rc = __mmu_notifier_register(&npu_context->mn, mm); > - } > - > - if (rc) { > - kfree(npu_context); > - opal_npu_destroy_context(nphb->opal_id, mm->context.id, > - PCI_DEVID(gpdev->bus->number, > - gpdev->devfn)); > - return ERR_PTR(rc); > - } > - > - mm->context.npu_context = npu_context; > - } > - > - npu_context->release_cb = cb; > - npu_context->priv = priv; > - > - /* > - * npdev is a pci_dev pointer setup by the PCI code. We assign it to > - * npdev[][] to indicate to the mmu notifiers that an invalidation > - * should also be sent over this nvlink. The notifiers don't use any > - * other fields in npu_context, so we just need to ensure that when they > - * deference npu_context->npdev[][] it is either a valid pointer or > - * NULL. > - */ > - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); > - > - if (!nphb->npu.nmmu_flush) { > - /* > - * If we're not explicitly flushing ourselves we need to mark > - * the thread for global flushes > - */ > - npu_context->nmmu_flush = false; > - mm_context_add_copro(mm); > - } else > - npu_context->nmmu_flush = true; > - > - return npu_context; > -} > -EXPORT_SYMBOL(pnv_npu2_init_context); > - > -static void pnv_npu2_release_context(struct kref *kref) > -{ > - struct npu_context *npu_context = > - container_of(kref, struct npu_context, kref); > - > - if (!npu_context->nmmu_flush) > - mm_context_remove_copro(npu_context->mm); > - > - npu_context->mm->context.npu_context = NULL; > -} > - > -/* > - * Destroy a context on the given GPU. May free the npu_context if it is no > - * longer active on any GPUs. Must not be called from interrupt context. > - */ > -void pnv_npu2_destroy_context(struct npu_context *npu_context, > - struct pci_dev *gpdev) > -{ > - int removed; > - struct pnv_phb *nphb; > - struct npu *npu; > - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); > - struct device_node *nvlink_dn; > - u32 nvlink_index; > - > - if (WARN_ON(!npdev)) > - return; > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return; > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); > - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", > - &nvlink_index))) > - return; > - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); > - opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - spin_lock(&npu_context_lock); > - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); > - spin_unlock(&npu_context_lock); > - > - /* > - * We need to do this outside of pnv_npu2_release_context so that it is > - * outside the spinlock as mmu_notifier_destroy uses SRCU. > - */ > - if (removed) { > - mmu_notifier_unregister(&npu_context->mn, > - npu_context->mm); > - > - kfree(npu_context); > - } > - > -} > -EXPORT_SYMBOL(pnv_npu2_destroy_context); > - > -/* > - * Assumes mmap_sem is held for the contexts associated mm. > - */ > -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, > - unsigned long *flags, unsigned long *status, int count) > -{ > - u64 rc = 0, result = 0; > - int i, is_write; > - struct page *page[1]; > - > - /* mmap_sem should be held so the struct_mm must be present */ > - struct mm_struct *mm = context->mm; > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return -ENODEV; > - > - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); > - > - for (i = 0; i < count; i++) { > - is_write = flags[i] & NPU2_WRITE; > - rc = get_user_pages_remote(NULL, mm, ea[i], 1, > - is_write ? FOLL_WRITE : 0, > - page, NULL, NULL); > - > - /* > - * To support virtualised environments we will have to do an > - * access to the page to ensure it gets faulted into the > - * hypervisor. For the moment virtualisation is not supported in > - * other areas so leave the access out. > - */ > - if (rc != 1) { > - status[i] = rc; > - result = -EFAULT; > - continue; > - } > - > - status[i] = 0; > - put_page(page[0]); > - } > - > - return result; > -} > -EXPORT_SYMBOL(pnv_npu2_handle_fault); > - > -int pnv_npu2_init(struct pnv_phb *phb) > -{ > - unsigned int i; > - u64 mmio_atsd; > - struct device_node *dn; > - struct pci_dev *gpdev; > - static int npu_index; > - uint64_t rc = 0; > - > - if (!atsd_threshold_dentry) { > - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", > - 0600, powerpc_debugfs_root, &atsd_threshold); > - } > - > - phb->npu.nmmu_flush = > - of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); > - for_each_child_of_node(phb->hose->dn, dn) { > - gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); > - if (gpdev) { > - rc = opal_npu_map_lpar(phb->opal_id, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn), > - 0, 0); > - if (rc) > - dev_err(&gpdev->dev, > - "Error %lld mapping device to LPAR\n", > - rc); > - } > - } > - > - for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd", > - i, &mmio_atsd); i++) > - phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); > - > - pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i); > - phb->npu.mmio_atsd_count = i; > - phb->npu.mmio_atsd_usage = 0; > - npu_index++; > - if (WARN_ON(npu_index >= NV_MAX_NPUS)) > - return -ENOSPC; > - max_npu2_index = npu_index; > - phb->npu.index = npu_index; > - > - return 0; > -} > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index 913175ba1c10..b6db65917bb4 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -1203,75 +1203,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > return pe; > } > > -static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) > -{ > - int pe_num, found_pe = false, rc; > - long rid; > - struct pnv_ioda_pe *pe; > - struct pci_dev *gpu_pdev; > - struct pci_dn *npu_pdn; > - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); > - struct pnv_phb *phb = hose->private_data; > - > - /* > - * Due to a hardware errata PE#0 on the NPU is reserved for > - * error handling. This means we only have three PEs remaining > - * which need to be assigned to four links, implying some > - * links must share PEs. > - * > - * To achieve this we assign PEs such that NPUs linking the > - * same GPU get assigned the same PE. > - */ > - gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); > - for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { > - pe = &phb->ioda.pe_array[pe_num]; > - if (!pe->pdev) > - continue; > - > - if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) { > - /* > - * This device has the same peer GPU so should > - * be assigned the same PE as the existing > - * peer NPU. > - */ > - dev_info(&npu_pdev->dev, > - "Associating to existing PE %x\n", pe_num); > - pci_dev_get(npu_pdev); > - npu_pdn = pci_get_pdn(npu_pdev); > - rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; > - npu_pdn->pe_number = pe_num; > - phb->ioda.pe_rmap[rid] = pe->pe_number; > - > - /* Map the PE to this link */ > - rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, > - OpalPciBusAll, > - OPAL_COMPARE_RID_DEVICE_NUMBER, > - OPAL_COMPARE_RID_FUNCTION_NUMBER, > - OPAL_MAP_PE); > - WARN_ON(rc != OPAL_SUCCESS); > - found_pe = true; > - break; > - } > - } > - > - if (!found_pe) > - /* > - * Could not find an existing PE so allocate a new > - * one. > - */ > - return pnv_ioda_setup_dev_PE(npu_pdev); > - else > - return pe; > -} > - > -static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) > -{ > - struct pci_dev *pdev; > - > - list_for_each_entry(pdev, &bus->devices, bus_list) > - pnv_ioda_setup_npu_PE(pdev); > -} > - > static void pnv_pci_ioda_setup_PEs(void) > { > struct pci_controller *hose, *tmp; > @@ -1281,13 +1212,6 @@ static void pnv_pci_ioda_setup_PEs(void) > > list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > phb = hose->private_data; > - if (phb->type == PNV_PHB_NPU_NVLINK) { > - /* PE#0 is needed for error reporting */ > - pnv_ioda_reserve_pe(phb, 0); > - pnv_ioda_setup_npu_PEs(hose->bus); > - if (phb->model == PNV_PHB_MODEL_NPU2) > - pnv_npu2_init(phb); > - } > if (phb->type == PNV_PHB_NPU_OCAPI) { > bus = hose->bus; > list_for_each_entry(pdev, &bus->devices, bus_list) > @@ -1871,9 +1795,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) > } > *pdev->dev.dma_mask = dma_mask; > > - /* Update peer npu devices */ > - pnv_npu_try_dma_set_bypass(pdev, bypass); > - > return 0; > } > > @@ -2119,14 +2040,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, > } > } > > -void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) > -{ > - if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) > - pnv_pci_phb3_tce_invalidate_entire(phb, rm); > - else > - opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); > -} > - > static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, > long npages, unsigned long uaddr, > enum dma_data_direction direction, > @@ -2615,137 +2528,6 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { > .take_ownership = pnv_ioda2_take_ownership, > .release_ownership = pnv_ioda2_release_ownership, > }; > - > -static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) > -{ > - struct pci_controller *hose; > - struct pnv_phb *phb; > - struct pnv_ioda_pe **ptmppe = opaque; > - struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); > - struct pci_dn *pdn = pci_get_pdn(pdev); > - > - if (!pdn || pdn->pe_number == IODA_INVALID_PE) > - return 0; > - > - hose = pci_bus_to_host(pdev->bus); > - phb = hose->private_data; > - if (phb->type != PNV_PHB_NPU_NVLINK) > - return 0; > - > - *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; > - > - return 1; > -} > - > -/* > - * This returns PE of associated NPU. > - * This assumes that NPU is in the same IOMMU group with GPU and there is > - * no other PEs. > - */ > -static struct pnv_ioda_pe *gpe_table_group_to_npe( > - struct iommu_table_group *table_group) > -{ > - struct pnv_ioda_pe *npe = NULL; > - int ret = iommu_group_for_each_dev(table_group->group, &npe, > - gpe_table_group_to_npe_cb); > - > - BUG_ON(!ret || !npe); > - > - return npe; > -} > - > -static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, > - int num, struct iommu_table *tbl) > -{ > - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); > - int num2 = (num == 0) ? 1 : 0; > - long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); > - > - if (ret) > - return ret; > - > - if (table_group->tables[num2]) > - pnv_npu_unset_window(npe, num2); > - > - ret = pnv_npu_set_window(npe, num, tbl); > - if (ret) { > - pnv_pci_ioda2_unset_window(table_group, num); > - if (table_group->tables[num2]) > - pnv_npu_set_window(npe, num2, > - table_group->tables[num2]); > - } > - > - return ret; > -} > - > -static long pnv_pci_ioda2_npu_unset_window( > - struct iommu_table_group *table_group, > - int num) > -{ > - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); > - int num2 = (num == 0) ? 1 : 0; > - long ret = pnv_pci_ioda2_unset_window(table_group, num); > - > - if (ret) > - return ret; > - > - if (!npe->table_group.tables[num]) > - return 0; > - > - ret = pnv_npu_unset_window(npe, num); > - if (ret) > - return ret; > - > - if (table_group->tables[num2]) > - ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); > - > - return ret; > -} > - > -static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) > -{ > - /* > - * Detach NPU first as pnv_ioda2_take_ownership() will destroy > - * the iommu_table if 32bit DMA is enabled. > - */ > - pnv_npu_take_ownership(gpe_table_group_to_npe(table_group)); > - pnv_ioda2_take_ownership(table_group); > -} > - > -static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { > - .get_table_size = pnv_pci_ioda2_get_table_size, > - .create_table = pnv_pci_ioda2_create_table_userspace, > - .set_window = pnv_pci_ioda2_npu_set_window, > - .unset_window = pnv_pci_ioda2_npu_unset_window, > - .take_ownership = pnv_ioda2_npu_take_ownership, > - .release_ownership = pnv_ioda2_release_ownership, > -}; > - > -static void pnv_pci_ioda_setup_iommu_api(void) > -{ > - struct pci_controller *hose, *tmp; > - struct pnv_phb *phb; > - struct pnv_ioda_pe *pe, *gpe; > - > - /* > - * Now we have all PHBs discovered, time to add NPU devices to > - * the corresponding IOMMU groups. > - */ > - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > - phb = hose->private_data; > - > - if (phb->type != PNV_PHB_NPU_NVLINK) > - continue; > - > - list_for_each_entry(pe, &phb->ioda.pe_list, list) { > - gpe = pnv_pci_npu_setup_iommu(pe); > - if (gpe) > - gpe->table_group.ops = &pnv_pci_ioda2_npu_ops; > - } > - } > -} > -#else /* !CONFIG_IOMMU_API */ > -static void pnv_pci_ioda_setup_iommu_api(void) { }; > #endif > > static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) > @@ -3242,7 +3024,6 @@ static void pnv_pci_enable_bridges(void) > static void pnv_pci_ioda_fixup(void) > { > pnv_pci_ioda_setup_PEs(); > - pnv_pci_ioda_setup_iommu_api(); > pnv_pci_ioda_create_dbgfs(); > > pnv_pci_enable_bridges(); > @@ -3689,27 +3470,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { > .shutdown = pnv_pci_ioda_shutdown, > }; > > -static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) > -{ > - dev_err_once(&npdev->dev, > - "%s operation unsupported for NVLink devices\n", > - __func__); > - return -EPERM; > -} > - > -static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { > - .dma_dev_setup = pnv_pci_dma_dev_setup, > -#ifdef CONFIG_PCI_MSI > - .setup_msi_irqs = pnv_setup_msi_irqs, > - .teardown_msi_irqs = pnv_teardown_msi_irqs, > -#endif > - .enable_device_hook = pnv_pci_enable_device_hook, > - .window_alignment = pnv_pci_window_alignment, > - .reset_secondary_bus = pnv_pci_reset_secondary_bus, > - .dma_set_mask = pnv_npu_dma_set_mask, > - .shutdown = pnv_pci_ioda_shutdown, > -}; > - > static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { > .enable_device_hook = pnv_pci_enable_device_hook, > .window_alignment = pnv_pci_window_alignment, > @@ -3931,9 +3691,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, > ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; > > switch (phb->type) { > - case PNV_PHB_NPU_NVLINK: > - hose->controller_ops = pnv_npu_ioda_controller_ops; > - break; > case PNV_PHB_NPU_OCAPI: > hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; > break; > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h > index 8b37b28e3831..54f2935b7ac5 100644 > --- a/arch/powerpc/platforms/powernv/pci.h > +++ b/arch/powerpc/platforms/powernv/pci.h > @@ -231,17 +231,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, > #define pe_info(pe, fmt, ...) \ > pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) > > -/* Nvlink functions */ > -extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); > -extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); > -extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); > -extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, > - struct iommu_table *tbl); > -extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num); > -extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); > -extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); > -extern int pnv_npu2_init(struct pnv_phb *phb); > - > /* pci-ioda-tce.c */ > #define POWERNV_IOMMU_DEFAULT_LEVELS 1 > #define POWERNV_IOMMU_MAX_LEVELS 5 > -- Alexey From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pg1-f194.google.com ([209.85.215.194]:35434 "EHLO mail-pg1-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726453AbeJOJRK (ORCPT ); Mon, 15 Oct 2018 05:17:10 -0400 Received: by mail-pg1-f194.google.com with SMTP id v133-v6so8364248pgb.2 for ; Sun, 14 Oct 2018 18:34:10 -0700 (PDT) Subject: Re: [PATCH 16/33] powerpc/powernv: remove dead npu-dma code References: <20181009132500.17643-1-hch@lst.de> <20181009132500.17643-17-hch@lst.de> From: Alexey Kardashevskiy Message-ID: <7709932d-efb8-2c9b-5128-99cc491c302b@ozlabs.ru> Date: Mon, 15 Oct 2018 12:34:02 +1100 MIME-Version: 1.0 In-Reply-To: <20181009132500.17643-17-hch@lst.de> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Christoph Hellwig , Benjamin Herrenschmidt , Paul Mackerras , Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org, iommu@lists.linux-foundation.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org Message-ID: <20181015013402.VdWxv47ryr2CXSwPBU-bLPhzPZ080J3Cn3oCTzDSVcQ@z> On 10/10/2018 00:24, Christoph Hellwig wrote: > This code has been unused since it was merged and is in the way of > cleaning up the DMA code, thus remove it. > > This effectively reverts commit 5d2aa710 ("powerpc/powernv: Add support > for Nvlink NPUs"). This code is heavily used by the NVIDIA GPU driver. > > Signed-off-by: Christoph Hellwig > --- > arch/powerpc/include/asm/pci.h | 3 - > arch/powerpc/include/asm/powernv.h | 23 - > arch/powerpc/platforms/powernv/Makefile | 2 +- > arch/powerpc/platforms/powernv/npu-dma.c | 999 ---------------------- > arch/powerpc/platforms/powernv/pci-ioda.c | 243 ------ > arch/powerpc/platforms/powernv/pci.h | 11 - > 6 files changed, 1 insertion(+), 1280 deletions(-) > delete mode 100644 arch/powerpc/platforms/powernv/npu-dma.c > > diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h > index 2af9ded80540..a01d2e3d6ff9 100644 > --- a/arch/powerpc/include/asm/pci.h > +++ b/arch/powerpc/include/asm/pci.h > @@ -127,7 +127,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose); > > #endif /* __KERNEL__ */ > > -extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); > -extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); > - > #endif /* __ASM_POWERPC_PCI_H */ > diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h > index 2f3ff7a27881..4848a6b3c6b2 100644 > --- a/arch/powerpc/include/asm/powernv.h > +++ b/arch/powerpc/include/asm/powernv.h > @@ -11,33 +11,10 @@ > #define _ASM_POWERNV_H > > #ifdef CONFIG_PPC_POWERNV > -#define NPU2_WRITE 1 > extern void powernv_set_nmmu_ptcr(unsigned long ptcr); > -extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - void (*cb)(struct npu_context *, void *), > - void *priv); > -extern void pnv_npu2_destroy_context(struct npu_context *context, > - struct pci_dev *gpdev); > -extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, > - unsigned long *flags, unsigned long *status, > - int count); > - > void pnv_tm_init(void); > #else > static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } > -static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - struct npu_context *(*cb)(struct npu_context *, void *), > - void *priv) { return ERR_PTR(-ENODEV); } > -static inline void pnv_npu2_destroy_context(struct npu_context *context, > - struct pci_dev *gpdev) { } > - > -static inline int pnv_npu2_handle_fault(struct npu_context *context, > - uintptr_t *ea, unsigned long *flags, > - unsigned long *status, int count) { > - return -ENODEV; > -} > > static inline void pnv_tm_init(void) { } > static inline void pnv_power9_force_smt4(void) { } > diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile > index b540ce8eec55..2b13e9dd137c 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o > obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o > > obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o > -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o > +obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o > obj-$(CONFIG_CXL_BASE) += pci-cxl.o > obj-$(CONFIG_EEH) += eeh-powernv.o > obj-$(CONFIG_PPC_SCOM) += opal-xscom.o > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c > deleted file mode 100644 > index 8006c54a91e3..000000000000 > --- a/arch/powerpc/platforms/powernv/npu-dma.c > +++ /dev/null > @@ -1,999 +0,0 @@ > -/* > - * This file implements the DMA operations for NVLink devices. The NPU > - * devices all point to the same iommu table as the parent PCI device. > - * > - * Copyright Alistair Popple, IBM Corporation 2015. > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of version 2 of the GNU General Public > - * License as published by the Free Software Foundation. > - */ > - > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -#include "powernv.h" > -#include "pci.h" > - > -#define npu_to_phb(x) container_of(x, struct pnv_phb, npu) > - > -/* > - * spinlock to protect initialisation of an npu_context for a particular > - * mm_struct. > - */ > -static DEFINE_SPINLOCK(npu_context_lock); > - > -/* > - * When an address shootdown range exceeds this threshold we invalidate the > - * entire TLB on the GPU for the given PID rather than each specific address in > - * the range. > - */ > -static uint64_t atsd_threshold = 2 * 1024 * 1024; > -static struct dentry *atsd_threshold_dentry; > - > -/* > - * Other types of TCE cache invalidation are not functional in the > - * hardware. > - */ > -static struct pci_dev *get_pci_dev(struct device_node *dn) > -{ > - struct pci_dn *pdn = PCI_DN(dn); > - > - return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), > - pdn->busno, pdn->devfn); > -} > - > -/* Given a NPU device get the associated PCI device. */ > -struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) > -{ > - struct device_node *dn; > - struct pci_dev *gpdev; > - > - if (WARN_ON(!npdev)) > - return NULL; > - > - if (WARN_ON(!npdev->dev.of_node)) > - return NULL; > - > - /* Get assoicated PCI device */ > - dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); > - if (!dn) > - return NULL; > - > - gpdev = get_pci_dev(dn); > - of_node_put(dn); > - > - return gpdev; > -} > -EXPORT_SYMBOL(pnv_pci_get_gpu_dev); > - > -/* Given the real PCI device get a linked NPU device. */ > -struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) > -{ > - struct device_node *dn; > - struct pci_dev *npdev; > - > - if (WARN_ON(!gpdev)) > - return NULL; > - > - /* Not all PCI devices have device-tree nodes */ > - if (!gpdev->dev.of_node) > - return NULL; > - > - /* Get assoicated PCI device */ > - dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); > - if (!dn) > - return NULL; > - > - npdev = get_pci_dev(dn); > - of_node_put(dn); > - > - return npdev; > -} > -EXPORT_SYMBOL(pnv_pci_get_npu_dev); > - > -#define NPU_DMA_OP_UNSUPPORTED() \ > - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ > - __func__) > - > -static void *dma_npu_alloc(struct device *dev, size_t size, > - dma_addr_t *dma_handle, gfp_t flag, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return NULL; > -} > - > -static void dma_npu_free(struct device *dev, size_t size, > - void *vaddr, dma_addr_t dma_handle, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > -} > - > -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, > - unsigned long offset, size_t size, > - enum dma_data_direction direction, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, > - int nelems, enum dma_data_direction direction, > - unsigned long attrs) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static int dma_npu_dma_supported(struct device *dev, u64 mask) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static u64 dma_npu_get_required_mask(struct device *dev) > -{ > - NPU_DMA_OP_UNSUPPORTED(); > - return 0; > -} > - > -static const struct dma_map_ops dma_npu_ops = { > - .map_page = dma_npu_map_page, > - .map_sg = dma_npu_map_sg, > - .alloc = dma_npu_alloc, > - .free = dma_npu_free, > - .dma_supported = dma_npu_dma_supported, > - .get_required_mask = dma_npu_get_required_mask, > -}; > - > -/* > - * Returns the PE assoicated with the PCI device of the given > - * NPU. Returns the linked pci device if pci_dev != NULL. > - */ > -static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, > - struct pci_dev **gpdev) > -{ > - struct pnv_phb *phb; > - struct pci_controller *hose; > - struct pci_dev *pdev; > - struct pnv_ioda_pe *pe; > - struct pci_dn *pdn; > - > - pdev = pnv_pci_get_gpu_dev(npe->pdev); > - if (!pdev) > - return NULL; > - > - pdn = pci_get_pdn(pdev); > - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) > - return NULL; > - > - hose = pci_bus_to_host(pdev->bus); > - phb = hose->private_data; > - pe = &phb->ioda.pe_array[pdn->pe_number]; > - > - if (gpdev) > - *gpdev = pdev; > - > - return pe; > -} > - > -long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, > - struct iommu_table *tbl) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - const unsigned long size = tbl->it_indirect_levels ? > - tbl->it_level_size : tbl->it_size; > - const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; > - const __u64 win_size = tbl->it_size << tbl->it_page_shift; > - > - pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", > - start_addr, start_addr + win_size - 1, > - IOMMU_PAGE_SIZE(tbl)); > - > - rc = opal_pci_map_pe_dma_window(phb->opal_id, > - npe->pe_number, > - npe->pe_number, > - tbl->it_indirect_levels + 1, > - __pa(tbl->it_base), > - size << 3, > - IOMMU_PAGE_SIZE(tbl)); > - if (rc) { > - pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); > - return rc; > - } > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - /* Add the table to the list so its TCE cache will get invalidated */ > - pnv_pci_link_table_and_group(phb->hose->node, num, > - tbl, &npe->table_group); > - > - return 0; > -} > - > -long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - > - pe_info(npe, "Removing DMA window\n"); > - > - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, > - npe->pe_number, > - 0/* levels */, 0/* table address */, > - 0/* table size */, 0/* page size */); > - if (rc) { > - pe_err(npe, "Unmapping failed, ret = %lld\n", rc); > - return rc; > - } > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - pnv_pci_unlink_table_and_group(npe->table_group.tables[num], > - &npe->table_group); > - > - return 0; > -} > - > -/* > - * Enables 32 bit DMA on NPU. > - */ > -static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) > -{ > - struct pci_dev *gpdev; > - struct pnv_ioda_pe *gpe; > - int64_t rc; > - > - /* > - * Find the assoicated PCI devices and get the dma window > - * information from there. > - */ > - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) > - return; > - > - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); > - if (!gpe) > - return; > - > - rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); > - > - /* > - * We don't initialise npu_pe->tce32_table as we always use > - * dma_npu_ops which are nops. > - */ > - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); > -} > - > -/* > - * Enables bypass mode on the NPU. The NPU only supports one > - * window per link, so bypass needs to be explicitly enabled or > - * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be > - * active at the same time. > - */ > -static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc = 0; > - phys_addr_t top = memblock_end_of_DRAM(); > - > - if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) > - return -EINVAL; > - > - rc = pnv_npu_unset_window(npe, 0); > - if (rc != OPAL_SUCCESS) > - return rc; > - > - /* Enable the bypass window */ > - > - top = roundup_pow_of_two(top); > - dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", > - npe->pe_number); > - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, > - npe->pe_number, npe->pe_number, > - 0 /* bypass base */, top); > - > - if (rc == OPAL_SUCCESS) > - pnv_pci_ioda2_tce_invalidate_entire(phb, false); > - > - return rc; > -} > - > -void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) > -{ > - int i; > - struct pnv_phb *phb; > - struct pci_dn *pdn; > - struct pnv_ioda_pe *npe; > - struct pci_dev *npdev; > - > - for (i = 0; ; ++i) { > - npdev = pnv_pci_get_npu_dev(gpdev, i); > - > - if (!npdev) > - break; > - > - pdn = pci_get_pdn(npdev); > - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) > - return; > - > - phb = pci_bus_to_host(npdev->bus)->private_data; > - > - /* We only do bypass if it's enabled on the linked device */ > - npe = &phb->ioda.pe_array[pdn->pe_number]; > - > - if (bypass) { > - dev_info(&npdev->dev, > - "Using 64-bit DMA iommu bypass\n"); > - pnv_npu_dma_set_bypass(npe); > - } else { > - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); > - pnv_npu_dma_set_32(npe); > - } > - } > -} > - > -/* Switch ownership from platform code to external user (e.g. VFIO) */ > -void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - int64_t rc; > - > - /* > - * Note: NPU has just a single TVE in the hardware which means that > - * while used by the kernel, it can have either 32bit window or > - * DMA bypass but never both. So we deconfigure 32bit window only > - * if it was enabled at the moment of ownership change. > - */ > - if (npe->table_group.tables[0]) { > - pnv_npu_unset_window(npe, 0); > - return; > - } > - > - /* Disable bypass */ > - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, > - npe->pe_number, npe->pe_number, > - 0 /* bypass base */, 0); > - if (rc) { > - pe_err(npe, "Failed to disable bypass, err %lld\n", rc); > - return; > - } > - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); > -} > - > -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) > -{ > - struct pnv_phb *phb = npe->phb; > - struct pci_bus *pbus = phb->hose->bus; > - struct pci_dev *npdev, *gpdev = NULL, *gptmp; > - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); > - > - if (!gpe || !gpdev) > - return NULL; > - > - list_for_each_entry(npdev, &pbus->devices, bus_list) { > - gptmp = pnv_pci_get_gpu_dev(npdev); > - > - if (gptmp != gpdev) > - continue; > - > - pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); > - iommu_group_add_device(gpe->table_group.group, &npdev->dev); > - } > - > - return gpe; > -} > - > -/* Maximum number of nvlinks per npu */ > -#define NV_MAX_LINKS 6 > - > -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ > -static int max_npu2_index; > - > -struct npu_context { > - struct mm_struct *mm; > - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; > - struct mmu_notifier mn; > - struct kref kref; > - bool nmmu_flush; > - > - /* Callback to stop translation requests on a given GPU */ > - void (*release_cb)(struct npu_context *context, void *priv); > - > - /* > - * Private pointer passed to the above callback for usage by > - * device drivers. > - */ > - void *priv; > -}; > - > -struct mmio_atsd_reg { > - struct npu *npu; > - int reg; > -}; > - > -/* > - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC > - * if none are available. > - */ > -static int get_mmio_atsd_reg(struct npu *npu) > -{ > - int i; > - > - for (i = 0; i < npu->mmio_atsd_count; i++) { > - if (!test_bit(i, &npu->mmio_atsd_usage)) > - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) > - return i; > - } > - > - return -ENOSPC; > -} > - > -static void put_mmio_atsd_reg(struct npu *npu, int reg) > -{ > - clear_bit_unlock(reg, &npu->mmio_atsd_usage); > -} > - > -/* MMIO ATSD register offsets */ > -#define XTS_ATSD_AVA 1 > -#define XTS_ATSD_STAT 2 > - > -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, > - unsigned long launch, unsigned long va) > -{ > - struct npu *npu = mmio_atsd_reg->npu; > - int reg = mmio_atsd_reg->reg; > - > - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); > - eieio(); > - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]); > -} > - > -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > - unsigned long pid, bool flush) > -{ > - int i; > - unsigned long launch; > - > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* IS set to invalidate matching PID */ > - launch = PPC_BIT(12); > - > - /* PRS set to process-scoped */ > - launch |= PPC_BIT(13); > - > - /* AP */ > - launch |= (u64) > - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > - > - /* PID */ > - launch |= pid << PPC_BITLSHIFT(38); > - > - /* No flush */ > - launch |= !flush << PPC_BITLSHIFT(39); > - > - /* Invalidating the entire process doesn't use a va */ > - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); > - } > -} > - > -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > - unsigned long va, unsigned long pid, bool flush) > -{ > - int i; > - unsigned long launch; > - > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* IS set to invalidate target VA */ > - launch = 0; > - > - /* PRS set to process scoped */ > - launch |= PPC_BIT(13); > - > - /* AP */ > - launch |= (u64) > - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > - > - /* PID */ > - launch |= pid << PPC_BITLSHIFT(38); > - > - /* No flush */ > - launch |= !flush << PPC_BITLSHIFT(39); > - > - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); > - } > -} > - > -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) > - > -static void mmio_invalidate_wait( > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - struct npu *npu; > - int i, reg; > - > - /* Wait for all invalidations to complete */ > - for (i = 0; i <= max_npu2_index; i++) { > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - /* Wait for completion */ > - npu = mmio_atsd_reg[i].npu; > - reg = mmio_atsd_reg[i].reg; > - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) > - cpu_relax(); > - } > -} > - > -/* > - * Acquires all the address translation shootdown (ATSD) registers required to > - * launch an ATSD on all links this npu_context is active on. > - */ > -static void acquire_atsd_reg(struct npu_context *npu_context, > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - int i, j; > - struct npu *npu; > - struct pci_dev *npdev; > - struct pnv_phb *nphb; > - > - for (i = 0; i <= max_npu2_index; i++) { > - mmio_atsd_reg[i].reg = -1; > - for (j = 0; j < NV_MAX_LINKS; j++) { > - /* > - * There are no ordering requirements with respect to > - * the setup of struct npu_context, but to ensure > - * consistent behaviour we need to ensure npdev[][] is > - * only read once. > - */ > - npdev = READ_ONCE(npu_context->npdev[i][j]); > - if (!npdev) > - continue; > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - mmio_atsd_reg[i].npu = npu; > - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); > - while (mmio_atsd_reg[i].reg < 0) { > - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); > - cpu_relax(); > - } > - break; > - } > - } > -} > - > -/* > - * Release previously acquired ATSD registers. To avoid deadlocks the registers > - * must be released in the same order they were acquired above in > - * acquire_atsd_reg. > - */ > -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > -{ > - int i; > - > - for (i = 0; i <= max_npu2_index; i++) { > - /* > - * We can't rely on npu_context->npdev[][] being the same here > - * as when acquire_atsd_reg() was called, hence we use the > - * values stored in mmio_atsd_reg during the acquire phase > - * rather than re-reading npdev[][]. > - */ > - if (mmio_atsd_reg[i].reg < 0) > - continue; > - > - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); > - } > -} > - > -/* > - * Invalidate either a single address or an entire PID depending on > - * the value of va. > - */ > -static void mmio_invalidate(struct npu_context *npu_context, int va, > - unsigned long address, bool flush) > -{ > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; > - unsigned long pid = npu_context->mm->context.id; > - > - if (npu_context->nmmu_flush) > - /* > - * Unfortunately the nest mmu does not support flushing specific > - * addresses so we have to flush the whole mm once before > - * shooting down the GPU translation. > - */ > - flush_all_mm(npu_context->mm); > - > - /* > - * Loop over all the NPUs this process is active on and launch > - * an invalidate. > - */ > - acquire_atsd_reg(npu_context, mmio_atsd_reg); > - if (va) > - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); > - else > - mmio_invalidate_pid(mmio_atsd_reg, pid, flush); > - > - mmio_invalidate_wait(mmio_atsd_reg); > - if (flush) { > - /* > - * The GPU requires two flush ATSDs to ensure all entries have > - * been flushed. We use PID 0 as it will never be used for a > - * process on the GPU. > - */ > - mmio_invalidate_pid(mmio_atsd_reg, 0, true); > - mmio_invalidate_wait(mmio_atsd_reg); > - mmio_invalidate_pid(mmio_atsd_reg, 0, true); > - mmio_invalidate_wait(mmio_atsd_reg); > - } > - release_atsd_reg(mmio_atsd_reg); > -} > - > -static void pnv_npu2_mn_release(struct mmu_notifier *mn, > - struct mm_struct *mm) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - > - /* Call into device driver to stop requests to the NMMU */ > - if (npu_context->release_cb) > - npu_context->release_cb(npu_context, npu_context->priv); > - > - /* > - * There should be no more translation requests for this PID, but we > - * need to ensure any entries for it are removed from the TLB. > - */ > - mmio_invalidate(npu_context, 0, 0, true); > -} > - > -static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > - struct mm_struct *mm, > - unsigned long address, > - pte_t pte) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - > - mmio_invalidate(npu_context, 1, address, true); > -} > - > -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > - struct mm_struct *mm, > - unsigned long start, unsigned long end) > -{ > - struct npu_context *npu_context = mn_to_npu_context(mn); > - unsigned long address; > - > - if (end - start > atsd_threshold) { > - /* > - * Just invalidate the entire PID if the address range is too > - * large. > - */ > - mmio_invalidate(npu_context, 0, 0, true); > - } else { > - for (address = start; address < end; address += PAGE_SIZE) > - mmio_invalidate(npu_context, 1, address, false); > - > - /* Do the flush only on the final addess == end */ > - mmio_invalidate(npu_context, 1, address, true); > - } > -} > - > -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { > - .release = pnv_npu2_mn_release, > - .change_pte = pnv_npu2_mn_change_pte, > - .invalidate_range = pnv_npu2_mn_invalidate_range, > -}; > - > -/* > - * Call into OPAL to setup the nmmu context for the current task in > - * the NPU. This must be called to setup the context tables before the > - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. > - * > - * A release callback should be registered to allow a device driver to > - * be notified that it should not launch any new translation requests > - * as the final TLB invalidate is about to occur. > - * > - * Returns an error if there no contexts are currently available or a > - * npu_context which should be passed to pnv_npu2_handle_fault(). > - * > - * mmap_sem must be held in write mode and must not be called from interrupt > - * context. > - */ > -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > - unsigned long flags, > - void (*cb)(struct npu_context *, void *), > - void *priv) > -{ > - int rc; > - u32 nvlink_index; > - struct device_node *nvlink_dn; > - struct mm_struct *mm = current->mm; > - struct pnv_phb *nphb; > - struct npu *npu; > - struct npu_context *npu_context; > - > - /* > - * At present we don't support GPUs connected to multiple NPUs and I'm > - * not sure the hardware does either. > - */ > - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return ERR_PTR(-ENODEV); > - > - if (!npdev) > - /* No nvlink associated with this GPU device */ > - return ERR_PTR(-ENODEV); > - > - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); > - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", > - &nvlink_index))) > - return ERR_PTR(-ENODEV); > - > - if (!mm || mm->context.id == 0) { > - /* > - * Kernel thread contexts are not supported and context id 0 is > - * reserved on the GPU. > - */ > - return ERR_PTR(-EINVAL); > - } > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - > - /* > - * Setup the NPU context table for a particular GPU. These need to be > - * per-GPU as we need the tables to filter ATSDs when there are no > - * active contexts on a particular GPU. It is safe for these to be > - * called concurrently with destroy as the OPAL call takes appropriate > - * locks and refcounts on init/destroy. > - */ > - rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - if (rc < 0) > - return ERR_PTR(-ENOSPC); > - > - /* > - * We store the npu pci device so we can more easily get at the > - * associated npus. > - */ > - spin_lock(&npu_context_lock); > - npu_context = mm->context.npu_context; > - if (npu_context) { > - if (npu_context->release_cb != cb || > - npu_context->priv != priv) { > - spin_unlock(&npu_context_lock); > - opal_npu_destroy_context(nphb->opal_id, mm->context.id, > - PCI_DEVID(gpdev->bus->number, > - gpdev->devfn)); > - return ERR_PTR(-EINVAL); > - } > - > - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); > - } > - spin_unlock(&npu_context_lock); > - > - if (!npu_context) { > - /* > - * We can set up these fields without holding the > - * npu_context_lock as the npu_context hasn't been returned to > - * the caller meaning it can't be destroyed. Parallel allocation > - * is protected against by mmap_sem. > - */ > - rc = -ENOMEM; > - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); > - if (npu_context) { > - kref_init(&npu_context->kref); > - npu_context->mm = mm; > - npu_context->mn.ops = &nv_nmmu_notifier_ops; > - rc = __mmu_notifier_register(&npu_context->mn, mm); > - } > - > - if (rc) { > - kfree(npu_context); > - opal_npu_destroy_context(nphb->opal_id, mm->context.id, > - PCI_DEVID(gpdev->bus->number, > - gpdev->devfn)); > - return ERR_PTR(rc); > - } > - > - mm->context.npu_context = npu_context; > - } > - > - npu_context->release_cb = cb; > - npu_context->priv = priv; > - > - /* > - * npdev is a pci_dev pointer setup by the PCI code. We assign it to > - * npdev[][] to indicate to the mmu notifiers that an invalidation > - * should also be sent over this nvlink. The notifiers don't use any > - * other fields in npu_context, so we just need to ensure that when they > - * deference npu_context->npdev[][] it is either a valid pointer or > - * NULL. > - */ > - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); > - > - if (!nphb->npu.nmmu_flush) { > - /* > - * If we're not explicitly flushing ourselves we need to mark > - * the thread for global flushes > - */ > - npu_context->nmmu_flush = false; > - mm_context_add_copro(mm); > - } else > - npu_context->nmmu_flush = true; > - > - return npu_context; > -} > -EXPORT_SYMBOL(pnv_npu2_init_context); > - > -static void pnv_npu2_release_context(struct kref *kref) > -{ > - struct npu_context *npu_context = > - container_of(kref, struct npu_context, kref); > - > - if (!npu_context->nmmu_flush) > - mm_context_remove_copro(npu_context->mm); > - > - npu_context->mm->context.npu_context = NULL; > -} > - > -/* > - * Destroy a context on the given GPU. May free the npu_context if it is no > - * longer active on any GPUs. Must not be called from interrupt context. > - */ > -void pnv_npu2_destroy_context(struct npu_context *npu_context, > - struct pci_dev *gpdev) > -{ > - int removed; > - struct pnv_phb *nphb; > - struct npu *npu; > - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); > - struct device_node *nvlink_dn; > - u32 nvlink_index; > - > - if (WARN_ON(!npdev)) > - return; > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return; > - > - nphb = pci_bus_to_host(npdev->bus)->private_data; > - npu = &nphb->npu; > - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); > - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", > - &nvlink_index))) > - return; > - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); > - opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - spin_lock(&npu_context_lock); > - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); > - spin_unlock(&npu_context_lock); > - > - /* > - * We need to do this outside of pnv_npu2_release_context so that it is > - * outside the spinlock as mmu_notifier_destroy uses SRCU. > - */ > - if (removed) { > - mmu_notifier_unregister(&npu_context->mn, > - npu_context->mm); > - > - kfree(npu_context); > - } > - > -} > -EXPORT_SYMBOL(pnv_npu2_destroy_context); > - > -/* > - * Assumes mmap_sem is held for the contexts associated mm. > - */ > -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, > - unsigned long *flags, unsigned long *status, int count) > -{ > - u64 rc = 0, result = 0; > - int i, is_write; > - struct page *page[1]; > - > - /* mmap_sem should be held so the struct_mm must be present */ > - struct mm_struct *mm = context->mm; > - > - if (!firmware_has_feature(FW_FEATURE_OPAL)) > - return -ENODEV; > - > - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); > - > - for (i = 0; i < count; i++) { > - is_write = flags[i] & NPU2_WRITE; > - rc = get_user_pages_remote(NULL, mm, ea[i], 1, > - is_write ? FOLL_WRITE : 0, > - page, NULL, NULL); > - > - /* > - * To support virtualised environments we will have to do an > - * access to the page to ensure it gets faulted into the > - * hypervisor. For the moment virtualisation is not supported in > - * other areas so leave the access out. > - */ > - if (rc != 1) { > - status[i] = rc; > - result = -EFAULT; > - continue; > - } > - > - status[i] = 0; > - put_page(page[0]); > - } > - > - return result; > -} > -EXPORT_SYMBOL(pnv_npu2_handle_fault); > - > -int pnv_npu2_init(struct pnv_phb *phb) > -{ > - unsigned int i; > - u64 mmio_atsd; > - struct device_node *dn; > - struct pci_dev *gpdev; > - static int npu_index; > - uint64_t rc = 0; > - > - if (!atsd_threshold_dentry) { > - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", > - 0600, powerpc_debugfs_root, &atsd_threshold); > - } > - > - phb->npu.nmmu_flush = > - of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); > - for_each_child_of_node(phb->hose->dn, dn) { > - gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); > - if (gpdev) { > - rc = opal_npu_map_lpar(phb->opal_id, > - PCI_DEVID(gpdev->bus->number, gpdev->devfn), > - 0, 0); > - if (rc) > - dev_err(&gpdev->dev, > - "Error %lld mapping device to LPAR\n", > - rc); > - } > - } > - > - for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd", > - i, &mmio_atsd); i++) > - phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); > - > - pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i); > - phb->npu.mmio_atsd_count = i; > - phb->npu.mmio_atsd_usage = 0; > - npu_index++; > - if (WARN_ON(npu_index >= NV_MAX_NPUS)) > - return -ENOSPC; > - max_npu2_index = npu_index; > - phb->npu.index = npu_index; > - > - return 0; > -} > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index 913175ba1c10..b6db65917bb4 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -1203,75 +1203,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > return pe; > } > > -static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) > -{ > - int pe_num, found_pe = false, rc; > - long rid; > - struct pnv_ioda_pe *pe; > - struct pci_dev *gpu_pdev; > - struct pci_dn *npu_pdn; > - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); > - struct pnv_phb *phb = hose->private_data; > - > - /* > - * Due to a hardware errata PE#0 on the NPU is reserved for > - * error handling. This means we only have three PEs remaining > - * which need to be assigned to four links, implying some > - * links must share PEs. > - * > - * To achieve this we assign PEs such that NPUs linking the > - * same GPU get assigned the same PE. > - */ > - gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); > - for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { > - pe = &phb->ioda.pe_array[pe_num]; > - if (!pe->pdev) > - continue; > - > - if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) { > - /* > - * This device has the same peer GPU so should > - * be assigned the same PE as the existing > - * peer NPU. > - */ > - dev_info(&npu_pdev->dev, > - "Associating to existing PE %x\n", pe_num); > - pci_dev_get(npu_pdev); > - npu_pdn = pci_get_pdn(npu_pdev); > - rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; > - npu_pdn->pe_number = pe_num; > - phb->ioda.pe_rmap[rid] = pe->pe_number; > - > - /* Map the PE to this link */ > - rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, > - OpalPciBusAll, > - OPAL_COMPARE_RID_DEVICE_NUMBER, > - OPAL_COMPARE_RID_FUNCTION_NUMBER, > - OPAL_MAP_PE); > - WARN_ON(rc != OPAL_SUCCESS); > - found_pe = true; > - break; > - } > - } > - > - if (!found_pe) > - /* > - * Could not find an existing PE so allocate a new > - * one. > - */ > - return pnv_ioda_setup_dev_PE(npu_pdev); > - else > - return pe; > -} > - > -static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) > -{ > - struct pci_dev *pdev; > - > - list_for_each_entry(pdev, &bus->devices, bus_list) > - pnv_ioda_setup_npu_PE(pdev); > -} > - > static void pnv_pci_ioda_setup_PEs(void) > { > struct pci_controller *hose, *tmp; > @@ -1281,13 +1212,6 @@ static void pnv_pci_ioda_setup_PEs(void) > > list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > phb = hose->private_data; > - if (phb->type == PNV_PHB_NPU_NVLINK) { > - /* PE#0 is needed for error reporting */ > - pnv_ioda_reserve_pe(phb, 0); > - pnv_ioda_setup_npu_PEs(hose->bus); > - if (phb->model == PNV_PHB_MODEL_NPU2) > - pnv_npu2_init(phb); > - } > if (phb->type == PNV_PHB_NPU_OCAPI) { > bus = hose->bus; > list_for_each_entry(pdev, &bus->devices, bus_list) > @@ -1871,9 +1795,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) > } > *pdev->dev.dma_mask = dma_mask; > > - /* Update peer npu devices */ > - pnv_npu_try_dma_set_bypass(pdev, bypass); > - > return 0; > } > > @@ -2119,14 +2040,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, > } > } > > -void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) > -{ > - if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) > - pnv_pci_phb3_tce_invalidate_entire(phb, rm); > - else > - opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); > -} > - > static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, > long npages, unsigned long uaddr, > enum dma_data_direction direction, > @@ -2615,137 +2528,6 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { > .take_ownership = pnv_ioda2_take_ownership, > .release_ownership = pnv_ioda2_release_ownership, > }; > - > -static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) > -{ > - struct pci_controller *hose; > - struct pnv_phb *phb; > - struct pnv_ioda_pe **ptmppe = opaque; > - struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); > - struct pci_dn *pdn = pci_get_pdn(pdev); > - > - if (!pdn || pdn->pe_number == IODA_INVALID_PE) > - return 0; > - > - hose = pci_bus_to_host(pdev->bus); > - phb = hose->private_data; > - if (phb->type != PNV_PHB_NPU_NVLINK) > - return 0; > - > - *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; > - > - return 1; > -} > - > -/* > - * This returns PE of associated NPU. > - * This assumes that NPU is in the same IOMMU group with GPU and there is > - * no other PEs. > - */ > -static struct pnv_ioda_pe *gpe_table_group_to_npe( > - struct iommu_table_group *table_group) > -{ > - struct pnv_ioda_pe *npe = NULL; > - int ret = iommu_group_for_each_dev(table_group->group, &npe, > - gpe_table_group_to_npe_cb); > - > - BUG_ON(!ret || !npe); > - > - return npe; > -} > - > -static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, > - int num, struct iommu_table *tbl) > -{ > - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); > - int num2 = (num == 0) ? 1 : 0; > - long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); > - > - if (ret) > - return ret; > - > - if (table_group->tables[num2]) > - pnv_npu_unset_window(npe, num2); > - > - ret = pnv_npu_set_window(npe, num, tbl); > - if (ret) { > - pnv_pci_ioda2_unset_window(table_group, num); > - if (table_group->tables[num2]) > - pnv_npu_set_window(npe, num2, > - table_group->tables[num2]); > - } > - > - return ret; > -} > - > -static long pnv_pci_ioda2_npu_unset_window( > - struct iommu_table_group *table_group, > - int num) > -{ > - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); > - int num2 = (num == 0) ? 1 : 0; > - long ret = pnv_pci_ioda2_unset_window(table_group, num); > - > - if (ret) > - return ret; > - > - if (!npe->table_group.tables[num]) > - return 0; > - > - ret = pnv_npu_unset_window(npe, num); > - if (ret) > - return ret; > - > - if (table_group->tables[num2]) > - ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); > - > - return ret; > -} > - > -static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) > -{ > - /* > - * Detach NPU first as pnv_ioda2_take_ownership() will destroy > - * the iommu_table if 32bit DMA is enabled. > - */ > - pnv_npu_take_ownership(gpe_table_group_to_npe(table_group)); > - pnv_ioda2_take_ownership(table_group); > -} > - > -static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { > - .get_table_size = pnv_pci_ioda2_get_table_size, > - .create_table = pnv_pci_ioda2_create_table_userspace, > - .set_window = pnv_pci_ioda2_npu_set_window, > - .unset_window = pnv_pci_ioda2_npu_unset_window, > - .take_ownership = pnv_ioda2_npu_take_ownership, > - .release_ownership = pnv_ioda2_release_ownership, > -}; > - > -static void pnv_pci_ioda_setup_iommu_api(void) > -{ > - struct pci_controller *hose, *tmp; > - struct pnv_phb *phb; > - struct pnv_ioda_pe *pe, *gpe; > - > - /* > - * Now we have all PHBs discovered, time to add NPU devices to > - * the corresponding IOMMU groups. > - */ > - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > - phb = hose->private_data; > - > - if (phb->type != PNV_PHB_NPU_NVLINK) > - continue; > - > - list_for_each_entry(pe, &phb->ioda.pe_list, list) { > - gpe = pnv_pci_npu_setup_iommu(pe); > - if (gpe) > - gpe->table_group.ops = &pnv_pci_ioda2_npu_ops; > - } > - } > -} > -#else /* !CONFIG_IOMMU_API */ > -static void pnv_pci_ioda_setup_iommu_api(void) { }; > #endif > > static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) > @@ -3242,7 +3024,6 @@ static void pnv_pci_enable_bridges(void) > static void pnv_pci_ioda_fixup(void) > { > pnv_pci_ioda_setup_PEs(); > - pnv_pci_ioda_setup_iommu_api(); > pnv_pci_ioda_create_dbgfs(); > > pnv_pci_enable_bridges(); > @@ -3689,27 +3470,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { > .shutdown = pnv_pci_ioda_shutdown, > }; > > -static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) > -{ > - dev_err_once(&npdev->dev, > - "%s operation unsupported for NVLink devices\n", > - __func__); > - return -EPERM; > -} > - > -static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { > - .dma_dev_setup = pnv_pci_dma_dev_setup, > -#ifdef CONFIG_PCI_MSI > - .setup_msi_irqs = pnv_setup_msi_irqs, > - .teardown_msi_irqs = pnv_teardown_msi_irqs, > -#endif > - .enable_device_hook = pnv_pci_enable_device_hook, > - .window_alignment = pnv_pci_window_alignment, > - .reset_secondary_bus = pnv_pci_reset_secondary_bus, > - .dma_set_mask = pnv_npu_dma_set_mask, > - .shutdown = pnv_pci_ioda_shutdown, > -}; > - > static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { > .enable_device_hook = pnv_pci_enable_device_hook, > .window_alignment = pnv_pci_window_alignment, > @@ -3931,9 +3691,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, > ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; > > switch (phb->type) { > - case PNV_PHB_NPU_NVLINK: > - hose->controller_ops = pnv_npu_ioda_controller_ops; > - break; > case PNV_PHB_NPU_OCAPI: > hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; > break; > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h > index 8b37b28e3831..54f2935b7ac5 100644 > --- a/arch/powerpc/platforms/powernv/pci.h > +++ b/arch/powerpc/platforms/powernv/pci.h > @@ -231,17 +231,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, > #define pe_info(pe, fmt, ...) \ > pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) > > -/* Nvlink functions */ > -extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); > -extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); > -extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); > -extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, > - struct iommu_table *tbl); > -extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num); > -extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); > -extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); > -extern int pnv_npu2_init(struct pnv_phb *phb); > - > /* pci-ioda-tce.c */ > #define POWERNV_IOMMU_DEFAULT_LEVELS 1 > #define POWERNV_IOMMU_MAX_LEVELS 5 > -- Alexey