Linux-api Archive on lore.kernel.org
 help / color / Atom feed
* remove the nvlink2 pci_vfio subdriver
@ 2021-03-22 15:01 Christoph Hellwig
  2021-03-22 15:01 ` [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2 Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Christoph Hellwig @ 2021-03-22 15:01 UTC (permalink / raw)
  To: Michael Ellerman, Alex Williamson
  Cc: Benjamin Herrenschmidt, Greg Kroah-Hartman, Jason Gunthorpe,
	David Airlie, Daniel Vetter, dri-devel, Paul Mackerras,
	linuxppc-dev, linux-kernel, kvm, linux-api

Hi all,

the nvlink2 vfio subdriver is a weird beast.  It supports a hardware
feature without any open source component - what would normally be
the normal open source userspace that we require for kernel drivers,
although in this particular case user space could of course be a
kernel driver in a VM.  It also happens to be a complete mess that
does not properly bind to PCI IDs, is hacked into the vfio_pci driver
and also pulles in over 1000 lines of code always build into powerpc
kernels that have Power NV support enabled.  Because of all these
issues and the lack of breaking userspace when it is removed I think
the best idea is to simply kill.

Diffstat:
 arch/powerpc/platforms/powernv/npu-dma.c     |  705 ---------------------------
 b/arch/powerpc/include/asm/opal.h            |    3 
 b/arch/powerpc/include/asm/pci-bridge.h      |    1 
 b/arch/powerpc/include/asm/pci.h             |    7 
 b/arch/powerpc/platforms/powernv/Makefile    |    2 
 b/arch/powerpc/platforms/powernv/opal-call.c |    2 
 b/arch/powerpc/platforms/powernv/pci-ioda.c  |  185 -------
 b/arch/powerpc/platforms/powernv/pci.c       |   11 
 b/arch/powerpc/platforms/powernv/pci.h       |   17 
 b/arch/powerpc/platforms/pseries/pci.c       |   23 
 b/drivers/vfio/pci/Kconfig                   |    6 
 b/drivers/vfio/pci/Makefile                  |    1 
 b/drivers/vfio/pci/vfio_pci.c                |   18 
 b/drivers/vfio/pci/vfio_pci_private.h        |   14 
 b/include/uapi/linux/vfio.h                  |   40 -
 drivers/vfio/pci/vfio_pci_nvlink2.c          |  490 ------------------
 16 files changed, 8 insertions(+), 1517 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2
  2021-03-22 15:01 remove the nvlink2 pci_vfio subdriver Christoph Hellwig
@ 2021-03-22 15:01 ` Christoph Hellwig
  2021-03-22 17:46   ` Alex Williamson
  2021-03-22 15:01 ` [PATCH 2/2] powerpc/powernv: remove the nvlink support Christoph Hellwig
  2021-03-22 15:12 ` remove the nvlink2 pci_vfio subdriver Greg Kroah-Hartman
  2 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2021-03-22 15:01 UTC (permalink / raw)
  To: Michael Ellerman, Alex Williamson
  Cc: Benjamin Herrenschmidt, Greg Kroah-Hartman, Jason Gunthorpe,
	David Airlie, Daniel Vetter, dri-devel, Paul Mackerras,
	linuxppc-dev, linux-kernel, kvm, linux-api

This driver never had any open userspace (which for VFIO would include
VM kernel drivers) that use it, and thus should never have been added
by our normal userspace ABI rules.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/vfio/pci/Kconfig            |   6 -
 drivers/vfio/pci/Makefile           |   1 -
 drivers/vfio/pci/vfio_pci.c         |  18 -
 drivers/vfio/pci/vfio_pci_nvlink2.c | 490 ----------------------------
 drivers/vfio/pci/vfio_pci_private.h |  14 -
 include/uapi/linux/vfio.h           |  40 ---
 6 files changed, 569 deletions(-)
 delete mode 100644 drivers/vfio/pci/vfio_pci_nvlink2.c

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index ac3c1dd3edeff1..53ce78d7d07be0 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -39,9 +39,3 @@ config VFIO_PCI_IGD
 	  and LPC bridge config space.
 
 	  To enable Intel IGD assignment through vfio-pci, say Y.
-
-config VFIO_PCI_NVLINK2
-	def_bool y
-	depends on VFIO_PCI && PPC_POWERNV
-	help
-	  VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index eff97a7cd9f139..3ff42093962f6f 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -2,7 +2,6 @@
 
 vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
-vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
 vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
 
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 65e7e6b44578c2..d691006b642839 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -389,24 +389,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		}
 	}
 
-	if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
-	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
-		ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
-		if (ret && ret != -ENODEV) {
-			pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
-			goto disable_exit;
-		}
-	}
-
-	if (pdev->vendor == PCI_VENDOR_ID_IBM &&
-	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
-		ret = vfio_pci_ibm_npu2_init(vdev);
-		if (ret && ret != -ENODEV) {
-			pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
-			goto disable_exit;
-		}
-	}
-
 	vfio_pci_probe_mmaps(vdev);
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
deleted file mode 100644
index 9adcf6a8f88857..00000000000000
--- a/drivers/vfio/pci/vfio_pci_nvlink2.c
+++ /dev/null
@@ -1,490 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VFIO PCI NVIDIA Whitherspoon GPU support a.k.a. NVLink2.
- *
- * Copyright (C) 2018 IBM Corp.  All rights reserved.
- *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
- *
- * Register an on-GPU RAM region for cacheable access.
- *
- * Derived from original vfio_pci_igd.c:
- * Copyright (C) 2016 Red Hat, Inc.  All rights reserved.
- *	Author: Alex Williamson <alex.williamson@redhat.com>
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/uaccess.h>
-#include <linux/vfio.h>
-#include <linux/sched/mm.h>
-#include <linux/mmu_context.h>
-#include <asm/kvm_ppc.h>
-#include "vfio_pci_private.h"
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap_fault);
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap);
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_npu2_mmap);
-
-struct vfio_pci_nvgpu_data {
-	unsigned long gpu_hpa; /* GPU RAM physical address */
-	unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
-	unsigned long useraddr; /* GPU RAM userspace address */
-	unsigned long size; /* Size of the GPU RAM window (usually 128GB) */
-	struct mm_struct *mm;
-	struct mm_iommu_table_group_mem_t *mem; /* Pre-registered RAM descr. */
-	struct pci_dev *gpdev;
-	struct notifier_block group_notifier;
-};
-
-static size_t vfio_pci_nvgpu_rw(struct vfio_pci_device *vdev,
-		char __user *buf, size_t count, loff_t *ppos, bool iswrite)
-{
-	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
-	struct vfio_pci_nvgpu_data *data = vdev->region[i].data;
-	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
-	loff_t posaligned = pos & PAGE_MASK, posoff = pos & ~PAGE_MASK;
-	size_t sizealigned;
-	void __iomem *ptr;
-
-	if (pos >= vdev->region[i].size)
-		return -EINVAL;
-
-	count = min(count, (size_t)(vdev->region[i].size - pos));
-
-	/*
-	 * We map only a bit of GPU RAM for a short time instead of mapping it
-	 * for the guest lifetime as:
-	 *
-	 * 1) we do not know GPU RAM size, only aperture which is 4-8 times
-	 *    bigger than actual RAM size (16/32GB RAM vs. 128GB aperture);
-	 * 2) mapping GPU RAM allows CPU to prefetch and if this happens
-	 *    before NVLink bridge is reset (which fences GPU RAM),
-	 *    hardware management interrupts (HMI) might happen, this
-	 *    will freeze NVLink bridge.
-	 *
-	 * This is not fast path anyway.
-	 */
-	sizealigned = ALIGN(posoff + count, PAGE_SIZE);
-	ptr = ioremap_cache(data->gpu_hpa + posaligned, sizealigned);
-	if (!ptr)
-		return -EFAULT;
-
-	if (iswrite) {
-		if (copy_from_user(ptr + posoff, buf, count))
-			count = -EFAULT;
-		else
-			*ppos += count;
-	} else {
-		if (copy_to_user(buf, ptr + posoff, count))
-			count = -EFAULT;
-		else
-			*ppos += count;
-	}
-
-	iounmap(ptr);
-
-	return count;
-}
-
-static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev,
-		struct vfio_pci_region *region)
-{
-	struct vfio_pci_nvgpu_data *data = region->data;
-	long ret;
-
-	/* If there were any mappings at all... */
-	if (data->mm) {
-		if (data->mem) {
-			ret = mm_iommu_put(data->mm, data->mem);
-			WARN_ON(ret);
-		}
-
-		mmdrop(data->mm);
-	}
-
-	vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
-			&data->group_notifier);
-
-	pnv_npu2_unmap_lpar_dev(data->gpdev);
-
-	kfree(data);
-}
-
-static vm_fault_t vfio_pci_nvgpu_mmap_fault(struct vm_fault *vmf)
-{
-	vm_fault_t ret;
-	struct vm_area_struct *vma = vmf->vma;
-	struct vfio_pci_region *region = vma->vm_private_data;
-	struct vfio_pci_nvgpu_data *data = region->data;
-	unsigned long vmf_off = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	unsigned long nv2pg = data->gpu_hpa >> PAGE_SHIFT;
-	unsigned long vm_pgoff = vma->vm_pgoff &
-		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
-	unsigned long pfn = nv2pg + vm_pgoff + vmf_off;
-
-	ret = vmf_insert_pfn(vma, vmf->address, pfn);
-	trace_vfio_pci_nvgpu_mmap_fault(data->gpdev, pfn << PAGE_SHIFT,
-			vmf->address, ret);
-
-	return ret;
-}
-
-static const struct vm_operations_struct vfio_pci_nvgpu_mmap_vmops = {
-	.fault = vfio_pci_nvgpu_mmap_fault,
-};
-
-static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev,
-		struct vfio_pci_region *region, struct vm_area_struct *vma)
-{
-	int ret;
-	struct vfio_pci_nvgpu_data *data = region->data;
-
-	if (data->useraddr)
-		return -EPERM;
-
-	if (vma->vm_end - vma->vm_start > data->size)
-		return -EINVAL;
-
-	vma->vm_private_data = region;
-	vma->vm_flags |= VM_PFNMAP;
-	vma->vm_ops = &vfio_pci_nvgpu_mmap_vmops;
-
-	/*
-	 * Calling mm_iommu_newdev() here once as the region is not
-	 * registered yet and therefore right initialization will happen now.
-	 * Other places will use mm_iommu_find() which returns
-	 * registered @mem and does not go gup().
-	 */
-	data->useraddr = vma->vm_start;
-	data->mm = current->mm;
-
-	mmgrab(data->mm);
-	ret = (int) mm_iommu_newdev(data->mm, data->useraddr,
-			vma_pages(vma), data->gpu_hpa, &data->mem);
-
-	trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr,
-			vma->vm_end - vma->vm_start, ret);
-
-	return ret;
-}
-
-static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev,
-		struct vfio_pci_region *region, struct vfio_info_cap *caps)
-{
-	struct vfio_pci_nvgpu_data *data = region->data;
-	struct vfio_region_info_cap_nvlink2_ssatgt cap = {
-		.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
-		.header.version = 1,
-		.tgt = data->gpu_tgt
-	};
-
-	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
-}
-
-static const struct vfio_pci_regops vfio_pci_nvgpu_regops = {
-	.rw = vfio_pci_nvgpu_rw,
-	.release = vfio_pci_nvgpu_release,
-	.mmap = vfio_pci_nvgpu_mmap,
-	.add_capability = vfio_pci_nvgpu_add_capability,
-};
-
-static int vfio_pci_nvgpu_group_notifier(struct notifier_block *nb,
-		unsigned long action, void *opaque)
-{
-	struct kvm *kvm = opaque;
-	struct vfio_pci_nvgpu_data *data = container_of(nb,
-			struct vfio_pci_nvgpu_data,
-			group_notifier);
-
-	if (action == VFIO_GROUP_NOTIFY_SET_KVM && kvm &&
-			pnv_npu2_map_lpar_dev(data->gpdev,
-				kvm->arch.lpid, MSR_DR | MSR_PR))
-		return NOTIFY_BAD;
-
-	return NOTIFY_OK;
-}
-
-int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
-{
-	int ret;
-	u64 reg[2];
-	u64 tgt = 0;
-	struct device_node *npu_node, *mem_node;
-	struct pci_dev *npu_dev;
-	struct vfio_pci_nvgpu_data *data;
-	uint32_t mem_phandle = 0;
-	unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
-
-	/*
-	 * PCI config space does not tell us about NVLink presense but
-	 * platform does, use this.
-	 */
-	npu_dev = pnv_pci_get_npu_dev(vdev->pdev, 0);
-	if (!npu_dev)
-		return -ENODEV;
-
-	npu_node = pci_device_to_OF_node(npu_dev);
-	if (!npu_node)
-		return -EINVAL;
-
-	if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
-		return -ENODEV;
-
-	mem_node = of_find_node_by_phandle(mem_phandle);
-	if (!mem_node)
-		return -EINVAL;
-
-	if (of_property_read_variable_u64_array(mem_node, "reg", reg,
-				ARRAY_SIZE(reg), ARRAY_SIZE(reg)) !=
-			ARRAY_SIZE(reg))
-		return -EINVAL;
-
-	if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
-		dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
-		return -EFAULT;
-	}
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->gpu_hpa = reg[0];
-	data->gpu_tgt = tgt;
-	data->size = reg[1];
-
-	dev_dbg(&vdev->pdev->dev, "%lx..%lx\n", data->gpu_hpa,
-			data->gpu_hpa + data->size - 1);
-
-	data->gpdev = vdev->pdev;
-	data->group_notifier.notifier_call = vfio_pci_nvgpu_group_notifier;
-
-	ret = vfio_register_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
-			&events, &data->group_notifier);
-	if (ret)
-		goto free_exit;
-
-	/*
-	 * We have just set KVM, we do not need the listener anymore.
-	 * Also, keeping it registered means that if more than one GPU is
-	 * assigned, we will get several similar notifiers notifying about
-	 * the same device again which does not help with anything.
-	 */
-	vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
-			&data->group_notifier);
-
-	ret = vfio_pci_register_dev_region(vdev,
-			PCI_VENDOR_ID_NVIDIA | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
-			VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
-			&vfio_pci_nvgpu_regops,
-			data->size,
-			VFIO_REGION_INFO_FLAG_READ |
-			VFIO_REGION_INFO_FLAG_WRITE |
-			VFIO_REGION_INFO_FLAG_MMAP,
-			data);
-	if (ret)
-		goto free_exit;
-
-	return 0;
-free_exit:
-	kfree(data);
-
-	return ret;
-}
-
-/*
- * IBM NPU2 bridge
- */
-struct vfio_pci_npu2_data {
-	void *base; /* ATSD register virtual address, for emulated access */
-	unsigned long mmio_atsd; /* ATSD physical address */
-	unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
-	unsigned int link_speed; /* The link speed from DT's ibm,nvlink-speed */
-};
-
-static size_t vfio_pci_npu2_rw(struct vfio_pci_device *vdev,
-		char __user *buf, size_t count, loff_t *ppos, bool iswrite)
-{
-	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
-	struct vfio_pci_npu2_data *data = vdev->region[i].data;
-	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
-
-	if (pos >= vdev->region[i].size)
-		return -EINVAL;
-
-	count = min(count, (size_t)(vdev->region[i].size - pos));
-
-	if (iswrite) {
-		if (copy_from_user(data->base + pos, buf, count))
-			return -EFAULT;
-	} else {
-		if (copy_to_user(buf, data->base + pos, count))
-			return -EFAULT;
-	}
-	*ppos += count;
-
-	return count;
-}
-
-static int vfio_pci_npu2_mmap(struct vfio_pci_device *vdev,
-		struct vfio_pci_region *region, struct vm_area_struct *vma)
-{
-	int ret;
-	struct vfio_pci_npu2_data *data = region->data;
-	unsigned long req_len = vma->vm_end - vma->vm_start;
-
-	if (req_len != PAGE_SIZE)
-		return -EINVAL;
-
-	vma->vm_flags |= VM_PFNMAP;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	ret = remap_pfn_range(vma, vma->vm_start, data->mmio_atsd >> PAGE_SHIFT,
-			req_len, vma->vm_page_prot);
-	trace_vfio_pci_npu2_mmap(vdev->pdev, data->mmio_atsd, vma->vm_start,
-			vma->vm_end - vma->vm_start, ret);
-
-	return ret;
-}
-
-static void vfio_pci_npu2_release(struct vfio_pci_device *vdev,
-		struct vfio_pci_region *region)
-{
-	struct vfio_pci_npu2_data *data = region->data;
-
-	memunmap(data->base);
-	kfree(data);
-}
-
-static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev,
-		struct vfio_pci_region *region, struct vfio_info_cap *caps)
-{
-	struct vfio_pci_npu2_data *data = region->data;
-	struct vfio_region_info_cap_nvlink2_ssatgt captgt = {
-		.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
-		.header.version = 1,
-		.tgt = data->gpu_tgt
-	};
-	struct vfio_region_info_cap_nvlink2_lnkspd capspd = {
-		.header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD,
-		.header.version = 1,
-		.link_speed = data->link_speed
-	};
-	int ret;
-
-	ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt));
-	if (ret)
-		return ret;
-
-	return vfio_info_add_capability(caps, &capspd.header, sizeof(capspd));
-}
-
-static const struct vfio_pci_regops vfio_pci_npu2_regops = {
-	.rw = vfio_pci_npu2_rw,
-	.mmap = vfio_pci_npu2_mmap,
-	.release = vfio_pci_npu2_release,
-	.add_capability = vfio_pci_npu2_add_capability,
-};
-
-int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
-{
-	int ret;
-	struct vfio_pci_npu2_data *data;
-	struct device_node *nvlink_dn;
-	u32 nvlink_index = 0, mem_phandle = 0;
-	struct pci_dev *npdev = vdev->pdev;
-	struct device_node *npu_node = pci_device_to_OF_node(npdev);
-	struct pci_controller *hose = pci_bus_to_host(npdev->bus);
-	u64 mmio_atsd = 0;
-	u64 tgt = 0;
-	u32 link_speed = 0xff;
-
-	/*
-	 * PCI config space does not tell us about NVLink presense but
-	 * platform does, use this.
-	 */
-	if (!pnv_pci_get_gpu_dev(vdev->pdev))
-		return -ENODEV;
-
-	if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
-		return -ENODEV;
-
-	/*
-	 * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
-	 * so we can allocate one register per link, using nvlink index as
-	 * a key.
-	 * There is always at least one ATSD register so as long as at least
-	 * NVLink bridge #0 is passed to the guest, ATSD will be available.
-	 */
-	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
-	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
-			&nvlink_index)))
-		return -ENODEV;
-
-	if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
-			&mmio_atsd)) {
-		if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
-				&mmio_atsd)) {
-			dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
-			mmio_atsd = 0;
-		} else {
-			dev_warn(&vdev->pdev->dev,
-				 "Using fallback ibm,mmio-atsd[0] for ATSD.\n");
-		}
-	}
-
-	if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
-		dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
-		return -EFAULT;
-	}
-
-	if (of_property_read_u32(npu_node, "ibm,nvlink-speed", &link_speed)) {
-		dev_warn(&vdev->pdev->dev, "No ibm,nvlink-speed found\n");
-		return -EFAULT;
-	}
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->mmio_atsd = mmio_atsd;
-	data->gpu_tgt = tgt;
-	data->link_speed = link_speed;
-	if (data->mmio_atsd) {
-		data->base = memremap(data->mmio_atsd, SZ_64K, MEMREMAP_WT);
-		if (!data->base) {
-			ret = -ENOMEM;
-			goto free_exit;
-		}
-	}
-
-	/*
-	 * We want to expose the capability even if this specific NVLink
-	 * did not get its own ATSD register because capabilities
-	 * belong to VFIO regions and normally there will be ATSD register
-	 * assigned to the NVLink bridge.
-	 */
-	ret = vfio_pci_register_dev_region(vdev,
-			PCI_VENDOR_ID_IBM |
-			VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
-			VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
-			&vfio_pci_npu2_regops,
-			data->mmio_atsd ? PAGE_SIZE : 0,
-			VFIO_REGION_INFO_FLAG_READ |
-			VFIO_REGION_INFO_FLAG_WRITE |
-			VFIO_REGION_INFO_FLAG_MMAP,
-			data);
-	if (ret)
-		goto free_exit;
-
-	return 0;
-
-free_exit:
-	if (data->base)
-		memunmap(data->base);
-	kfree(data);
-
-	return ret;
-}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9cd1882a05af69..cdae2e4cf11cca 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -199,20 +199,6 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
 	return -ENODEV;
 }
 #endif
-#ifdef CONFIG_VFIO_PCI_NVLINK2
-extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
-extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
-#else
-static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
-{
-	return -ENODEV;
-}
-
-static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
-{
-	return -ENODEV;
-}
-#endif
 
 #ifdef CONFIG_S390
 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8ce36c1d53ca11..db7e782419d5d9 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -332,19 +332,6 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
 
-/* 10de vendor PCI sub-types */
-/*
- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
- */
-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)
-
-/* 1014 vendor PCI sub-types */
-/*
- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
- * to do TLB invalidation on a GPU.
- */
-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
-
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
@@ -637,33 +624,6 @@ struct vfio_device_migration_info {
  */
 #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE	3
 
-/*
- * Capability with compressed real address (aka SSA - small system address)
- * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
- * and by the userspace to associate a NVLink bridge with a GPU.
- */
-#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT	4
-
-struct vfio_region_info_cap_nvlink2_ssatgt {
-	struct vfio_info_cap_header header;
-	__u64 tgt;
-};
-
-/*
- * Capability with an NVLink link speed. The value is read by
- * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
- * property in the device tree. The value is fixed in the hardware
- * and failing to provide the correct value results in the link
- * not working with no indication from the driver why.
- */
-#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD	5
-
-struct vfio_region_info_cap_nvlink2_lnkspd {
-	struct vfio_info_cap_header header;
-	__u32 link_speed;
-	__u32 __pad;
-};
-
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  *				    struct vfio_irq_info)
-- 
2.30.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/2] powerpc/powernv: remove the nvlink support
  2021-03-22 15:01 remove the nvlink2 pci_vfio subdriver Christoph Hellwig
  2021-03-22 15:01 ` [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2 Christoph Hellwig
@ 2021-03-22 15:01 ` Christoph Hellwig
  2021-03-22 15:12 ` remove the nvlink2 pci_vfio subdriver Greg Kroah-Hartman
  2 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2021-03-22 15:01 UTC (permalink / raw)
  To: Michael Ellerman, Alex Williamson
  Cc: Benjamin Herrenschmidt, Greg Kroah-Hartman, Jason Gunthorpe,
	David Airlie, Daniel Vetter, dri-devel, Paul Mackerras,
	linuxppc-dev, linux-kernel, kvm, linux-api

This code was only used by the vfio-nvlink2 code, which itself had no
proper use.  Drop this huge chunk of code build into every powernv
or generic build.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/opal.h            |   3 -
 arch/powerpc/include/asm/pci-bridge.h      |   1 -
 arch/powerpc/include/asm/pci.h             |   7 -
 arch/powerpc/platforms/powernv/Makefile    |   2 +-
 arch/powerpc/platforms/powernv/npu-dma.c   | 705 ---------------------
 arch/powerpc/platforms/powernv/opal-call.c |   2 -
 arch/powerpc/platforms/powernv/pci-ioda.c  | 185 +-----
 arch/powerpc/platforms/powernv/pci.c       |  11 -
 arch/powerpc/platforms/powernv/pci.h       |  17 +-
 arch/powerpc/platforms/pseries/pci.c       |  23 -
 10 files changed, 8 insertions(+), 948 deletions(-)
 delete mode 100644 arch/powerpc/platforms/powernv/npu-dma.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac34b8e224..06eaa231697344 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -28,9 +28,6 @@ extern struct device_node *opal_node;
 
 /* API functions */
 int64_t opal_invalid_call(void);
-int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf);
-int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
-			uint64_t bdf);
 int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
 			uint64_t lpcr);
 int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index d2a2a14e56f91e..74424c14515ce0 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -126,7 +126,6 @@ struct pci_controller {
 #endif	/* CONFIG_PPC64 */
 
 	void *private_data;
-	struct npu *npu;
 };
 
 /* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6436f0b41539e3..d1f53260725ca7 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -119,11 +119,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose);
 
 #endif	/* __KERNEL__ */
 
-extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
-extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
-extern int pnv_npu2_init(struct pci_controller *hose);
-extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
-		unsigned long msr);
-extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
-
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 2eb6ae150d1fd5..be2546b968165e 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_PRESERVE_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_OPAL_CORE)	+= opal-core.o
-obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o pci-ioda-tce.o
 obj-$(CONFIG_PCI_IOV)   += pci-sriov.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
deleted file mode 100644
index b711dc3262a308..00000000000000
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ /dev/null
@@ -1,705 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * This file implements the DMA operations for NVLink devices. The NPU
- * devices all point to the same iommu table as the parent PCI device.
- *
- * Copyright Alistair Popple, IBM Corporation 2015.
- */
-
-#include <linux/mmu_notifier.h>
-#include <linux/mmu_context.h>
-#include <linux/of.h>
-#include <linux/pci.h>
-#include <linux/memblock.h>
-#include <linux/sizes.h>
-
-#include <asm/debugfs.h>
-#include <asm/powernv.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-
-#include "pci.h"
-
-static struct pci_dev *get_pci_dev(struct device_node *dn)
-{
-	struct pci_dn *pdn = PCI_DN(dn);
-	struct pci_dev *pdev;
-
-	pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
-					   pdn->busno, pdn->devfn);
-
-	/*
-	 * pci_get_domain_bus_and_slot() increased the reference count of
-	 * the PCI device, but callers don't need that actually as the PE
-	 * already holds a reference to the device. Since callers aren't
-	 * aware of the reference count change, call pci_dev_put() now to
-	 * avoid leaks.
-	 */
-	if (pdev)
-		pci_dev_put(pdev);
-
-	return pdev;
-}
-
-/* Given a NPU device get the associated PCI device. */
-struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
-{
-	struct device_node *dn;
-	struct pci_dev *gpdev;
-
-	if (WARN_ON(!npdev))
-		return NULL;
-
-	if (WARN_ON(!npdev->dev.of_node))
-		return NULL;
-
-	/* Get assoicated PCI device */
-	dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
-	if (!dn)
-		return NULL;
-
-	gpdev = get_pci_dev(dn);
-	of_node_put(dn);
-
-	return gpdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
-
-/* Given the real PCI device get a linked NPU device. */
-struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
-{
-	struct device_node *dn;
-	struct pci_dev *npdev;
-
-	if (WARN_ON(!gpdev))
-		return NULL;
-
-	/* Not all PCI devices have device-tree nodes */
-	if (!gpdev->dev.of_node)
-		return NULL;
-
-	/* Get assoicated PCI device */
-	dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
-	if (!dn)
-		return NULL;
-
-	npdev = get_pci_dev(dn);
-	of_node_put(dn);
-
-	return npdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_npu_dev);
-
-#ifdef CONFIG_IOMMU_API
-/*
- * Returns the PE assoicated with the PCI device of the given
- * NPU. Returns the linked pci device if pci_dev != NULL.
- */
-static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
-						  struct pci_dev **gpdev)
-{
-	struct pnv_phb *phb;
-	struct pci_controller *hose;
-	struct pci_dev *pdev;
-	struct pnv_ioda_pe *pe;
-	struct pci_dn *pdn;
-
-	pdev = pnv_pci_get_gpu_dev(npe->pdev);
-	if (!pdev)
-		return NULL;
-
-	pdn = pci_get_pdn(pdev);
-	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
-		return NULL;
-
-	hose = pci_bus_to_host(pdev->bus);
-	phb = hose->private_data;
-	pe = &phb->ioda.pe_array[pdn->pe_number];
-
-	if (gpdev)
-		*gpdev = pdev;
-
-	return pe;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group,
-		int num);
-
-static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
-		struct iommu_table *tbl)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pnv_phb *phb = npe->phb;
-	int64_t rc;
-	const unsigned long size = tbl->it_indirect_levels ?
-		tbl->it_level_size : tbl->it_size;
-	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
-	const __u64 win_size = tbl->it_size << tbl->it_page_shift;
-	int num2 = (num == 0) ? 1 : 0;
-
-	/* NPU has just one TVE so if there is another table, remove it first */
-	if (npe->table_group.tables[num2])
-		pnv_npu_unset_window(&npe->table_group, num2);
-
-	pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
-			start_addr, start_addr + win_size - 1,
-			IOMMU_PAGE_SIZE(tbl));
-
-	rc = opal_pci_map_pe_dma_window(phb->opal_id,
-			npe->pe_number,
-			npe->pe_number,
-			tbl->it_indirect_levels + 1,
-			__pa(tbl->it_base),
-			size << 3,
-			IOMMU_PAGE_SIZE(tbl));
-	if (rc) {
-		pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
-		return rc;
-	}
-	pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
-	/* Add the table to the list so its TCE cache will get invalidated */
-	pnv_pci_link_table_and_group(phb->hose->node, num,
-			tbl, &npe->table_group);
-
-	return 0;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pnv_phb *phb = npe->phb;
-	int64_t rc;
-
-	if (!npe->table_group.tables[num])
-		return 0;
-
-	pe_info(npe, "Removing DMA window\n");
-
-	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
-			npe->pe_number,
-			0/* levels */, 0/* table address */,
-			0/* table size */, 0/* page size */);
-	if (rc) {
-		pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
-		return rc;
-	}
-	pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
-	pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
-			&npe->table_group);
-
-	return 0;
-}
-
-/* Switch ownership from platform code to external user (e.g. VFIO) */
-static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pnv_phb *phb = npe->phb;
-	int64_t rc;
-	struct pci_dev *gpdev = NULL;
-
-	/*
-	 * Note: NPU has just a single TVE in the hardware which means that
-	 * while used by the kernel, it can have either 32bit window or
-	 * DMA bypass but never both. So we deconfigure 32bit window only
-	 * if it was enabled at the moment of ownership change.
-	 */
-	if (npe->table_group.tables[0]) {
-		pnv_npu_unset_window(&npe->table_group, 0);
-		return;
-	}
-
-	/* Disable bypass */
-	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
-			npe->pe_number, npe->pe_number,
-			0 /* bypass base */, 0);
-	if (rc) {
-		pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
-		return;
-	}
-	pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
-
-	get_gpu_pci_dev_and_pe(npe, &gpdev);
-	if (gpdev)
-		pnv_npu2_unmap_lpar_dev(gpdev);
-}
-
-static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pci_dev *gpdev = NULL;
-
-	get_gpu_pci_dev_and_pe(npe, &gpdev);
-	if (gpdev)
-		pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
-}
-
-static struct iommu_table_group_ops pnv_pci_npu_ops = {
-	.set_window = pnv_npu_set_window,
-	.unset_window = pnv_npu_unset_window,
-	.take_ownership = pnv_npu_take_ownership,
-	.release_ownership = pnv_npu_release_ownership,
-};
-#endif /* !CONFIG_IOMMU_API */
-
-/*
- * NPU2 ATS
- */
-/* Maximum possible number of ATSD MMIO registers per NPU */
-#define NV_NMMU_ATSD_REGS 8
-#define NV_NPU_MAX_PE_NUM	16
-
-/*
- * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
- * up to 3 x (GPU + 2xNPUs) (POWER9).
- */
-struct npu_comp {
-	struct iommu_table_group table_group;
-	int pe_num;
-	struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
-};
-
-/* An NPU descriptor, valid for POWER9 only */
-struct npu {
-	int index;
-	struct npu_comp npucomp;
-};
-
-#ifdef CONFIG_IOMMU_API
-static long pnv_npu_peers_create_table_userspace(
-		struct iommu_table_group *table_group,
-		int num, __u32 page_shift, __u64 window_size, __u32 levels,
-		struct iommu_table **ptbl)
-{
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	if (!npucomp->pe_num || !npucomp->pe[0] ||
-			!npucomp->pe[0]->table_group.ops ||
-			!npucomp->pe[0]->table_group.ops->create_table)
-		return -EFAULT;
-
-	return npucomp->pe[0]->table_group.ops->create_table(
-			&npucomp->pe[0]->table_group, num, page_shift,
-			window_size, levels, ptbl);
-}
-
-static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
-		int num, struct iommu_table *tbl)
-{
-	int i, j;
-	long ret = 0;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		if (!pe->table_group.ops->set_window)
-			continue;
-
-		ret = pe->table_group.ops->set_window(&pe->table_group,
-				num, tbl);
-		if (ret)
-			break;
-	}
-
-	if (ret) {
-		for (j = 0; j < i; ++j) {
-			struct pnv_ioda_pe *pe = npucomp->pe[j];
-
-			if (!pe->table_group.ops->unset_window)
-				continue;
-
-			ret = pe->table_group.ops->unset_window(
-					&pe->table_group, num);
-			if (ret)
-				break;
-		}
-	} else {
-		table_group->tables[num] = iommu_tce_table_get(tbl);
-	}
-
-	return ret;
-}
-
-static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
-		int num)
-{
-	int i, j;
-	long ret = 0;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		WARN_ON(npucomp->table_group.tables[num] !=
-				table_group->tables[num]);
-		if (!npucomp->table_group.tables[num])
-			continue;
-
-		if (!pe->table_group.ops->unset_window)
-			continue;
-
-		ret = pe->table_group.ops->unset_window(&pe->table_group, num);
-		if (ret)
-			break;
-	}
-
-	if (ret) {
-		for (j = 0; j < i; ++j) {
-			struct pnv_ioda_pe *pe = npucomp->pe[j];
-
-			if (!npucomp->table_group.tables[num])
-				continue;
-
-			if (!pe->table_group.ops->set_window)
-				continue;
-
-			ret = pe->table_group.ops->set_window(&pe->table_group,
-					num, table_group->tables[num]);
-			if (ret)
-				break;
-		}
-	} else if (table_group->tables[num]) {
-		iommu_tce_table_put(table_group->tables[num]);
-		table_group->tables[num] = NULL;
-	}
-
-	return ret;
-}
-
-static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
-{
-	int i;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		if (!pe->table_group.ops ||
-		    !pe->table_group.ops->take_ownership)
-			continue;
-		pe->table_group.ops->take_ownership(&pe->table_group);
-	}
-}
-
-static void pnv_npu_peers_release_ownership(
-		struct iommu_table_group *table_group)
-{
-	int i;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		if (!pe->table_group.ops ||
-		    !pe->table_group.ops->release_ownership)
-			continue;
-		pe->table_group.ops->release_ownership(&pe->table_group);
-	}
-}
-
-static struct iommu_table_group_ops pnv_npu_peers_ops = {
-	.get_table_size = pnv_pci_ioda2_get_table_size,
-	.create_table = pnv_npu_peers_create_table_userspace,
-	.set_window = pnv_npu_peers_set_window,
-	.unset_window = pnv_npu_peers_unset_window,
-	.take_ownership = pnv_npu_peers_take_ownership,
-	.release_ownership = pnv_npu_peers_release_ownership,
-};
-
-static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
-		struct pnv_ioda_pe *pe)
-{
-	if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
-		return;
-
-	npucomp->pe[npucomp->pe_num] = pe;
-	++npucomp->pe_num;
-}
-
-static struct iommu_table_group *
-	pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
-{
-	struct iommu_table_group *compound_group;
-	struct npu_comp *npucomp;
-	struct pci_dev *gpdev = NULL;
-	struct pci_controller *hose;
-	struct pci_dev *npdev = NULL;
-
-	list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
-		npdev = pnv_pci_get_npu_dev(gpdev, 0);
-		if (npdev)
-			break;
-	}
-
-	if (!npdev)
-		/* It is not an NPU attached device, skip */
-		return NULL;
-
-	hose = pci_bus_to_host(npdev->bus);
-
-	if (hose->npu) {
-		/* P9 case: compound group is per-NPU (all gpus, all links) */
-		npucomp = &hose->npu->npucomp;
-	} else {
-		/* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
-		npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
-	}
-
-	compound_group = &npucomp->table_group;
-	if (!compound_group->group) {
-		compound_group->ops = &pnv_npu_peers_ops;
-		iommu_register_group(compound_group, hose->global_number,
-				pe->pe_number);
-
-		/* Steal capabilities from a GPU PE */
-		compound_group->max_dynamic_windows_supported =
-			pe->table_group.max_dynamic_windows_supported;
-		compound_group->tce32_start = pe->table_group.tce32_start;
-		compound_group->tce32_size = pe->table_group.tce32_size;
-		compound_group->max_levels = pe->table_group.max_levels;
-		if (!compound_group->pgsizes)
-			compound_group->pgsizes = pe->table_group.pgsizes;
-	}
-
-	/*
-	 * The gpu would have been added to the iommu group that's created
-	 * for the PE. Pull it out now.
-	 */
-	iommu_del_device(&gpdev->dev);
-
-       /*
-	* I'm not sure this is strictly required, but it's probably a good idea
-	* since the table_group for the PE is going to be attached to the
-	* compound table group. If we leave the PE's iommu group active then
-	* we might have the same table_group being modifiable via two sepeate
-	* iommu groups.
-	*/
-	iommu_group_put(pe->table_group.group);
-
-	/* now put the GPU into the compound group */
-	pnv_comp_attach_table_group(npucomp, pe);
-	iommu_add_device(compound_group, &gpdev->dev);
-
-	return compound_group;
-}
-
-static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
-{
-	struct iommu_table_group *table_group;
-	struct npu_comp *npucomp;
-	struct pci_dev *gpdev = NULL;
-	struct pci_dev *npdev;
-	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
-
-	WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
-	if (!gpe)
-		return NULL;
-
-	/*
-	 * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
-	 * but NPU bridges do not have this hook defined so we do it here.
-	 * We do not setup other table group parameters as they won't be used
-	 * anyway - NVLink bridges are subordinate PEs.
-	 */
-	pe->table_group.ops = &pnv_pci_npu_ops;
-
-	table_group = iommu_group_get_iommudata(
-			iommu_group_get(&gpdev->dev));
-
-	/*
-	 * On P9 NPU PHB and PCI PHB support different page sizes,
-	 * keep only matching. We expect here that NVLink bridge PE pgsizes is
-	 * initialized by the caller.
-	 */
-	table_group->pgsizes &= pe->table_group.pgsizes;
-	npucomp = container_of(table_group, struct npu_comp, table_group);
-	pnv_comp_attach_table_group(npucomp, pe);
-
-	list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
-		struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
-
-		if (gpdevtmp != gpdev)
-			continue;
-
-		iommu_add_device(table_group, &npdev->dev);
-	}
-
-	return table_group;
-}
-
-void pnv_pci_npu_setup_iommu_groups(void)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct pnv_ioda_pe *pe;
-
-	/*
-	 * For non-nvlink devices the IOMMU group is registered when the PE is
-	 * configured and devices are added to the group when the per-device
-	 * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
-	 * only initialise for "normal" IODA PHBs.
-	 *
-	 * For NVLink devices we need to ensure the NVLinks and the GPU end up
-	 * in the same IOMMU group, so that's handled here.
-	 */
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-
-		if (phb->type == PNV_PHB_IODA2)
-			list_for_each_entry(pe, &phb->ioda.pe_list, list)
-				pnv_try_setup_npu_table_group(pe);
-	}
-
-	/*
-	 * Now we have all PHBs discovered, time to add NPU devices to
-	 * the corresponding IOMMU groups.
-	 */
-	list_for_each_entry(hose, &hose_list, list_node) {
-		unsigned long  pgsizes;
-
-		phb = hose->private_data;
-
-		if (phb->type != PNV_PHB_NPU_NVLINK)
-			continue;
-
-		pgsizes = pnv_ioda_parse_tce_sizes(phb);
-		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-			/*
-			 * IODA2 bridges get this set up from
-			 * pci_controller_ops::setup_bridge but NPU bridges
-			 * do not have this hook defined so we do it here.
-			 */
-			pe->table_group.pgsizes = pgsizes;
-			pnv_npu_compound_attach(pe);
-		}
-	}
-}
-#endif /* CONFIG_IOMMU_API */
-
-int pnv_npu2_init(struct pci_controller *hose)
-{
-	static int npu_index;
-	struct npu *npu;
-	int ret;
-
-	npu = kzalloc(sizeof(*npu), GFP_KERNEL);
-	if (!npu)
-		return -ENOMEM;
-
-	npu_index++;
-	if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
-		ret = -ENOSPC;
-		goto fail_exit;
-	}
-	npu->index = npu_index;
-	hose->npu = npu;
-
-	return 0;
-
-fail_exit:
-	kfree(npu);
-	return ret;
-}
-
-int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
-		unsigned long msr)
-{
-	int ret;
-	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
-	struct pci_controller *hose;
-	struct pnv_phb *nphb;
-
-	if (!npdev)
-		return -ENODEV;
-
-	hose = pci_bus_to_host(npdev->bus);
-	if (hose->npu == NULL) {
-		dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
-		return 0;
-	}
-
-	nphb = hose->private_data;
-
-	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
-			nphb->opal_id, lparid);
-	/*
-	 * Currently we only support radix and non-zero LPCR only makes sense
-	 * for hash tables so skiboot expects the LPCR parameter to be a zero.
-	 */
-	ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
-				0 /* LPCR bits */);
-	if (ret) {
-		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
-		return ret;
-	}
-
-	dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
-			nphb->opal_id, msr);
-	ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
-				    pci_dev_id(gpdev));
-	if (ret < 0)
-		dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
-	else
-		ret = 0;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
-
-void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
-{
-	struct pci_dev *gpdev;
-
-	list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
-		pnv_npu2_map_lpar_dev(gpdev, 0, msr);
-}
-
-int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
-{
-	int ret;
-	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
-	struct pci_controller *hose;
-	struct pnv_phb *nphb;
-
-	if (!npdev)
-		return -ENODEV;
-
-	hose = pci_bus_to_host(npdev->bus);
-	if (hose->npu == NULL) {
-		dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
-		return 0;
-	}
-
-	nphb = hose->private_data;
-
-	dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
-			nphb->opal_id);
-	ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
-				       pci_dev_id(gpdev));
-	if (ret < 0) {
-		dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
-		return ret;
-	}
-
-	/* Set LPID to 0 anyway, just to be safe */
-	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
-	ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
-				0 /* LPCR bits */);
-	if (ret)
-		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52d258f64..01401e3da7ca14 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -267,8 +267,6 @@ OPAL_CALL(opal_xive_get_queue_state,		OPAL_XIVE_GET_QUEUE_STATE);
 OPAL_CALL(opal_xive_set_queue_state,		OPAL_XIVE_SET_QUEUE_STATE);
 OPAL_CALL(opal_xive_get_vp_state,		OPAL_XIVE_GET_VP_STATE);
 OPAL_CALL(opal_signal_system_reset,		OPAL_SIGNAL_SYSTEM_RESET);
-OPAL_CALL(opal_npu_init_context,		OPAL_NPU_INIT_CONTEXT);
-OPAL_CALL(opal_npu_destroy_context,		OPAL_NPU_DESTROY_CONTEXT);
 OPAL_CALL(opal_npu_map_lpar,			OPAL_NPU_MAP_LPAR);
 OPAL_CALL(opal_imc_counters_init,		OPAL_IMC_COUNTERS_INIT);
 OPAL_CALL(opal_imc_counters_start,		OPAL_IMC_COUNTERS_START);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f0f901683a2fe1..5c88d7145a23e0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -47,8 +47,7 @@
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR	*/
 #define PNV_IODA1_DMA32_SEGSIZE	0x10000000
 
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
-					      "NPU_OCAPI" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" };
 
 static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
 static void pnv_pci_configure_bus(struct pci_bus *bus);
@@ -192,8 +191,6 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 	unsigned int pe_num = pe->pe_number;
 
 	WARN_ON(pe->pdev);
-	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
-	kfree(pe->npucomp);
 	memset(pe, 0, sizeof(struct pnv_ioda_pe));
 
 	mutex_lock(&phb->ioda.pe_alloc_mutex);
@@ -875,7 +872,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	 * Release from all parents PELT-V. NPUs don't have a PELTV
 	 * table
 	 */
-	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+	if (phb->type != PNV_PHB_NPU_OCAPI)
 		pnv_ioda_unset_peltv(phb, pe, parent);
 
 	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
@@ -946,7 +943,7 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	 * Configure PELTV. NPUs don't have a PELTV table so skip
 	 * configuration on them.
 	 */
-	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+	if (phb->type != PNV_PHB_NPU_OCAPI)
 		pnv_ioda_set_peltv(phb, pe, true);
 
 	/* Setup reverse map */
@@ -1002,8 +999,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
 	/* NOTE: We don't get a reference for the pointer in the PE
 	 * data structure, both the device and PE structures should be
-	 * destroyed at the same time. However, removing nvlink
-	 * devices will need some work.
+	 * destroyed at the same time.
 	 *
 	 * At some point we want to remove the PDN completely anyways
 	 */
@@ -1099,113 +1095,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 	return pe;
 }
 
-static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
-{
-	int pe_num, found_pe = false, rc;
-	long rid;
-	struct pnv_ioda_pe *pe;
-	struct pci_dev *gpu_pdev;
-	struct pci_dn *npu_pdn;
-	struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
-
-	/*
-	 * Intentionally leak a reference on the npu device (for
-	 * nvlink only; this is not an opencapi path) to make sure it
-	 * never goes away, as it's been the case all along and some
-	 * work is needed otherwise.
-	 */
-	pci_dev_get(npu_pdev);
-
-	/*
-	 * Due to a hardware errata PE#0 on the NPU is reserved for
-	 * error handling. This means we only have three PEs remaining
-	 * which need to be assigned to four links, implying some
-	 * links must share PEs.
-	 *
-	 * To achieve this we assign PEs such that NPUs linking the
-	 * same GPU get assigned the same PE.
-	 */
-	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
-	for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
-		pe = &phb->ioda.pe_array[pe_num];
-		if (!pe->pdev)
-			continue;
-
-		if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
-			/*
-			 * This device has the same peer GPU so should
-			 * be assigned the same PE as the existing
-			 * peer NPU.
-			 */
-			dev_info(&npu_pdev->dev,
-				"Associating to existing PE %x\n", pe_num);
-			npu_pdn = pci_get_pdn(npu_pdev);
-			rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
-			npu_pdn->pe_number = pe_num;
-			phb->ioda.pe_rmap[rid] = pe->pe_number;
-			pe->device_count++;
-
-			/* Map the PE to this link */
-			rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
-					OpalPciBusAll,
-					OPAL_COMPARE_RID_DEVICE_NUMBER,
-					OPAL_COMPARE_RID_FUNCTION_NUMBER,
-					OPAL_MAP_PE);
-			WARN_ON(rc != OPAL_SUCCESS);
-			found_pe = true;
-			break;
-		}
-	}
-
-	if (!found_pe)
-		/*
-		 * Could not find an existing PE so allocate a new
-		 * one.
-		 */
-		return pnv_ioda_setup_dev_PE(npu_pdev);
-	else
-		return pe;
-}
-
-static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
-{
-	struct pci_dev *pdev;
-
-	list_for_each_entry(pdev, &bus->devices, bus_list)
-		pnv_ioda_setup_npu_PE(pdev);
-}
-
-static void pnv_pci_ioda_setup_nvlink(void)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct pnv_ioda_pe *pe;
-
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-		if (phb->type == PNV_PHB_NPU_NVLINK) {
-			/* PE#0 is needed for error reporting */
-			pnv_ioda_reserve_pe(phb, 0);
-			pnv_ioda_setup_npu_PEs(hose->bus);
-			if (phb->model == PNV_PHB_MODEL_NPU2)
-				WARN_ON_ONCE(pnv_npu2_init(hose));
-		}
-	}
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-		if (phb->type != PNV_PHB_IODA2)
-			continue;
-
-		list_for_each_entry(pe, &phb->ioda.pe_list, list)
-			pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
-	}
-
-#ifdef CONFIG_IOMMU_API
-	/* setup iommu groups so we can do nvlink pass-thru */
-	pnv_pci_npu_setup_iommu_groups();
-#endif
-}
-
 static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe);
 
@@ -1468,18 +1357,6 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
 #define PHB3_TCE_KILL_INVAL_PE		PPC_BIT(1)
 #define PHB3_TCE_KILL_INVAL_ONE		PPC_BIT(2)
 
-static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
-	const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;
-
-	mb(); /* Ensure previous TCE table stores are visible */
-	if (rm)
-		__raw_rm_writeq_be(val, invalidate);
-	else
-		__raw_writeq_be(val, invalidate);
-}
-
 static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
 {
 	/* 01xb - invalidate TCEs that match the specified PE# */
@@ -1539,20 +1416,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 		struct pnv_phb *phb = pe->phb;
 		unsigned int shift = tbl->it_page_shift;
 
-		/*
-		 * NVLink1 can use the TCE kill register directly as
-		 * it's the same as PHB3. NVLink2 is different and
-		 * should go via the OPAL call.
-		 */
-		if (phb->model == PNV_PHB_MODEL_NPU) {
-			/*
-			 * The NVLink hardware does not support TCE kill
-			 * per TCE entry so we have to invalidate
-			 * the entire cache for it.
-			 */
-			pnv_pci_phb3_tce_invalidate_entire(phb, rm);
-			continue;
-		}
 		if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
 			pnv_pci_phb3_tce_invalidate(pe, rm, shift,
 						    index, npages);
@@ -1564,14 +1427,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 	}
 }
 
-void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
-	if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
-		pnv_pci_phb3_tce_invalidate_entire(phb, rm);
-	else
-		opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
-}
-
 static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
 		long npages, unsigned long uaddr,
 		enum dma_data_direction direction,
@@ -2450,7 +2305,6 @@ static void pnv_pci_enable_bridges(void)
 
 static void pnv_pci_ioda_fixup(void)
 {
-	pnv_pci_ioda_setup_nvlink();
 	pnv_pci_ioda_create_dbgfs();
 
 	pnv_pci_enable_bridges();
@@ -2823,15 +2677,6 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
 		pnv_ioda_release_pe(pe);
 }
 
-static void pnv_npu_disable_device(struct pci_dev *pdev)
-{
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
-	struct eeh_pe *eehpe = edev ? edev->pe : NULL;
-
-	if (eehpe && eeh_ops && eeh_ops->reset)
-		eeh_ops->reset(eehpe, EEH_RESET_HOT);
-}
-
 static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 {
 	struct pnv_phb *phb = hose->private_data;
@@ -2873,16 +2718,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
 	.shutdown		= pnv_pci_ioda_shutdown,
 };
 
-static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
-	.setup_msi_irqs		= pnv_setup_msi_irqs,
-	.teardown_msi_irqs	= pnv_teardown_msi_irqs,
-	.enable_device_hook	= pnv_pci_enable_device_hook,
-	.window_alignment	= pnv_pci_window_alignment,
-	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
-	.shutdown		= pnv_pci_ioda_shutdown,
-	.disable_device		= pnv_npu_disable_device,
-};
-
 static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
 	.enable_device_hook	= pnv_ocapi_enable_device_hook,
 	.release_device		= pnv_pci_release_device,
@@ -2956,10 +2791,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		phb->model = PNV_PHB_MODEL_P7IOC;
 	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
 		phb->model = PNV_PHB_MODEL_PHB3;
-	else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
-		phb->model = PNV_PHB_MODEL_NPU;
-	else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
-		phb->model = PNV_PHB_MODEL_NPU2;
 	else
 		phb->model = PNV_PHB_MODEL_UNKNOWN;
 
@@ -3117,9 +2948,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 
 	switch (phb->type) {
-	case PNV_PHB_NPU_NVLINK:
-		hose->controller_ops = pnv_npu_ioda_controller_ops;
-		break;
 	case PNV_PHB_NPU_OCAPI:
 		hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
 		break;
@@ -3172,11 +3000,6 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
-void __init pnv_pci_init_npu_phb(struct device_node *np)
-{
-	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
-}
-
 void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
 {
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 9b9bca169275a8..b18468dc31ff5b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -926,17 +926,6 @@ void __init pnv_pci_init(void)
 	for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
 		pnv_pci_init_ioda2_phb(np);
 
-	/* Look for NPU PHBs */
-	for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
-		pnv_pci_init_npu_phb(np);
-
-	/*
-	 * Look for NPU2 PHBs which we treat mostly as NPU PHBs with
-	 * the exception of TCE kill which requires an OPAL call.
-	 */
-	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
-		pnv_pci_init_npu_phb(np);
-
 	/* Look for NPU2 OpenCAPI PHBs */
 	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
 		pnv_pci_init_npu2_opencapi_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 36d22920f5a3cb..c8d4f222a86fe3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -10,10 +10,9 @@
 struct pci_dn;
 
 enum pnv_phb_type {
-	PNV_PHB_IODA1		= 0,
-	PNV_PHB_IODA2		= 1,
-	PNV_PHB_NPU_NVLINK	= 2,
-	PNV_PHB_NPU_OCAPI	= 3,
+	PNV_PHB_IODA1,
+	PNV_PHB_IODA2,
+	PNV_PHB_NPU_OCAPI,
 };
 
 /* Precise PHB model for error management */
@@ -21,8 +20,6 @@ enum pnv_phb_model {
 	PNV_PHB_MODEL_UNKNOWN,
 	PNV_PHB_MODEL_P7IOC,
 	PNV_PHB_MODEL_PHB3,
-	PNV_PHB_MODEL_NPU,
-	PNV_PHB_MODEL_NPU2,
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE	8192
@@ -81,7 +78,6 @@ struct pnv_ioda_pe {
 
 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
 	struct iommu_table_group table_group;
-	struct npu_comp		*npucomp;
 
 	/* 64-bit TCE bypass region */
 	bool			tce_bypass_enabled;
@@ -289,9 +285,7 @@ extern struct iommu_table *pnv_pci_table_alloc(int nid);
 
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
-extern void pnv_pci_init_npu_phb(struct device_node *np);
 extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
-extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
@@ -314,11 +308,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 #define pe_info(pe, fmt, ...)					\
 	pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
 
-/* Nvlink functions */
-extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
-extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
-extern void pnv_pci_npu_setup_iommu_groups(void);
-
 /* pci-ioda-tce.c */
 #define POWERNV_IOMMU_DEFAULT_LEVELS	2
 #define POWERNV_IOMMU_MAX_LEVELS	5
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1bffbd1c9a94b1..3b6800f774c241 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -224,8 +224,6 @@ static void __init pSeries_request_regions(void)
 
 void __init pSeries_final_fixup(void)
 {
-	struct pci_controller *hose;
-
 	pSeries_request_regions();
 
 	eeh_show_enabled();
@@ -234,27 +232,6 @@ void __init pSeries_final_fixup(void)
 	ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
 	ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
 #endif
-	list_for_each_entry(hose, &hose_list, list_node) {
-		struct device_node *dn = hose->dn, *nvdn;
-
-		while (1) {
-			dn = of_find_all_nodes(dn);
-			if (!dn)
-				break;
-			nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
-			if (!nvdn)
-				continue;
-			if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
-				continue;
-			if (!of_device_is_compatible(nvdn->parent,
-						"ibm,power9-npu"))
-				continue;
-#ifdef CONFIG_PPC_POWERNV
-			WARN_ON_ONCE(pnv_npu2_init(hose));
-#endif
-			break;
-		}
-	}
 }
 
 /*
-- 
2.30.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: remove the nvlink2 pci_vfio subdriver
  2021-03-22 15:01 remove the nvlink2 pci_vfio subdriver Christoph Hellwig
  2021-03-22 15:01 ` [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2 Christoph Hellwig
  2021-03-22 15:01 ` [PATCH 2/2] powerpc/powernv: remove the nvlink support Christoph Hellwig
@ 2021-03-22 15:12 ` Greg Kroah-Hartman
  2 siblings, 0 replies; 6+ messages in thread
From: Greg Kroah-Hartman @ 2021-03-22 15:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Michael Ellerman, Alex Williamson, Benjamin Herrenschmidt,
	Jason Gunthorpe, David Airlie, Daniel Vetter, dri-devel,
	Paul Mackerras, linuxppc-dev, linux-kernel, kvm, linux-api

On Mon, Mar 22, 2021 at 04:01:53PM +0100, Christoph Hellwig wrote:
> Hi all,
> 
> the nvlink2 vfio subdriver is a weird beast.  It supports a hardware
> feature without any open source component - what would normally be
> the normal open source userspace that we require for kernel drivers,
> although in this particular case user space could of course be a
> kernel driver in a VM.  It also happens to be a complete mess that
> does not properly bind to PCI IDs, is hacked into the vfio_pci driver
> and also pulles in over 1000 lines of code always build into powerpc
> kernels that have Power NV support enabled.  Because of all these
> issues and the lack of breaking userspace when it is removed I think
> the best idea is to simply kill.
> 
> Diffstat:
>  arch/powerpc/platforms/powernv/npu-dma.c     |  705 ---------------------------
>  b/arch/powerpc/include/asm/opal.h            |    3 
>  b/arch/powerpc/include/asm/pci-bridge.h      |    1 
>  b/arch/powerpc/include/asm/pci.h             |    7 
>  b/arch/powerpc/platforms/powernv/Makefile    |    2 
>  b/arch/powerpc/platforms/powernv/opal-call.c |    2 
>  b/arch/powerpc/platforms/powernv/pci-ioda.c  |  185 -------
>  b/arch/powerpc/platforms/powernv/pci.c       |   11 
>  b/arch/powerpc/platforms/powernv/pci.h       |   17 
>  b/arch/powerpc/platforms/pseries/pci.c       |   23 
>  b/drivers/vfio/pci/Kconfig                   |    6 
>  b/drivers/vfio/pci/Makefile                  |    1 
>  b/drivers/vfio/pci/vfio_pci.c                |   18 
>  b/drivers/vfio/pci/vfio_pci_private.h        |   14 
>  b/include/uapi/linux/vfio.h                  |   40 -
>  drivers/vfio/pci/vfio_pci_nvlink2.c          |  490 ------------------
>  16 files changed, 8 insertions(+), 1517 deletions(-)

I thought this was supposed to be removed a few years ago!

Anyway, no objection from me:

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2
  2021-03-22 15:01 ` [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2 Christoph Hellwig
@ 2021-03-22 17:46   ` Alex Williamson
  0 siblings, 0 replies; 6+ messages in thread
From: Alex Williamson @ 2021-03-22 17:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Michael Ellerman, Benjamin Herrenschmidt, Greg Kroah-Hartman,
	Jason Gunthorpe, David Airlie, Daniel Vetter, dri-devel,
	Paul Mackerras, linuxppc-dev, linux-kernel, kvm, linux-api,
	Alexey Kardashevskiy

On Mon, 22 Mar 2021 16:01:54 +0100
Christoph Hellwig <hch@lst.de> wrote:
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 8ce36c1d53ca11..db7e782419d5d9 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -332,19 +332,6 @@ struct vfio_region_info_cap_type {
>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>  
> -/* 10de vendor PCI sub-types */
> -/*
> - * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
> - */
> -#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)
> -
> -/* 1014 vendor PCI sub-types */
> -/*
> - * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
> - * to do TLB invalidation on a GPU.
> - */
> -#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
> -
>  /* sub-types for VFIO_REGION_TYPE_GFX */
>  #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>  
> @@ -637,33 +624,6 @@ struct vfio_device_migration_info {
>   */
>  #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE	3
>  
> -/*
> - * Capability with compressed real address (aka SSA - small system address)
> - * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
> - * and by the userspace to associate a NVLink bridge with a GPU.
> - */
> -#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT	4
> -
> -struct vfio_region_info_cap_nvlink2_ssatgt {
> -	struct vfio_info_cap_header header;
> -	__u64 tgt;
> -};
> -
> -/*
> - * Capability with an NVLink link speed. The value is read by
> - * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
> - * property in the device tree. The value is fixed in the hardware
> - * and failing to provide the correct value results in the link
> - * not working with no indication from the driver why.
> - */
> -#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD	5
> -
> -struct vfio_region_info_cap_nvlink2_lnkspd {
> -	struct vfio_info_cap_header header;
> -	__u32 link_speed;
> -	__u32 __pad;
> -};
> -
>  /**
>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>   *				    struct vfio_irq_info)

I'll leave any attempt to defend keeping this code to Alexey, but
minimally these region sub-types and capability IDs should probably be
reserved to avoid breaking whatever userspace might exist to consume
these.  Our ID space is sufficiently large that we don't need to
recycle them any time soon.  Thanks,

Alex


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/2] powerpc/powernv: remove the nvlink support
  2021-03-26  6:13 remove the nvlink2 pci_vfio subdriver v2 Christoph Hellwig
@ 2021-03-26  6:13 ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2021-03-26  6:13 UTC (permalink / raw)
  To: Michael Ellerman, Alex Williamson
  Cc: Benjamin Herrenschmidt, Greg Kroah-Hartman, Jason Gunthorpe,
	David Airlie, Daniel Vetter, dri-devel, Paul Mackerras,
	linuxppc-dev, linux-kernel, kvm, linux-api

This code was only used by the vfio-nvlink2 code, which itself had no
proper use.  Drop this huge chunk of code build into every powernv
or generic build.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/opal.h            |   3 -
 arch/powerpc/include/asm/pci-bridge.h      |   1 -
 arch/powerpc/include/asm/pci.h             |   7 -
 arch/powerpc/platforms/powernv/Makefile    |   2 +-
 arch/powerpc/platforms/powernv/npu-dma.c   | 705 ---------------------
 arch/powerpc/platforms/powernv/opal-call.c |   2 -
 arch/powerpc/platforms/powernv/pci-ioda.c  | 185 +-----
 arch/powerpc/platforms/powernv/pci.c       |  11 -
 arch/powerpc/platforms/powernv/pci.h       |  17 +-
 arch/powerpc/platforms/pseries/pci.c       |  23 -
 10 files changed, 8 insertions(+), 948 deletions(-)
 delete mode 100644 arch/powerpc/platforms/powernv/npu-dma.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac34b8e224..06eaa231697344 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -28,9 +28,6 @@ extern struct device_node *opal_node;
 
 /* API functions */
 int64_t opal_invalid_call(void);
-int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf);
-int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
-			uint64_t bdf);
 int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
 			uint64_t lpcr);
 int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index d2a2a14e56f91e..74424c14515ce0 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -126,7 +126,6 @@ struct pci_controller {
 #endif	/* CONFIG_PPC64 */
 
 	void *private_data;
-	struct npu *npu;
 };
 
 /* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6436f0b41539e3..d1f53260725ca7 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -119,11 +119,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose);
 
 #endif	/* __KERNEL__ */
 
-extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
-extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
-extern int pnv_npu2_init(struct pci_controller *hose);
-extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
-		unsigned long msr);
-extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
-
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 2eb6ae150d1fd5..be2546b968165e 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_PRESERVE_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_OPAL_CORE)	+= opal-core.o
-obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o pci-ioda-tce.o
 obj-$(CONFIG_PCI_IOV)   += pci-sriov.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
deleted file mode 100644
index b711dc3262a308..00000000000000
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ /dev/null
@@ -1,705 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * This file implements the DMA operations for NVLink devices. The NPU
- * devices all point to the same iommu table as the parent PCI device.
- *
- * Copyright Alistair Popple, IBM Corporation 2015.
- */
-
-#include <linux/mmu_notifier.h>
-#include <linux/mmu_context.h>
-#include <linux/of.h>
-#include <linux/pci.h>
-#include <linux/memblock.h>
-#include <linux/sizes.h>
-
-#include <asm/debugfs.h>
-#include <asm/powernv.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-
-#include "pci.h"
-
-static struct pci_dev *get_pci_dev(struct device_node *dn)
-{
-	struct pci_dn *pdn = PCI_DN(dn);
-	struct pci_dev *pdev;
-
-	pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
-					   pdn->busno, pdn->devfn);
-
-	/*
-	 * pci_get_domain_bus_and_slot() increased the reference count of
-	 * the PCI device, but callers don't need that actually as the PE
-	 * already holds a reference to the device. Since callers aren't
-	 * aware of the reference count change, call pci_dev_put() now to
-	 * avoid leaks.
-	 */
-	if (pdev)
-		pci_dev_put(pdev);
-
-	return pdev;
-}
-
-/* Given a NPU device get the associated PCI device. */
-struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
-{
-	struct device_node *dn;
-	struct pci_dev *gpdev;
-
-	if (WARN_ON(!npdev))
-		return NULL;
-
-	if (WARN_ON(!npdev->dev.of_node))
-		return NULL;
-
-	/* Get assoicated PCI device */
-	dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
-	if (!dn)
-		return NULL;
-
-	gpdev = get_pci_dev(dn);
-	of_node_put(dn);
-
-	return gpdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
-
-/* Given the real PCI device get a linked NPU device. */
-struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
-{
-	struct device_node *dn;
-	struct pci_dev *npdev;
-
-	if (WARN_ON(!gpdev))
-		return NULL;
-
-	/* Not all PCI devices have device-tree nodes */
-	if (!gpdev->dev.of_node)
-		return NULL;
-
-	/* Get assoicated PCI device */
-	dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
-	if (!dn)
-		return NULL;
-
-	npdev = get_pci_dev(dn);
-	of_node_put(dn);
-
-	return npdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_npu_dev);
-
-#ifdef CONFIG_IOMMU_API
-/*
- * Returns the PE assoicated with the PCI device of the given
- * NPU. Returns the linked pci device if pci_dev != NULL.
- */
-static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
-						  struct pci_dev **gpdev)
-{
-	struct pnv_phb *phb;
-	struct pci_controller *hose;
-	struct pci_dev *pdev;
-	struct pnv_ioda_pe *pe;
-	struct pci_dn *pdn;
-
-	pdev = pnv_pci_get_gpu_dev(npe->pdev);
-	if (!pdev)
-		return NULL;
-
-	pdn = pci_get_pdn(pdev);
-	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
-		return NULL;
-
-	hose = pci_bus_to_host(pdev->bus);
-	phb = hose->private_data;
-	pe = &phb->ioda.pe_array[pdn->pe_number];
-
-	if (gpdev)
-		*gpdev = pdev;
-
-	return pe;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group,
-		int num);
-
-static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
-		struct iommu_table *tbl)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pnv_phb *phb = npe->phb;
-	int64_t rc;
-	const unsigned long size = tbl->it_indirect_levels ?
-		tbl->it_level_size : tbl->it_size;
-	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
-	const __u64 win_size = tbl->it_size << tbl->it_page_shift;
-	int num2 = (num == 0) ? 1 : 0;
-
-	/* NPU has just one TVE so if there is another table, remove it first */
-	if (npe->table_group.tables[num2])
-		pnv_npu_unset_window(&npe->table_group, num2);
-
-	pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
-			start_addr, start_addr + win_size - 1,
-			IOMMU_PAGE_SIZE(tbl));
-
-	rc = opal_pci_map_pe_dma_window(phb->opal_id,
-			npe->pe_number,
-			npe->pe_number,
-			tbl->it_indirect_levels + 1,
-			__pa(tbl->it_base),
-			size << 3,
-			IOMMU_PAGE_SIZE(tbl));
-	if (rc) {
-		pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
-		return rc;
-	}
-	pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
-	/* Add the table to the list so its TCE cache will get invalidated */
-	pnv_pci_link_table_and_group(phb->hose->node, num,
-			tbl, &npe->table_group);
-
-	return 0;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pnv_phb *phb = npe->phb;
-	int64_t rc;
-
-	if (!npe->table_group.tables[num])
-		return 0;
-
-	pe_info(npe, "Removing DMA window\n");
-
-	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
-			npe->pe_number,
-			0/* levels */, 0/* table address */,
-			0/* table size */, 0/* page size */);
-	if (rc) {
-		pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
-		return rc;
-	}
-	pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
-	pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
-			&npe->table_group);
-
-	return 0;
-}
-
-/* Switch ownership from platform code to external user (e.g. VFIO) */
-static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pnv_phb *phb = npe->phb;
-	int64_t rc;
-	struct pci_dev *gpdev = NULL;
-
-	/*
-	 * Note: NPU has just a single TVE in the hardware which means that
-	 * while used by the kernel, it can have either 32bit window or
-	 * DMA bypass but never both. So we deconfigure 32bit window only
-	 * if it was enabled at the moment of ownership change.
-	 */
-	if (npe->table_group.tables[0]) {
-		pnv_npu_unset_window(&npe->table_group, 0);
-		return;
-	}
-
-	/* Disable bypass */
-	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
-			npe->pe_number, npe->pe_number,
-			0 /* bypass base */, 0);
-	if (rc) {
-		pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
-		return;
-	}
-	pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
-
-	get_gpu_pci_dev_and_pe(npe, &gpdev);
-	if (gpdev)
-		pnv_npu2_unmap_lpar_dev(gpdev);
-}
-
-static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
-{
-	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
-			table_group);
-	struct pci_dev *gpdev = NULL;
-
-	get_gpu_pci_dev_and_pe(npe, &gpdev);
-	if (gpdev)
-		pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
-}
-
-static struct iommu_table_group_ops pnv_pci_npu_ops = {
-	.set_window = pnv_npu_set_window,
-	.unset_window = pnv_npu_unset_window,
-	.take_ownership = pnv_npu_take_ownership,
-	.release_ownership = pnv_npu_release_ownership,
-};
-#endif /* !CONFIG_IOMMU_API */
-
-/*
- * NPU2 ATS
- */
-/* Maximum possible number of ATSD MMIO registers per NPU */
-#define NV_NMMU_ATSD_REGS 8
-#define NV_NPU_MAX_PE_NUM	16
-
-/*
- * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
- * up to 3 x (GPU + 2xNPUs) (POWER9).
- */
-struct npu_comp {
-	struct iommu_table_group table_group;
-	int pe_num;
-	struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
-};
-
-/* An NPU descriptor, valid for POWER9 only */
-struct npu {
-	int index;
-	struct npu_comp npucomp;
-};
-
-#ifdef CONFIG_IOMMU_API
-static long pnv_npu_peers_create_table_userspace(
-		struct iommu_table_group *table_group,
-		int num, __u32 page_shift, __u64 window_size, __u32 levels,
-		struct iommu_table **ptbl)
-{
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	if (!npucomp->pe_num || !npucomp->pe[0] ||
-			!npucomp->pe[0]->table_group.ops ||
-			!npucomp->pe[0]->table_group.ops->create_table)
-		return -EFAULT;
-
-	return npucomp->pe[0]->table_group.ops->create_table(
-			&npucomp->pe[0]->table_group, num, page_shift,
-			window_size, levels, ptbl);
-}
-
-static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
-		int num, struct iommu_table *tbl)
-{
-	int i, j;
-	long ret = 0;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		if (!pe->table_group.ops->set_window)
-			continue;
-
-		ret = pe->table_group.ops->set_window(&pe->table_group,
-				num, tbl);
-		if (ret)
-			break;
-	}
-
-	if (ret) {
-		for (j = 0; j < i; ++j) {
-			struct pnv_ioda_pe *pe = npucomp->pe[j];
-
-			if (!pe->table_group.ops->unset_window)
-				continue;
-
-			ret = pe->table_group.ops->unset_window(
-					&pe->table_group, num);
-			if (ret)
-				break;
-		}
-	} else {
-		table_group->tables[num] = iommu_tce_table_get(tbl);
-	}
-
-	return ret;
-}
-
-static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
-		int num)
-{
-	int i, j;
-	long ret = 0;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		WARN_ON(npucomp->table_group.tables[num] !=
-				table_group->tables[num]);
-		if (!npucomp->table_group.tables[num])
-			continue;
-
-		if (!pe->table_group.ops->unset_window)
-			continue;
-
-		ret = pe->table_group.ops->unset_window(&pe->table_group, num);
-		if (ret)
-			break;
-	}
-
-	if (ret) {
-		for (j = 0; j < i; ++j) {
-			struct pnv_ioda_pe *pe = npucomp->pe[j];
-
-			if (!npucomp->table_group.tables[num])
-				continue;
-
-			if (!pe->table_group.ops->set_window)
-				continue;
-
-			ret = pe->table_group.ops->set_window(&pe->table_group,
-					num, table_group->tables[num]);
-			if (ret)
-				break;
-		}
-	} else if (table_group->tables[num]) {
-		iommu_tce_table_put(table_group->tables[num]);
-		table_group->tables[num] = NULL;
-	}
-
-	return ret;
-}
-
-static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
-{
-	int i;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		if (!pe->table_group.ops ||
-		    !pe->table_group.ops->take_ownership)
-			continue;
-		pe->table_group.ops->take_ownership(&pe->table_group);
-	}
-}
-
-static void pnv_npu_peers_release_ownership(
-		struct iommu_table_group *table_group)
-{
-	int i;
-	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
-			table_group);
-
-	for (i = 0; i < npucomp->pe_num; ++i) {
-		struct pnv_ioda_pe *pe = npucomp->pe[i];
-
-		if (!pe->table_group.ops ||
-		    !pe->table_group.ops->release_ownership)
-			continue;
-		pe->table_group.ops->release_ownership(&pe->table_group);
-	}
-}
-
-static struct iommu_table_group_ops pnv_npu_peers_ops = {
-	.get_table_size = pnv_pci_ioda2_get_table_size,
-	.create_table = pnv_npu_peers_create_table_userspace,
-	.set_window = pnv_npu_peers_set_window,
-	.unset_window = pnv_npu_peers_unset_window,
-	.take_ownership = pnv_npu_peers_take_ownership,
-	.release_ownership = pnv_npu_peers_release_ownership,
-};
-
-static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
-		struct pnv_ioda_pe *pe)
-{
-	if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
-		return;
-
-	npucomp->pe[npucomp->pe_num] = pe;
-	++npucomp->pe_num;
-}
-
-static struct iommu_table_group *
-	pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
-{
-	struct iommu_table_group *compound_group;
-	struct npu_comp *npucomp;
-	struct pci_dev *gpdev = NULL;
-	struct pci_controller *hose;
-	struct pci_dev *npdev = NULL;
-
-	list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
-		npdev = pnv_pci_get_npu_dev(gpdev, 0);
-		if (npdev)
-			break;
-	}
-
-	if (!npdev)
-		/* It is not an NPU attached device, skip */
-		return NULL;
-
-	hose = pci_bus_to_host(npdev->bus);
-
-	if (hose->npu) {
-		/* P9 case: compound group is per-NPU (all gpus, all links) */
-		npucomp = &hose->npu->npucomp;
-	} else {
-		/* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
-		npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
-	}
-
-	compound_group = &npucomp->table_group;
-	if (!compound_group->group) {
-		compound_group->ops = &pnv_npu_peers_ops;
-		iommu_register_group(compound_group, hose->global_number,
-				pe->pe_number);
-
-		/* Steal capabilities from a GPU PE */
-		compound_group->max_dynamic_windows_supported =
-			pe->table_group.max_dynamic_windows_supported;
-		compound_group->tce32_start = pe->table_group.tce32_start;
-		compound_group->tce32_size = pe->table_group.tce32_size;
-		compound_group->max_levels = pe->table_group.max_levels;
-		if (!compound_group->pgsizes)
-			compound_group->pgsizes = pe->table_group.pgsizes;
-	}
-
-	/*
-	 * The gpu would have been added to the iommu group that's created
-	 * for the PE. Pull it out now.
-	 */
-	iommu_del_device(&gpdev->dev);
-
-       /*
-	* I'm not sure this is strictly required, but it's probably a good idea
-	* since the table_group for the PE is going to be attached to the
-	* compound table group. If we leave the PE's iommu group active then
-	* we might have the same table_group being modifiable via two sepeate
-	* iommu groups.
-	*/
-	iommu_group_put(pe->table_group.group);
-
-	/* now put the GPU into the compound group */
-	pnv_comp_attach_table_group(npucomp, pe);
-	iommu_add_device(compound_group, &gpdev->dev);
-
-	return compound_group;
-}
-
-static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
-{
-	struct iommu_table_group *table_group;
-	struct npu_comp *npucomp;
-	struct pci_dev *gpdev = NULL;
-	struct pci_dev *npdev;
-	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
-
-	WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
-	if (!gpe)
-		return NULL;
-
-	/*
-	 * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
-	 * but NPU bridges do not have this hook defined so we do it here.
-	 * We do not setup other table group parameters as they won't be used
-	 * anyway - NVLink bridges are subordinate PEs.
-	 */
-	pe->table_group.ops = &pnv_pci_npu_ops;
-
-	table_group = iommu_group_get_iommudata(
-			iommu_group_get(&gpdev->dev));
-
-	/*
-	 * On P9 NPU PHB and PCI PHB support different page sizes,
-	 * keep only matching. We expect here that NVLink bridge PE pgsizes is
-	 * initialized by the caller.
-	 */
-	table_group->pgsizes &= pe->table_group.pgsizes;
-	npucomp = container_of(table_group, struct npu_comp, table_group);
-	pnv_comp_attach_table_group(npucomp, pe);
-
-	list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
-		struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
-
-		if (gpdevtmp != gpdev)
-			continue;
-
-		iommu_add_device(table_group, &npdev->dev);
-	}
-
-	return table_group;
-}
-
-void pnv_pci_npu_setup_iommu_groups(void)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct pnv_ioda_pe *pe;
-
-	/*
-	 * For non-nvlink devices the IOMMU group is registered when the PE is
-	 * configured and devices are added to the group when the per-device
-	 * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
-	 * only initialise for "normal" IODA PHBs.
-	 *
-	 * For NVLink devices we need to ensure the NVLinks and the GPU end up
-	 * in the same IOMMU group, so that's handled here.
-	 */
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-
-		if (phb->type == PNV_PHB_IODA2)
-			list_for_each_entry(pe, &phb->ioda.pe_list, list)
-				pnv_try_setup_npu_table_group(pe);
-	}
-
-	/*
-	 * Now we have all PHBs discovered, time to add NPU devices to
-	 * the corresponding IOMMU groups.
-	 */
-	list_for_each_entry(hose, &hose_list, list_node) {
-		unsigned long  pgsizes;
-
-		phb = hose->private_data;
-
-		if (phb->type != PNV_PHB_NPU_NVLINK)
-			continue;
-
-		pgsizes = pnv_ioda_parse_tce_sizes(phb);
-		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-			/*
-			 * IODA2 bridges get this set up from
-			 * pci_controller_ops::setup_bridge but NPU bridges
-			 * do not have this hook defined so we do it here.
-			 */
-			pe->table_group.pgsizes = pgsizes;
-			pnv_npu_compound_attach(pe);
-		}
-	}
-}
-#endif /* CONFIG_IOMMU_API */
-
-int pnv_npu2_init(struct pci_controller *hose)
-{
-	static int npu_index;
-	struct npu *npu;
-	int ret;
-
-	npu = kzalloc(sizeof(*npu), GFP_KERNEL);
-	if (!npu)
-		return -ENOMEM;
-
-	npu_index++;
-	if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
-		ret = -ENOSPC;
-		goto fail_exit;
-	}
-	npu->index = npu_index;
-	hose->npu = npu;
-
-	return 0;
-
-fail_exit:
-	kfree(npu);
-	return ret;
-}
-
-int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
-		unsigned long msr)
-{
-	int ret;
-	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
-	struct pci_controller *hose;
-	struct pnv_phb *nphb;
-
-	if (!npdev)
-		return -ENODEV;
-
-	hose = pci_bus_to_host(npdev->bus);
-	if (hose->npu == NULL) {
-		dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
-		return 0;
-	}
-
-	nphb = hose->private_data;
-
-	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
-			nphb->opal_id, lparid);
-	/*
-	 * Currently we only support radix and non-zero LPCR only makes sense
-	 * for hash tables so skiboot expects the LPCR parameter to be a zero.
-	 */
-	ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
-				0 /* LPCR bits */);
-	if (ret) {
-		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
-		return ret;
-	}
-
-	dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
-			nphb->opal_id, msr);
-	ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
-				    pci_dev_id(gpdev));
-	if (ret < 0)
-		dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
-	else
-		ret = 0;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
-
-void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
-{
-	struct pci_dev *gpdev;
-
-	list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
-		pnv_npu2_map_lpar_dev(gpdev, 0, msr);
-}
-
-int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
-{
-	int ret;
-	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
-	struct pci_controller *hose;
-	struct pnv_phb *nphb;
-
-	if (!npdev)
-		return -ENODEV;
-
-	hose = pci_bus_to_host(npdev->bus);
-	if (hose->npu == NULL) {
-		dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
-		return 0;
-	}
-
-	nphb = hose->private_data;
-
-	dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
-			nphb->opal_id);
-	ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
-				       pci_dev_id(gpdev));
-	if (ret < 0) {
-		dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
-		return ret;
-	}
-
-	/* Set LPID to 0 anyway, just to be safe */
-	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
-	ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
-				0 /* LPCR bits */);
-	if (ret)
-		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52d258f64..01401e3da7ca14 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -267,8 +267,6 @@ OPAL_CALL(opal_xive_get_queue_state,		OPAL_XIVE_GET_QUEUE_STATE);
 OPAL_CALL(opal_xive_set_queue_state,		OPAL_XIVE_SET_QUEUE_STATE);
 OPAL_CALL(opal_xive_get_vp_state,		OPAL_XIVE_GET_VP_STATE);
 OPAL_CALL(opal_signal_system_reset,		OPAL_SIGNAL_SYSTEM_RESET);
-OPAL_CALL(opal_npu_init_context,		OPAL_NPU_INIT_CONTEXT);
-OPAL_CALL(opal_npu_destroy_context,		OPAL_NPU_DESTROY_CONTEXT);
 OPAL_CALL(opal_npu_map_lpar,			OPAL_NPU_MAP_LPAR);
 OPAL_CALL(opal_imc_counters_init,		OPAL_IMC_COUNTERS_INIT);
 OPAL_CALL(opal_imc_counters_start,		OPAL_IMC_COUNTERS_START);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f0f901683a2fe1..5c88d7145a23e0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -47,8 +47,7 @@
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR	*/
 #define PNV_IODA1_DMA32_SEGSIZE	0x10000000
 
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
-					      "NPU_OCAPI" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" };
 
 static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
 static void pnv_pci_configure_bus(struct pci_bus *bus);
@@ -192,8 +191,6 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 	unsigned int pe_num = pe->pe_number;
 
 	WARN_ON(pe->pdev);
-	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
-	kfree(pe->npucomp);
 	memset(pe, 0, sizeof(struct pnv_ioda_pe));
 
 	mutex_lock(&phb->ioda.pe_alloc_mutex);
@@ -875,7 +872,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	 * Release from all parents PELT-V. NPUs don't have a PELTV
 	 * table
 	 */
-	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+	if (phb->type != PNV_PHB_NPU_OCAPI)
 		pnv_ioda_unset_peltv(phb, pe, parent);
 
 	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
@@ -946,7 +943,7 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	 * Configure PELTV. NPUs don't have a PELTV table so skip
 	 * configuration on them.
 	 */
-	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+	if (phb->type != PNV_PHB_NPU_OCAPI)
 		pnv_ioda_set_peltv(phb, pe, true);
 
 	/* Setup reverse map */
@@ -1002,8 +999,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
 	/* NOTE: We don't get a reference for the pointer in the PE
 	 * data structure, both the device and PE structures should be
-	 * destroyed at the same time. However, removing nvlink
-	 * devices will need some work.
+	 * destroyed at the same time.
 	 *
 	 * At some point we want to remove the PDN completely anyways
 	 */
@@ -1099,113 +1095,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 	return pe;
 }
 
-static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
-{
-	int pe_num, found_pe = false, rc;
-	long rid;
-	struct pnv_ioda_pe *pe;
-	struct pci_dev *gpu_pdev;
-	struct pci_dn *npu_pdn;
-	struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
-
-	/*
-	 * Intentionally leak a reference on the npu device (for
-	 * nvlink only; this is not an opencapi path) to make sure it
-	 * never goes away, as it's been the case all along and some
-	 * work is needed otherwise.
-	 */
-	pci_dev_get(npu_pdev);
-
-	/*
-	 * Due to a hardware errata PE#0 on the NPU is reserved for
-	 * error handling. This means we only have three PEs remaining
-	 * which need to be assigned to four links, implying some
-	 * links must share PEs.
-	 *
-	 * To achieve this we assign PEs such that NPUs linking the
-	 * same GPU get assigned the same PE.
-	 */
-	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
-	for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
-		pe = &phb->ioda.pe_array[pe_num];
-		if (!pe->pdev)
-			continue;
-
-		if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
-			/*
-			 * This device has the same peer GPU so should
-			 * be assigned the same PE as the existing
-			 * peer NPU.
-			 */
-			dev_info(&npu_pdev->dev,
-				"Associating to existing PE %x\n", pe_num);
-			npu_pdn = pci_get_pdn(npu_pdev);
-			rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
-			npu_pdn->pe_number = pe_num;
-			phb->ioda.pe_rmap[rid] = pe->pe_number;
-			pe->device_count++;
-
-			/* Map the PE to this link */
-			rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
-					OpalPciBusAll,
-					OPAL_COMPARE_RID_DEVICE_NUMBER,
-					OPAL_COMPARE_RID_FUNCTION_NUMBER,
-					OPAL_MAP_PE);
-			WARN_ON(rc != OPAL_SUCCESS);
-			found_pe = true;
-			break;
-		}
-	}
-
-	if (!found_pe)
-		/*
-		 * Could not find an existing PE so allocate a new
-		 * one.
-		 */
-		return pnv_ioda_setup_dev_PE(npu_pdev);
-	else
-		return pe;
-}
-
-static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
-{
-	struct pci_dev *pdev;
-
-	list_for_each_entry(pdev, &bus->devices, bus_list)
-		pnv_ioda_setup_npu_PE(pdev);
-}
-
-static void pnv_pci_ioda_setup_nvlink(void)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct pnv_ioda_pe *pe;
-
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-		if (phb->type == PNV_PHB_NPU_NVLINK) {
-			/* PE#0 is needed for error reporting */
-			pnv_ioda_reserve_pe(phb, 0);
-			pnv_ioda_setup_npu_PEs(hose->bus);
-			if (phb->model == PNV_PHB_MODEL_NPU2)
-				WARN_ON_ONCE(pnv_npu2_init(hose));
-		}
-	}
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-		if (phb->type != PNV_PHB_IODA2)
-			continue;
-
-		list_for_each_entry(pe, &phb->ioda.pe_list, list)
-			pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
-	}
-
-#ifdef CONFIG_IOMMU_API
-	/* setup iommu groups so we can do nvlink pass-thru */
-	pnv_pci_npu_setup_iommu_groups();
-#endif
-}
-
 static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe);
 
@@ -1468,18 +1357,6 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
 #define PHB3_TCE_KILL_INVAL_PE		PPC_BIT(1)
 #define PHB3_TCE_KILL_INVAL_ONE		PPC_BIT(2)
 
-static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
-	const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;
-
-	mb(); /* Ensure previous TCE table stores are visible */
-	if (rm)
-		__raw_rm_writeq_be(val, invalidate);
-	else
-		__raw_writeq_be(val, invalidate);
-}
-
 static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
 {
 	/* 01xb - invalidate TCEs that match the specified PE# */
@@ -1539,20 +1416,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 		struct pnv_phb *phb = pe->phb;
 		unsigned int shift = tbl->it_page_shift;
 
-		/*
-		 * NVLink1 can use the TCE kill register directly as
-		 * it's the same as PHB3. NVLink2 is different and
-		 * should go via the OPAL call.
-		 */
-		if (phb->model == PNV_PHB_MODEL_NPU) {
-			/*
-			 * The NVLink hardware does not support TCE kill
-			 * per TCE entry so we have to invalidate
-			 * the entire cache for it.
-			 */
-			pnv_pci_phb3_tce_invalidate_entire(phb, rm);
-			continue;
-		}
 		if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
 			pnv_pci_phb3_tce_invalidate(pe, rm, shift,
 						    index, npages);
@@ -1564,14 +1427,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 	}
 }
 
-void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
-	if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
-		pnv_pci_phb3_tce_invalidate_entire(phb, rm);
-	else
-		opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
-}
-
 static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
 		long npages, unsigned long uaddr,
 		enum dma_data_direction direction,
@@ -2450,7 +2305,6 @@ static void pnv_pci_enable_bridges(void)
 
 static void pnv_pci_ioda_fixup(void)
 {
-	pnv_pci_ioda_setup_nvlink();
 	pnv_pci_ioda_create_dbgfs();
 
 	pnv_pci_enable_bridges();
@@ -2823,15 +2677,6 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
 		pnv_ioda_release_pe(pe);
 }
 
-static void pnv_npu_disable_device(struct pci_dev *pdev)
-{
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
-	struct eeh_pe *eehpe = edev ? edev->pe : NULL;
-
-	if (eehpe && eeh_ops && eeh_ops->reset)
-		eeh_ops->reset(eehpe, EEH_RESET_HOT);
-}
-
 static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 {
 	struct pnv_phb *phb = hose->private_data;
@@ -2873,16 +2718,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
 	.shutdown		= pnv_pci_ioda_shutdown,
 };
 
-static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
-	.setup_msi_irqs		= pnv_setup_msi_irqs,
-	.teardown_msi_irqs	= pnv_teardown_msi_irqs,
-	.enable_device_hook	= pnv_pci_enable_device_hook,
-	.window_alignment	= pnv_pci_window_alignment,
-	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
-	.shutdown		= pnv_pci_ioda_shutdown,
-	.disable_device		= pnv_npu_disable_device,
-};
-
 static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
 	.enable_device_hook	= pnv_ocapi_enable_device_hook,
 	.release_device		= pnv_pci_release_device,
@@ -2956,10 +2791,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		phb->model = PNV_PHB_MODEL_P7IOC;
 	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
 		phb->model = PNV_PHB_MODEL_PHB3;
-	else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
-		phb->model = PNV_PHB_MODEL_NPU;
-	else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
-		phb->model = PNV_PHB_MODEL_NPU2;
 	else
 		phb->model = PNV_PHB_MODEL_UNKNOWN;
 
@@ -3117,9 +2948,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 
 	switch (phb->type) {
-	case PNV_PHB_NPU_NVLINK:
-		hose->controller_ops = pnv_npu_ioda_controller_ops;
-		break;
 	case PNV_PHB_NPU_OCAPI:
 		hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
 		break;
@@ -3172,11 +3000,6 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
-void __init pnv_pci_init_npu_phb(struct device_node *np)
-{
-	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
-}
-
 void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
 {
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 9b9bca169275a8..b18468dc31ff5b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -926,17 +926,6 @@ void __init pnv_pci_init(void)
 	for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
 		pnv_pci_init_ioda2_phb(np);
 
-	/* Look for NPU PHBs */
-	for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
-		pnv_pci_init_npu_phb(np);
-
-	/*
-	 * Look for NPU2 PHBs which we treat mostly as NPU PHBs with
-	 * the exception of TCE kill which requires an OPAL call.
-	 */
-	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
-		pnv_pci_init_npu_phb(np);
-
 	/* Look for NPU2 OpenCAPI PHBs */
 	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
 		pnv_pci_init_npu2_opencapi_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 36d22920f5a3cb..c8d4f222a86fe3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -10,10 +10,9 @@
 struct pci_dn;
 
 enum pnv_phb_type {
-	PNV_PHB_IODA1		= 0,
-	PNV_PHB_IODA2		= 1,
-	PNV_PHB_NPU_NVLINK	= 2,
-	PNV_PHB_NPU_OCAPI	= 3,
+	PNV_PHB_IODA1,
+	PNV_PHB_IODA2,
+	PNV_PHB_NPU_OCAPI,
 };
 
 /* Precise PHB model for error management */
@@ -21,8 +20,6 @@ enum pnv_phb_model {
 	PNV_PHB_MODEL_UNKNOWN,
 	PNV_PHB_MODEL_P7IOC,
 	PNV_PHB_MODEL_PHB3,
-	PNV_PHB_MODEL_NPU,
-	PNV_PHB_MODEL_NPU2,
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE	8192
@@ -81,7 +78,6 @@ struct pnv_ioda_pe {
 
 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
 	struct iommu_table_group table_group;
-	struct npu_comp		*npucomp;
 
 	/* 64-bit TCE bypass region */
 	bool			tce_bypass_enabled;
@@ -289,9 +285,7 @@ extern struct iommu_table *pnv_pci_table_alloc(int nid);
 
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
-extern void pnv_pci_init_npu_phb(struct device_node *np);
 extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
-extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
@@ -314,11 +308,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 #define pe_info(pe, fmt, ...)					\
 	pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
 
-/* Nvlink functions */
-extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
-extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
-extern void pnv_pci_npu_setup_iommu_groups(void);
-
 /* pci-ioda-tce.c */
 #define POWERNV_IOMMU_DEFAULT_LEVELS	2
 #define POWERNV_IOMMU_MAX_LEVELS	5
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1bffbd1c9a94b1..3b6800f774c241 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -224,8 +224,6 @@ static void __init pSeries_request_regions(void)
 
 void __init pSeries_final_fixup(void)
 {
-	struct pci_controller *hose;
-
 	pSeries_request_regions();
 
 	eeh_show_enabled();
@@ -234,27 +232,6 @@ void __init pSeries_final_fixup(void)
 	ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
 	ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
 #endif
-	list_for_each_entry(hose, &hose_list, list_node) {
-		struct device_node *dn = hose->dn, *nvdn;
-
-		while (1) {
-			dn = of_find_all_nodes(dn);
-			if (!dn)
-				break;
-			nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
-			if (!nvdn)
-				continue;
-			if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
-				continue;
-			if (!of_device_is_compatible(nvdn->parent,
-						"ibm,power9-npu"))
-				continue;
-#ifdef CONFIG_PPC_POWERNV
-			WARN_ON_ONCE(pnv_npu2_init(hose));
-#endif
-			break;
-		}
-	}
 }
 
 /*
-- 
2.30.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-22 15:01 remove the nvlink2 pci_vfio subdriver Christoph Hellwig
2021-03-22 15:01 ` [PATCH 1/2] vfio/pci: remove vfio_pci_nvlink2 Christoph Hellwig
2021-03-22 17:46   ` Alex Williamson
2021-03-22 15:01 ` [PATCH 2/2] powerpc/powernv: remove the nvlink support Christoph Hellwig
2021-03-22 15:12 ` remove the nvlink2 pci_vfio subdriver Greg Kroah-Hartman
2021-03-26  6:13 remove the nvlink2 pci_vfio subdriver v2 Christoph Hellwig
2021-03-26  6:13 ` [PATCH 2/2] powerpc/powernv: remove the nvlink support Christoph Hellwig

Linux-api Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-api/0 linux-api/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-api linux-api/ https://lore.kernel.org/linux-api \
		linux-api@vger.kernel.org
	public-inbox-index linux-api

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-api


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git