* PowerNV PCI & SR-IOV cleanups
@ 2020-07-10  5:23 Oliver O'Halloran
  2020-07-10  5:23 ` [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper Oliver O'Halloran
                   ` (15 more replies)
  0 siblings, 16 replies; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev

Finally bit the bullet and learned how all the MMIO->PE mapping setup
actually works. As a side effect I found a bunch of oddities in how
PowerNV SR-IOV support is implemented. This series mostly sorts that
out with a few more generic cleanups along the way.

This is largely prep work for supporting VFs in the 32bit MMIO window.
This is an unfortunate necessity due to how the Linux BAR allocator
handles BARs marked as non-prefetchable. The distinction between
prefetchable and non-prefetchable BARs was made largely irrelevant by the
introduction of PCIe, but the BAR allocator is overly conservative: it
will always place non-prefetchable BARs in the non-prefetchable window,
which is 32bit only. This results in us being unable to use VFs from NVMe
drives and a few different RAID cards.

This series is based on top of these two:

https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=187630
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=187688

It rebases cleanly on top of just the first, but I haven't tested that
combination extensively.

Oliver



^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-13  8:28   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release Oliver O'Halloran
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Add a helper to go from a pci_bus structure to the pnv_phb that hosts that
bus. There are a lot of instances of the following pattern:

	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;

without any other use of the pci_controller inside the function. This is
hard to read since it requires you to memorise the contents of the
private_data field, and it's somewhat error prone since it involves
blindly assigning a void pointer. Add a helper to make it more concise
and explicit.
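
With the helper each of those call sites collapses to a single line, as
in the hunks below (the helper itself is in the pci.h hunk):

	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);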

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++----------------
 arch/powerpc/platforms/powernv/pci.c      | 14 ++--
 arch/powerpc/platforms/powernv/pci.h      | 10 +++
 3 files changed, 38 insertions(+), 74 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 31c3e6d58c41..687919db0347 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -252,8 +252,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
 static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
 					 unsigned long *pe_bitmap)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct resource *r;
 	resource_size_t base, sgsz, start, end;
 	int segno, i;
@@ -351,8 +350,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
 
 static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pnv_ioda_pe *master_pe, *pe;
 	unsigned long size, *pe_alloc;
 	int i;
@@ -673,8 +671,7 @@ struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn)
 
 struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn = pci_get_pdn(dev);
 
 	if (!pdn)
@@ -1069,8 +1066,7 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn = pci_get_pdn(dev);
 	struct pnv_ioda_pe *pe;
 
@@ -1129,8 +1125,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
  */
 static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pnv_ioda_pe *pe = NULL;
 	unsigned int pe_num;
 
@@ -1196,8 +1191,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
 	struct pnv_ioda_pe *pe;
 	struct pci_dev *gpu_pdev;
 	struct pci_dn *npu_pdn;
-	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
 
 	/*
 	 * Intentionally leak a reference on the npu device (for
@@ -1300,16 +1294,12 @@ static void pnv_pci_ioda_setup_nvlink(void)
 #ifdef CONFIG_PCI_IOV
 static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
 {
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
 	int                    i, j;
 	int                    m64_bars;
 
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
+	phb = pci_bus_to_pnvhb(pdev->bus);
 	pdn = pci_get_pdn(pdev);
 
 	if (pdn->m64_single_mode)
@@ -1333,8 +1323,6 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
 
 static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 {
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
 	unsigned int           win;
@@ -1346,9 +1334,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	int                    pe_num;
 	int                    m64_bars;
 
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
+	phb = pci_bus_to_pnvhb(pdev->bus);
 	pdn = pci_get_pdn(pdev);
 	total_vfs = pci_sriov_get_totalvfs(pdev);
 
@@ -1459,15 +1445,11 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
 
 static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
 {
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pnv_ioda_pe    *pe, *pe_n;
 	struct pci_dn         *pdn;
 
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
+	phb = pci_bus_to_pnvhb(pdev->bus);
 	pdn = pci_get_pdn(pdev);
 
 	if (!pdev->is_physfn)
@@ -1492,16 +1474,12 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
 
 static void pnv_pci_sriov_disable(struct pci_dev *pdev)
 {
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pnv_ioda_pe    *pe;
 	struct pci_dn         *pdn;
 	u16                    num_vfs, i;
 
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
+	phb = pci_bus_to_pnvhb(pdev->bus);
 	pdn = pci_get_pdn(pdev);
 	num_vfs = pdn->num_vfs;
 
@@ -1535,17 +1513,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe);
 static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 {
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pnv_ioda_pe    *pe;
 	int                    pe_num;
 	u16                    vf_index;
 	struct pci_dn         *pdn;
 
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
+	phb = pci_bus_to_pnvhb(pdev->bus);
 	pdn = pci_get_pdn(pdev);
 
 	if (!pdev->is_physfn)
@@ -1572,7 +1546,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 		pe->rid = (vf_bus << 8) | vf_devfn;
 
 		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
-			hose->global_number, pdev->bus->number,
+			pci_domain_nr(pdev->bus), pdev->bus->number,
 			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
 
 		if (pnv_ioda_configure_pe(phb, pe)) {
@@ -1602,17 +1576,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 
 static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 {
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
 	struct pnv_phb        *phb;
 	struct pnv_ioda_pe    *pe;
 	struct pci_dn         *pdn;
 	int                    ret;
 	u16                    i;
 
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
+	phb = pci_bus_to_pnvhb(pdev->bus);
 	pdn = pci_get_pdn(pdev);
 
 	if (phb->type == PNV_PHB_IODA2) {
@@ -1735,8 +1705,7 @@ static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 
 static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
@@ -1847,8 +1816,7 @@ static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
 static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
 		u64 dma_mask)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
@@ -2766,8 +2734,7 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 #ifdef CONFIG_PCI_IOV
 static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
 	struct resource *res;
 	int i;
@@ -3101,10 +3068,9 @@ static void pnv_pci_ioda_fixup(void)
 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
 						unsigned long type)
 {
-	struct pci_dev *bridge;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	int num_pci_bridges = 0;
+	struct pci_dev *bridge;
 
 	bridge = bus->self;
 	while (bridge) {
@@ -3190,8 +3156,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
 
 static void pnv_pci_configure_bus(struct pci_bus *bus)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pci_dev *bridge = bus->self;
 	struct pnv_ioda_pe *pe;
 	bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
@@ -3237,8 +3202,7 @@ static resource_size_t pnv_pci_default_alignment(void)
 static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
 						      int resno)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	resource_size_t align;
 
@@ -3274,8 +3238,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
  */
 static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn;
 
 	/* The function is probably called while the PEs have
@@ -3488,8 +3451,7 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
 
 static void pnv_pci_release_device(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
@@ -3534,8 +3496,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 
 static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
 {
-	struct pci_controller *hose = bus->sysdata;
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pnv_ioda_pe *pe;
 
 	list_for_each_entry(pe, &phb->ioda.pe_list, list) {
@@ -3873,8 +3834,7 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
 
 static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 
 	if (!machine_is(powernv))
 		return;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 091fe1cf386b..9b9bca169275 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -162,8 +162,7 @@ EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
 
 int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct msi_desc *entry;
 	struct msi_msg msg;
 	int hwirq;
@@ -211,8 +210,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 
 void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct msi_desc *entry;
 	irq_hw_number_t hwirq;
 
@@ -824,10 +822,9 @@ EXPORT_SYMBOL(pnv_pci_get_phb_node);
 
 int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
 {
-	__be64 val;
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	u64 tunnel_bar;
+	__be64 val;
 	int rc;
 
 	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
@@ -835,9 +832,6 @@ int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
 	if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
 		return -ENXIO;
 
-	hose = pci_bus_to_host(dev->bus);
-	phb = hose->private_data;
-
 	mutex_lock(&tunnel_mutex);
 	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
 	if (rc != OPAL_SUCCESS) {
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 51c254f2f3cb..0727dec9a0d1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -260,4 +260,14 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 
 extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
 
+static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
+{
+	struct pci_controller *hose = bus->sysdata;
+
+	if (hose)
+		return hose->private_data;
+
+	return NULL;
+}
+
 #endif /* __POWERNV_PCI_H */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
  2020-07-10  5:23 ` [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-13  8:30   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state Oliver O'Halloran
                   ` (13 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Currently we have these two functions:

	pnv_pci_ioda2_release_dma_pe(), and
	pnv_pci_ioda2_release_pe_dma()

The first is used when tearing down VF PEs and the other is used for normal
devices. There's very little difference between the two though. The latter
(non-VF) will skip a call to pnv_pci_ioda2_unset_window() unless
CONFIG_IOMMU_API=y is set. There's no real point in doing this so fold the
two together.
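
A rough sketch of the teardown path both callers end up sharing after this
patch, reconstructed from the hunks below (not a verbatim copy of the
resulting function):

	static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
	{
		struct iommu_table *tbl = pe->table_group.tables[0];
		unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
		int64_t rc;

		if (!weight)
			return;

		/* Tear down the default DMA window */
		rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
		if (rc)
			pe_warn(pe, "OPAL error %lld release DMA window\n", rc);

		/* Disable 64-bit bypass and drop the group / table references */
		pnv_pci_ioda2_set_bypass(pe, false);
		if (pe->table_group.group)
			iommu_group_put(pe->table_group.group);
		iommu_tce_table_put(tbl);
	}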

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 30 +++--------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 687919db0347..bfb40607aa0e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1422,26 +1422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	return -EBUSY;
 }
 
-static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
-		int num);
-
-static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
-{
-	struct iommu_table    *tbl;
-	int64_t               rc;
-
-	tbl = pe->table_group.tables[0];
-	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
-	if (rc)
-		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
-
-	pnv_pci_ioda2_set_bypass(pe, false);
-	if (pe->table_group.group) {
-		iommu_group_put(pe->table_group.group);
-		BUG_ON(pe->table_group.group);
-	}
-	iommu_tce_table_put(tbl);
-}
+static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
 
 static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
 {
@@ -1455,11 +1436,12 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
 	if (!pdev->is_physfn)
 		return;
 
+	/* FIXME: Use pnv_ioda_release_pe()? */
 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
 		if (pe->parent_dev != pdev)
 			continue;
 
-		pnv_pci_ioda2_release_dma_pe(pdev, pe);
+		pnv_pci_ioda2_release_pe_dma(pe);
 
 		/* Remove from list */
 		mutex_lock(&phb->ioda.pe_list_mutex);
@@ -2429,7 +2411,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
 	return 0;
 }
 
-#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
 static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
 		int num)
 {
@@ -2453,7 +2434,6 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
 
 	return ret;
 }
-#endif
 
 #ifdef CONFIG_IOMMU_API
 unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
@@ -3334,18 +3314,14 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 {
 	struct iommu_table *tbl = pe->table_group.tables[0];
 	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
-#ifdef CONFIG_IOMMU_API
 	int64_t rc;
-#endif
 
 	if (!weight)
 		return;
 
-#ifdef CONFIG_IOMMU_API
 	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
 	if (rc)
 		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
-#endif
 
 	pnv_pci_ioda2_set_bypass(pe, false);
 	if (pe->table_group.group) {
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
  2020-07-10  5:23 ` [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper Oliver O'Halloran
  2020-07-10  5:23 ` [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-14  5:37   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window Oliver O'Halloran
                   ` (12 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Oliver O'Halloran

There's an optimisation in the PE setup which skips performing DMA setup
for a PE if it only contains bridges. The assumption is that only "real"
devices will DMA to system memory, which is probably fair. However, if we
start off with only bridge devices in a PE and then add a non-bridge
device, the new device won't be able to use DMA because we never
configured it.

Fix this (admittedly pretty weird) edge case by tracking whether we've done
the DMA setup for the PE or not. If a non-bridge device is added to the PE
(via rescan or hotplug, or whatever) we can set up DMA on demand.

This also means the only remaining user of the old "DMA Weight" code is
the IODA1 DMA setup code that it was originally added for, which is good.

Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
Alexey, do we need to have the IOMMU API stuff set/clear this flag?
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 48 ++++++++++++++---------
 arch/powerpc/platforms/powernv/pci.h      |  7 ++++
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index bfb40607aa0e..bb9c1cc60c33 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -141,6 +141,7 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
 
 	phb->ioda.pe_array[pe_no].phb = phb;
 	phb->ioda.pe_array[pe_no].pe_number = pe_no;
+	phb->ioda.pe_array[pe_no].dma_setup_done = false;
 
 	/*
 	 * Clear the PE frozen state as it might be put into frozen state
@@ -1685,6 +1686,12 @@ static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 }
 #endif /* CONFIG_PCI_IOV */
 
+static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
+				       struct pnv_ioda_pe *pe);
+
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+				       struct pnv_ioda_pe *pe);
+
 static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 {
 	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
@@ -1713,6 +1720,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 		pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
 	}
 
+	/*
+	 * We assume that bridges *probably* don't need to do any DMA so we can
+	 * skip allocating a TCE table, etc unless we get a non-bridge device.
+	 */
+	if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
+		switch (phb->type) {
+		case PNV_PHB_IODA1:
+			pnv_pci_ioda1_setup_dma_pe(phb, pe);
+			break;
+		case PNV_PHB_IODA2:
+			pnv_pci_ioda2_setup_dma_pe(phb, pe);
+			break;
+		default:
+			pr_warn("%s: No DMA for PHB#%x (type %d)\n",
+				__func__, phb->hose->global_number, phb->type);
+		}
+	}
+
 	if (pdn)
 		pdn->pe_number = pe->pe_number;
 	pe->device_count++;
@@ -2222,6 +2247,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 	pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
 	iommu_init_table(tbl, phb->hose->node, 0, 0);
 
+	pe->dma_setup_done = true;
 	return;
  fail:
 	/* XXX Failure: Try to fallback to 64-bit only ? */
@@ -2536,9 +2562,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 {
 	int64_t rc;
 
-	if (!pnv_pci_ioda_pe_dma_weight(pe))
-		return;
-
 	/* TVE #1 is selected by PCI address bit 59 */
 	pe->tce_bypass_base = 1ull << 59;
 
@@ -2563,6 +2586,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	iommu_register_group(&pe->table_group, phb->hose->global_number,
 			     pe->pe_number);
 #endif
+	pe->dma_setup_done = true;
 }
 
 int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
@@ -3136,7 +3160,6 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
 
 static void pnv_pci_configure_bus(struct pci_bus *bus)
 {
-	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pci_dev *bridge = bus->self;
 	struct pnv_ioda_pe *pe;
 	bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
@@ -3160,17 +3183,6 @@ static void pnv_pci_configure_bus(struct pci_bus *bus)
 		return;
 
 	pnv_ioda_setup_pe_seg(pe);
-	switch (phb->type) {
-	case PNV_PHB_IODA1:
-		pnv_pci_ioda1_setup_dma_pe(phb, pe);
-		break;
-	case PNV_PHB_IODA2:
-		pnv_pci_ioda2_setup_dma_pe(phb, pe);
-		break;
-	default:
-		pr_warn("%s: No DMA for PHB#%x (type %d)\n",
-			__func__, phb->hose->global_number, phb->type);
-	}
 }
 
 static resource_size_t pnv_pci_default_alignment(void)
@@ -3289,11 +3301,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
 
 static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
 {
-	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
 	struct iommu_table *tbl = pe->table_group.tables[0];
 	int64_t rc;
 
-	if (!weight)
+	if (!pe->dma_setup_done)
 		return;
 
 	rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
@@ -3313,10 +3324,9 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
 static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 {
 	struct iommu_table *tbl = pe->table_group.tables[0];
-	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
 	int64_t rc;
 
-	if (!weight)
+	if (!pe->dma_setup_done)
 		return;
 
 	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 0727dec9a0d1..6aa6aefb637d 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -87,6 +87,13 @@ struct pnv_ioda_pe {
 	bool			tce_bypass_enabled;
 	uint64_t		tce_bypass_base;
 
+	/*
+	 * Used to track whether we've done DMA setup for this PE or not. We
+	 * want to defer allocating TCE tables, etc until we've added a
+	 * non-bridge device to the PE.
+	 */
+	bool			dma_setup_done;
+
 	/* MSIs. MVE index is identical for for 32 and 64 bit MSI
 	 * and -1 if not supported. (It's actually identical to the
 	 * PE number)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (2 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-14  7:39   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a separate file Oliver O'Halloran
                   ` (11 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

We pre-configure the M64 window for IODA1 as a 1-1 segment-PE mapping,
similar to PHB3. Currently the actual mapping of segments occurs in
pnv_ioda_pick_m64_pe(), but we can move it into pnv_ioda1_init_m64() and
drop the IODA1-specific code paths in the PE setup / teardown.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 55 +++++++++++------------
 1 file changed, 25 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index bb9c1cc60c33..8fb17676d914 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -311,6 +311,28 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
 		}
 	}
 
+	for (index = 0; index < phb->ioda.total_pe_num; index++) {
+		int64_t rc;
+
+		/*
+		 * P7IOC supports M64DT, which helps mapping M64 segment
+		 * to one particular PE#. However, PHB3 has fixed mapping
+		 * between M64 segment and PE#. In order to have same logic
+		 * for P7IOC and PHB3, we enforce fixed mapping between M64
+		 * segment and PE# on P7IOC.
+		 */
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				index, OPAL_M64_WINDOW_TYPE,
+				index / PNV_IODA1_M64_SEGS,
+				index % PNV_IODA1_M64_SEGS);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				index);
+			goto fail;
+		}
+	}
+
 	/*
 	 * Exclude the segments for reserved and root bus PE, which
 	 * are first or last two PEs.
@@ -402,26 +424,6 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
 			pe->master = master_pe;
 			list_add_tail(&pe->list, &master_pe->slaves);
 		}
-
-		/*
-		 * P7IOC supports M64DT, which helps mapping M64 segment
-		 * to one particular PE#. However, PHB3 has fixed mapping
-		 * between M64 segment and PE#. In order to have same logic
-		 * for P7IOC and PHB3, we enforce fixed mapping between M64
-		 * segment and PE# on P7IOC.
-		 */
-		if (phb->type == PNV_PHB_IODA1) {
-			int64_t rc;
-
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					pe->pe_number, OPAL_M64_WINDOW_TYPE,
-					pe->pe_number / PNV_IODA1_M64_SEGS,
-					pe->pe_number % PNV_IODA1_M64_SEGS);
-			if (rc != OPAL_SUCCESS)
-				pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
-					__func__, rc, phb->hose->global_number,
-					pe->pe_number);
-		}
 	}
 
 	kfree(pe_alloc);
@@ -3354,14 +3356,8 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
 		if (map[idx] != pe->pe_number)
 			continue;
 
-		if (win == OPAL_M64_WINDOW_TYPE)
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					phb->ioda.reserved_pe_idx, win,
-					idx / PNV_IODA1_M64_SEGS,
-					idx % PNV_IODA1_M64_SEGS);
-		else
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					phb->ioda.reserved_pe_idx, win, 0, idx);
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				phb->ioda.reserved_pe_idx, win, 0, idx);
 
 		if (rc != OPAL_SUCCESS)
 			pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
@@ -3380,8 +3376,7 @@ static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
 				     phb->ioda.io_segmap);
 		pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
 				     phb->ioda.m32_segmap);
-		pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE,
-				     phb->ioda.m64_segmap);
+		/* M64 is pre-configured by pnv_ioda1_init_m64() */
 	} else if (phb->type == PNV_PHB_IODA2) {
 		pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
 				     phb->ioda.m32_segmap);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a separate file
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (3 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-14  9:16   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV Oliver O'Halloran
                   ` (10 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

pci-ioda.c is getting a bit unwieldy due to the amount of stuff jammed in
there. The SR-IOV support can be extracted easily enough and is mostly
standalone, so move it into a separate file.

This patch also moves the PowerNV SR-IOV specific fields out of pci_dn and
into a platform-specific structure. I'm not sure how they ended up in there
in the first place, but leaking platform specifics into common code has
proven to be a terrible idea so far, so let's stop doing that.
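
The per-device IOV state now hangs off pdev->dev.archdata.iov_data (see
the device.h hunk below) and the new code reaches it through
pnv_iov_get(). Its definition isn't visible in the hunks here, but it's
assumed to be a trivial accessor along the lines of:

	static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
	{
		return pdev->dev.archdata.iov_data;
	}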

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
The pci_dn change and the pci-sriov.c changes were originally separate
patches. I accidentally squashed them together while rebasing, and fixing
that seemed like more pain than it was worth. I kind of like it this way
though since they did cause a lot of churn on the same set of functions.

I'll split them up again if you really want (please don't want this).
---
 arch/powerpc/include/asm/device.h          |   3 +
 arch/powerpc/platforms/powernv/Makefile    |   1 +
 arch/powerpc/platforms/powernv/pci-ioda.c  | 673 +--------------------
 arch/powerpc/platforms/powernv/pci-sriov.c | 642 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h       |  74 +++
 5 files changed, 738 insertions(+), 655 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/pci-sriov.c

diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 266542769e4b..4d8934db7ef5 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -49,6 +49,9 @@ struct dev_archdata {
 #ifdef CONFIG_CXL_BASE
 	struct cxl_context	*cxl_ctx;
 #endif
+#ifdef CONFIG_PCI_IOV
+	void *iov_data;
+#endif
 };
 
 struct pdev_archdata {
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index fe3f0fb5aeca..2eb6ae150d1f 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_PRESERVE_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_OPAL_CORE)	+= opal-core.o
 obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+obj-$(CONFIG_PCI_IOV)   += pci-sriov.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8fb17676d914..2d36a9ebf0e9 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -115,26 +115,6 @@ static int __init pci_reset_phbs_setup(char *str)
 
 early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
 
-static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
-{
-	/*
-	 * WARNING: We cannot rely on the resource flags. The Linux PCI
-	 * allocation code sometimes decides to put a 64-bit prefetchable
-	 * BAR in the 32-bit window, so we have to compare the addresses.
-	 *
-	 * For simplicity we only test resource start.
-	 */
-	return (r->start >= phb->ioda.m64_base &&
-		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
-}
-
-static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
-{
-	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
-
-	return (resource_flags & flags) == flags;
-}
-
 static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
 {
 	s64 rc;
@@ -172,7 +152,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 	pnv_ioda_init_pe(phb, pe_no);
 }
 
-static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
 	long pe;
 
@@ -184,7 +164,7 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 	return NULL;
 }
 
-static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 {
 	struct pnv_phb *phb = pe->phb;
 	unsigned int pe_num = pe->pe_number;
@@ -816,7 +796,7 @@ static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
 		pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
 }
 
-static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
 	struct pci_dev *parent;
 	uint8_t bcomp, dcomp, fcomp;
@@ -887,7 +867,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	return 0;
 }
 
-static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
 	struct pci_dev *parent;
 	uint8_t bcomp, dcomp, fcomp;
@@ -982,91 +962,6 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	return 0;
 }
 
-#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
-{
-	struct pci_dn *pdn = pci_get_pdn(dev);
-	int i;
-	struct resource *res, res2;
-	resource_size_t size;
-	u16 num_vfs;
-
-	if (!dev->is_physfn)
-		return -EINVAL;
-
-	/*
-	 * "offset" is in VFs.  The M64 windows are sized so that when they
-	 * are segmented, each segment is the same size as the IOV BAR.
-	 * Each segment is in a separate PE, and the high order bits of the
-	 * address are the PE number.  Therefore, each VF's BAR is in a
-	 * separate PE, and changing the IOV BAR start address changes the
-	 * range of PEs the VFs are in.
-	 */
-	num_vfs = pdn->num_vfs;
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &dev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || !res->parent)
-			continue;
-
-		/*
-		 * The actual IOV BAR range is determined by the start address
-		 * and the actual size for num_vfs VFs BAR.  This check is to
-		 * make sure that after shifting, the range will not overlap
-		 * with another device.
-		 */
-		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
-		res2.flags = res->flags;
-		res2.start = res->start + (size * offset);
-		res2.end = res2.start + (size * num_vfs) - 1;
-
-		if (res2.end > res->end) {
-			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
-				i, &res2, res, num_vfs, offset);
-			return -EBUSY;
-		}
-	}
-
-	/*
-	 * Since M64 BAR shares segments among all possible 256 PEs,
-	 * we have to shift the beginning of PF IOV BAR to make it start from
-	 * the segment which belongs to the PE number assigned to the first VF.
-	 * This creates a "hole" in the /proc/iomem which could be used for
-	 * allocating other resources so we reserve this area below and
-	 * release when IOV is released.
-	 */
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &dev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || !res->parent)
-			continue;
-
-		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
-		res2 = *res;
-		res->start += size * offset;
-
-		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
-			 i, &res2, res, (offset > 0) ? "En" : "Dis",
-			 num_vfs, offset);
-
-		if (offset < 0) {
-			devm_release_resource(&dev->dev, &pdn->holes[i]);
-			memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
-		}
-
-		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
-
-		if (offset > 0) {
-			pdn->holes[i].start = res2.start;
-			pdn->holes[i].end = res2.start + size * offset - 1;
-			pdn->holes[i].flags = IORESOURCE_BUS;
-			pdn->holes[i].name = "pnv_iov_reserved";
-			devm_request_resource(&dev->dev, res->parent,
-					&pdn->holes[i]);
-		}
-	}
-	return 0;
-}
-#endif /* CONFIG_PCI_IOV */
-
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
 	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
@@ -1294,406 +1189,9 @@ static void pnv_pci_ioda_setup_nvlink(void)
 #endif
 }
 
-#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pnv_phb        *phb;
-	struct pci_dn         *pdn;
-	int                    i, j;
-	int                    m64_bars;
-
-	phb = pci_bus_to_pnvhb(pdev->bus);
-	pdn = pci_get_pdn(pdev);
-
-	if (pdn->m64_single_mode)
-		m64_bars = num_vfs;
-	else
-		m64_bars = 1;
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-		for (j = 0; j < m64_bars; j++) {
-			if (pdn->m64_map[j][i] == IODA_INVALID_M64)
-				continue;
-			opal_pci_phb_mmio_enable(phb->opal_id,
-				OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
-			clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
-			pdn->m64_map[j][i] = IODA_INVALID_M64;
-		}
-
-	kfree(pdn->m64_map);
-	return 0;
-}
-
-static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pnv_phb        *phb;
-	struct pci_dn         *pdn;
-	unsigned int           win;
-	struct resource       *res;
-	int                    i, j;
-	int64_t                rc;
-	int                    total_vfs;
-	resource_size_t        size, start;
-	int                    pe_num;
-	int                    m64_bars;
-
-	phb = pci_bus_to_pnvhb(pdev->bus);
-	pdn = pci_get_pdn(pdev);
-	total_vfs = pci_sriov_get_totalvfs(pdev);
-
-	if (pdn->m64_single_mode)
-		m64_bars = num_vfs;
-	else
-		m64_bars = 1;
-
-	pdn->m64_map = kmalloc_array(m64_bars,
-				     sizeof(*pdn->m64_map),
-				     GFP_KERNEL);
-	if (!pdn->m64_map)
-		return -ENOMEM;
-	/* Initialize the m64_map to IODA_INVALID_M64 */
-	for (i = 0; i < m64_bars ; i++)
-		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
-			pdn->m64_map[i][j] = IODA_INVALID_M64;
-
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || !res->parent)
-			continue;
-
-		for (j = 0; j < m64_bars; j++) {
-			do {
-				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
-						phb->ioda.m64_bar_idx + 1, 0);
-
-				if (win >= phb->ioda.m64_bar_idx + 1)
-					goto m64_failed;
-			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
-
-			pdn->m64_map[j][i] = win;
-
-			if (pdn->m64_single_mode) {
-				size = pci_iov_resource_size(pdev,
-							PCI_IOV_RESOURCES + i);
-				start = res->start + size * j;
-			} else {
-				size = resource_size(res);
-				start = res->start;
-			}
-
-			/* Map the M64 here */
-			if (pdn->m64_single_mode) {
-				pe_num = pdn->pe_num_map[j];
-				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-						pe_num, OPAL_M64_WINDOW_TYPE,
-						pdn->m64_map[j][i], 0);
-			}
-
-			rc = opal_pci_set_phb_mem_window(phb->opal_id,
-						 OPAL_M64_WINDOW_TYPE,
-						 pdn->m64_map[j][i],
-						 start,
-						 0, /* unused */
-						 size);
-
-
-			if (rc != OPAL_SUCCESS) {
-				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
-					win, rc);
-				goto m64_failed;
-			}
-
-			if (pdn->m64_single_mode)
-				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
-			else
-				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
-
-			if (rc != OPAL_SUCCESS) {
-				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
-					win, rc);
-				goto m64_failed;
-			}
-		}
-	}
-	return 0;
-
-m64_failed:
-	pnv_pci_vf_release_m64(pdev, num_vfs);
-	return -EBUSY;
-}
-
-static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
-
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
-{
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe, *pe_n;
-	struct pci_dn         *pdn;
-
-	phb = pci_bus_to_pnvhb(pdev->bus);
-	pdn = pci_get_pdn(pdev);
-
-	if (!pdev->is_physfn)
-		return;
-
-	/* FIXME: Use pnv_ioda_release_pe()? */
-	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
-		if (pe->parent_dev != pdev)
-			continue;
-
-		pnv_pci_ioda2_release_pe_dma(pe);
-
-		/* Remove from list */
-		mutex_lock(&phb->ioda.pe_list_mutex);
-		list_del(&pe->list);
-		mutex_unlock(&phb->ioda.pe_list_mutex);
-
-		pnv_ioda_deconfigure_pe(phb, pe);
-
-		pnv_ioda_free_pe(pe);
-	}
-}
-
-static void pnv_pci_sriov_disable(struct pci_dev *pdev)
-{
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
-	struct pci_dn         *pdn;
-	u16                    num_vfs, i;
-
-	phb = pci_bus_to_pnvhb(pdev->bus);
-	pdn = pci_get_pdn(pdev);
-	num_vfs = pdn->num_vfs;
-
-	/* Release VF PEs */
-	pnv_ioda_release_vf_PE(pdev);
-
-	if (phb->type == PNV_PHB_IODA2) {
-		if (!pdn->m64_single_mode)
-			pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
-
-		/* Release M64 windows */
-		pnv_pci_vf_release_m64(pdev, num_vfs);
-
-		/* Release PE numbers */
-		if (pdn->m64_single_mode) {
-			for (i = 0; i < num_vfs; i++) {
-				if (pdn->pe_num_map[i] == IODA_INVALID_PE)
-					continue;
-
-				pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
-				pnv_ioda_free_pe(pe);
-			}
-		} else
-			bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-		/* Releasing pe_num_map */
-		kfree(pdn->pe_num_map);
-	}
-}
-
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-				       struct pnv_ioda_pe *pe);
-static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
-	int                    pe_num;
-	u16                    vf_index;
-	struct pci_dn         *pdn;
-
-	phb = pci_bus_to_pnvhb(pdev->bus);
-	pdn = pci_get_pdn(pdev);
-
-	if (!pdev->is_physfn)
-		return;
-
-	/* Reserve PE for each VF */
-	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
-		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
-		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
-		struct pci_dn *vf_pdn;
-
-		if (pdn->m64_single_mode)
-			pe_num = pdn->pe_num_map[vf_index];
-		else
-			pe_num = *pdn->pe_num_map + vf_index;
-
-		pe = &phb->ioda.pe_array[pe_num];
-		pe->pe_number = pe_num;
-		pe->phb = phb;
-		pe->flags = PNV_IODA_PE_VF;
-		pe->pbus = NULL;
-		pe->parent_dev = pdev;
-		pe->mve_number = -1;
-		pe->rid = (vf_bus << 8) | vf_devfn;
-
-		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
-			pci_domain_nr(pdev->bus), pdev->bus->number,
-			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
-
-		if (pnv_ioda_configure_pe(phb, pe)) {
-			/* XXX What do we do here ? */
-			pnv_ioda_free_pe(pe);
-			pe->pdev = NULL;
-			continue;
-		}
-
-		/* Put PE to the list */
-		mutex_lock(&phb->ioda.pe_list_mutex);
-		list_add_tail(&pe->list, &phb->ioda.pe_list);
-		mutex_unlock(&phb->ioda.pe_list_mutex);
-
-		/* associate this pe to it's pdn */
-		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
-			if (vf_pdn->busno == vf_bus &&
-			    vf_pdn->devfn == vf_devfn) {
-				vf_pdn->pe_number = pe_num;
-				break;
-			}
-		}
-
-		pnv_pci_ioda2_setup_dma_pe(phb, pe);
-	}
-}
-
-static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
-	struct pci_dn         *pdn;
-	int                    ret;
-	u16                    i;
-
-	phb = pci_bus_to_pnvhb(pdev->bus);
-	pdn = pci_get_pdn(pdev);
-
-	if (phb->type == PNV_PHB_IODA2) {
-		if (!pdn->vfs_expanded) {
-			dev_info(&pdev->dev, "don't support this SRIOV device"
-				" with non 64bit-prefetchable IOV BAR\n");
-			return -ENOSPC;
-		}
-
-		/*
-		 * When M64 BARs functions in Single PE mode, the number of VFs
-		 * could be enabled must be less than the number of M64 BARs.
-		 */
-		if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
-			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
-			return -EBUSY;
-		}
-
-		/* Allocating pe_num_map */
-		if (pdn->m64_single_mode)
-			pdn->pe_num_map = kmalloc_array(num_vfs,
-							sizeof(*pdn->pe_num_map),
-							GFP_KERNEL);
-		else
-			pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
-
-		if (!pdn->pe_num_map)
-			return -ENOMEM;
-
-		if (pdn->m64_single_mode)
-			for (i = 0; i < num_vfs; i++)
-				pdn->pe_num_map[i] = IODA_INVALID_PE;
-
-		/* Calculate available PE for required VFs */
-		if (pdn->m64_single_mode) {
-			for (i = 0; i < num_vfs; i++) {
-				pe = pnv_ioda_alloc_pe(phb);
-				if (!pe) {
-					ret = -EBUSY;
-					goto m64_failed;
-				}
-
-				pdn->pe_num_map[i] = pe->pe_number;
-			}
-		} else {
-			mutex_lock(&phb->ioda.pe_alloc_mutex);
-			*pdn->pe_num_map = bitmap_find_next_zero_area(
-				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
-				0, num_vfs, 0);
-			if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
-				mutex_unlock(&phb->ioda.pe_alloc_mutex);
-				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
-				kfree(pdn->pe_num_map);
-				return -EBUSY;
-			}
-			bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-			mutex_unlock(&phb->ioda.pe_alloc_mutex);
-		}
-		pdn->num_vfs = num_vfs;
-
-		/* Assign M64 window accordingly */
-		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
-		if (ret) {
-			dev_info(&pdev->dev, "Not enough M64 window resources\n");
-			goto m64_failed;
-		}
-
-		/*
-		 * When using one M64 BAR to map one IOV BAR, we need to shift
-		 * the IOV BAR according to the PE# allocated to the VFs.
-		 * Otherwise, the PE# for the VF will conflict with others.
-		 */
-		if (!pdn->m64_single_mode) {
-			ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
-			if (ret)
-				goto m64_failed;
-		}
-	}
-
-	/* Setup VF PEs */
-	pnv_ioda_setup_vf_PE(pdev, num_vfs);
-
-	return 0;
-
-m64_failed:
-	if (pdn->m64_single_mode) {
-		for (i = 0; i < num_vfs; i++) {
-			if (pdn->pe_num_map[i] == IODA_INVALID_PE)
-				continue;
-
-			pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
-			pnv_ioda_free_pe(pe);
-		}
-	} else
-		bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-
-	/* Releasing pe_num_map */
-	kfree(pdn->pe_num_map);
-
-	return ret;
-}
-
-static int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
-{
-	pnv_pci_sriov_disable(pdev);
-
-	/* Release PCI data */
-	remove_sriov_vf_pdns(pdev);
-	return 0;
-}
-
-static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
-{
-	/* Allocate PCI data */
-	add_sriov_vf_pdns(pdev);
-
-	return pnv_pci_sriov_enable(pdev, num_vfs);
-}
-#endif /* CONFIG_PCI_IOV */
-
 static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe);
 
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-				       struct pnv_ioda_pe *pe);
-
 static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 {
 	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
@@ -2559,8 +2057,8 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
 };
 #endif
 
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-				       struct pnv_ioda_pe *pe)
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+				struct pnv_ioda_pe *pe)
 {
 	int64_t rc;
 
@@ -2737,117 +2235,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 		count, phb->msi_base);
 }
 
-#ifdef CONFIG_PCI_IOV
-static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
-{
-	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
-	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
-	struct resource *res;
-	int i;
-	resource_size_t size, total_vf_bar_sz;
-	struct pci_dn *pdn;
-	int mul, total_vfs;
-
-	pdn = pci_get_pdn(pdev);
-	pdn->vfs_expanded = 0;
-	pdn->m64_single_mode = false;
-
-	total_vfs = pci_sriov_get_totalvfs(pdev);
-	mul = phb->ioda.total_pe_num;
-	total_vf_bar_sz = 0;
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || res->parent)
-			continue;
-		if (!pnv_pci_is_m64_flags(res->flags)) {
-			dev_warn(&pdev->dev, "Don't support SR-IOV with"
-					" non M64 VF BAR%d: %pR. \n",
-				 i, res);
-			goto truncate_iov;
-		}
-
-		total_vf_bar_sz += pci_iov_resource_size(pdev,
-				i + PCI_IOV_RESOURCES);
-
-		/*
-		 * If bigger than quarter of M64 segment size, just round up
-		 * power of two.
-		 *
-		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
-		 * with other devices, IOV BAR size is expanded to be
-		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
-		 * segment size , the expanded size would equal to half of the
-		 * whole M64 space size, which will exhaust the M64 Space and
-		 * limit the system flexibility.  This is a design decision to
-		 * set the boundary to quarter of the M64 segment size.
-		 */
-		if (total_vf_bar_sz > gate) {
-			mul = roundup_pow_of_two(total_vfs);
-			dev_info(&pdev->dev,
-				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
-				total_vf_bar_sz, gate, mul);
-			pdn->m64_single_mode = true;
-			break;
-		}
-	}
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || res->parent)
-			continue;
-
-		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
-		/*
-		 * On PHB3, the minimum size alignment of M64 BAR in single
-		 * mode is 32MB.
-		 */
-		if (pdn->m64_single_mode && (size < SZ_32M))
-			goto truncate_iov;
-		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
-		res->end = res->start + size * mul - 1;
-		dev_dbg(&pdev->dev, "                       %pR\n", res);
-		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
-			 i, res, mul);
-	}
-	pdn->vfs_expanded = mul;
-
-	return;
-
-truncate_iov:
-	/* To save MMIO space, IOV BAR is truncated. */
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		res->flags = 0;
-		res->end = res->start - 1;
-	}
-}
-
-static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
-{
-	if (WARN_ON(pci_dev_is_added(pdev)))
-		return;
-
-	if (pdev->is_virtfn) {
-		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
-
-		/*
-		 * VF PEs are single-device PEs so their pdev pointer needs to
-		 * be set. The pdev doesn't exist when the PE is allocated (in
-		 * (pcibios_sriov_enable()) so we fix it up here.
-		 */
-		pe->pdev = pdev;
-		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
-	} else if (pdev->is_physfn) {
-		/*
-		 * For PFs adjust their allocated IOV resources to match what
-		 * the PHB can support using it's M64 BAR table.
-		 */
-		pnv_pci_ioda_fixup_iov_resources(pdev);
-	}
-}
-#endif /* CONFIG_PCI_IOV */
-
 static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
 				  struct resource *res)
 {
@@ -3192,41 +2579,6 @@ static resource_size_t pnv_pci_default_alignment(void)
 	return PAGE_SIZE;
 }
 
-#ifdef CONFIG_PCI_IOV
-static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
-						      int resno)
-{
-	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
-	struct pci_dn *pdn = pci_get_pdn(pdev);
-	resource_size_t align;
-
-	/*
-	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
-	 * SR-IOV. While from hardware perspective, the range mapped by M64
-	 * BAR should be size aligned.
-	 *
-	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
-	 * powernv-specific hardware restriction is gone. But if just use the
-	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
-	 * in one segment of M64 #15, which introduces the PE conflict between
-	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
-	 * m64_segsize.
-	 *
-	 * This function returns the total IOV BAR size if M64 BAR is in
-	 * Shared PE mode or just VF BAR size if not.
-	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
-	 * M64 segment size if IOV BAR size is less.
-	 */
-	align = pci_iov_resource_size(pdev, resno);
-	if (!pdn->vfs_expanded)
-		return align;
-	if (pdn->m64_single_mode)
-		return max(align, (resource_size_t)phb->ioda.m64_segsize);
-
-	return pdn->vfs_expanded * align;
-}
-#endif /* CONFIG_PCI_IOV */
-
 /* Prevent enabling devices for which we couldn't properly
  * assign a PE
  */
@@ -3323,7 +2675,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
 	iommu_tce_table_put(tbl);
 }
 
-static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 {
 	struct iommu_table *tbl = pe->table_group.tables[0];
 	int64_t rc;
@@ -3436,12 +2788,23 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
+	/* The VF PE state is torn down when sriov_disable() is called */
 	if (pdev->is_virtfn)
 		return;
 
 	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
 		return;
 
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * FIXME: Try move this to sriov_disable(). It's here since we allocate
+	 * the iov state at probe time since we need to fiddle with the IOV
+	 * resources.
+	 */
+	if (pdev->is_physfn)
+		kfree(pdev->dev.archdata.iov_data);
+#endif
+
 	/*
 	 * PCI hotplug can happen as part of EEH error recovery. The @pdn
 	 * isn't removed and added afterwards in this scenario. We should
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
new file mode 100644
index 000000000000..080ea39f5a83
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/bitmap.h>
+#include <linux/pci.h>
+
+#include <asm/opal.h>
+
+#include "pci.h"
+
+/* for pci_dev_is_added() */
+#include "../../../../drivers/pci/pci.h"
+
+
+static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
+	struct resource *res;
+	int i;
+	resource_size_t size, total_vf_bar_sz;
+	struct pnv_iov_data *iov;
+	int mul, total_vfs;
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov)
+		goto truncate_iov;
+	pdev->dev.archdata.iov_data = iov;
+
+	total_vfs = pci_sriov_get_totalvfs(pdev);
+	mul = phb->ioda.total_pe_num;
+	total_vf_bar_sz = 0;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_m64_flags(res->flags)) {
+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
+					" non M64 VF BAR%d: %pR. \n",
+				 i, res);
+			goto truncate_iov;
+		}
+
+		total_vf_bar_sz += pci_iov_resource_size(pdev,
+				i + PCI_IOV_RESOURCES);
+
+		/*
+		 * If bigger than quarter of M64 segment size, just round up
+		 * power of two.
+		 *
+		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
+		 * with other devices, IOV BAR size is expanded to be
+		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
+		 * segment size , the expanded size would equal to half of the
+		 * whole M64 space size, which will exhaust the M64 Space and
+		 * limit the system flexibility.  This is a design decision to
+		 * set the boundary to quarter of the M64 segment size.
+		 */
+		if (total_vf_bar_sz > gate) {
+			mul = roundup_pow_of_two(total_vfs);
+			dev_info(&pdev->dev,
+				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
+				total_vf_bar_sz, gate, mul);
+			iov->m64_single_mode = true;
+			break;
+		}
+	}
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+
+		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+		/*
+		 * On PHB3, the minimum size alignment of M64 BAR in single
+		 * mode is 32MB.
+		 */
+		if (iov->m64_single_mode && (size < SZ_32M))
+			goto truncate_iov;
+		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
+		res->end = res->start + size * mul - 1;
+		dev_dbg(&pdev->dev, "                       %pR\n", res);
+		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
+			 i, res, mul);
+	}
+	iov->vfs_expanded = mul;
+
+	return;
+
+truncate_iov:
+	/* To save MMIO space, IOV BAR is truncated. */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		res->flags = 0;
+		res->end = res->start - 1;
+	}
+
+	pdev->dev.archdata.iov_data = NULL;
+	kfree(iov);
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
+{
+	if (WARN_ON(pci_dev_is_added(pdev)))
+		return;
+
+	if (pdev->is_virtfn) {
+		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
+
+		/*
+		 * VF PEs are single-device PEs so their pdev pointer needs to
+		 * be set. The pdev doesn't exist when the PE is allocated (in
+		 * pcibios_sriov_enable()) so we fix it up here.
+		 */
+		pe->pdev = pdev;
+		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
+	} else if (pdev->is_physfn) {
+		/*
+		 * For PFs, adjust their allocated IOV resources to match what
+		 * the PHB can support using its M64 BAR table.
+		 */
+		pnv_pci_ioda_fixup_iov_resources(pdev);
+	}
+}
+
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
+						      int resno)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+	resource_size_t align;
+
+	/*
+	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+	 * SR-IOV. While from hardware perspective, the range mapped by M64
+	 * BAR should be size aligned.
+	 *
+	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
+	 * powernv-specific hardware restriction is gone. But if just use the
+	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
+	 * in one segment of M64 #15, which introduces the PE conflict between
+	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
+	 * m64_segsize.
+	 *
+	 * This function returns the total IOV BAR size if M64 BAR is in
+	 * Shared PE mode or just VF BAR size if not.
+	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
+	 * M64 segment size if IOV BAR size is less.
+	 */
+	align = pci_iov_resource_size(pdev, resno);
+
+	/*
+	 * iov can be null if we have an SR-IOV device with an IOV BAR that
+	 * can't be placed in the m64 space (i.e. the BAR is 32bit or
+	 * non-prefetchable). In that case we don't allow VFs to be enabled,
+	 * so just return the default alignment.
+	 */
+	if (!iov)
+		return align;
+	if (!iov->vfs_expanded)
+		return align;
+	if (iov->m64_single_mode)
+		return max(align, (resource_size_t)phb->ioda.m64_segsize);
+
+	return iov->vfs_expanded * align;
+}
+
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	int                    i, j;
+	int                    m64_bars;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	if (iov->m64_single_mode)
+		m64_bars = num_vfs;
+	else
+		m64_bars = 1;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+		for (j = 0; j < m64_bars; j++) {
+			if (iov->m64_map[j][i] == IODA_INVALID_M64)
+				continue;
+			opal_pci_phb_mmio_enable(phb->opal_id,
+				OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 0);
+			clear_bit(iov->m64_map[j][i], &phb->ioda.m64_bar_alloc);
+			iov->m64_map[j][i] = IODA_INVALID_M64;
+		}
+
+	kfree(iov->m64_map);
+	return 0;
+}
+
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	unsigned int           win;
+	struct resource       *res;
+	int                    i, j;
+	int64_t                rc;
+	int                    total_vfs;
+	resource_size_t        size, start;
+	int                    pe_num;
+	int                    m64_bars;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+	total_vfs = pci_sriov_get_totalvfs(pdev);
+
+	if (iov->m64_single_mode)
+		m64_bars = num_vfs;
+	else
+		m64_bars = 1;
+
+	iov->m64_map = kmalloc_array(m64_bars,
+				     sizeof(*iov->m64_map),
+				     GFP_KERNEL);
+	if (!iov->m64_map)
+		return -ENOMEM;
+	/* Initialize the m64_map to IODA_INVALID_M64 */
+	for (i = 0; i < m64_bars ; i++)
+		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
+			iov->m64_map[i][j] = IODA_INVALID_M64;
+
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		for (j = 0; j < m64_bars; j++) {
+			do {
+				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+						phb->ioda.m64_bar_idx + 1, 0);
+
+				if (win >= phb->ioda.m64_bar_idx + 1)
+					goto m64_failed;
+			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+			iov->m64_map[j][i] = win;
+
+			if (iov->m64_single_mode) {
+				size = pci_iov_resource_size(pdev,
+							PCI_IOV_RESOURCES + i);
+				start = res->start + size * j;
+			} else {
+				size = resource_size(res);
+				start = res->start;
+			}
+
+			/* Map the M64 here */
+			if (iov->m64_single_mode) {
+				pe_num = iov->pe_num_map[j];
+				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+						pe_num, OPAL_M64_WINDOW_TYPE,
+						iov->m64_map[j][i], 0);
+			}
+
+			rc = opal_pci_set_phb_mem_window(phb->opal_id,
+						 OPAL_M64_WINDOW_TYPE,
+						 iov->m64_map[j][i],
+						 start,
+						 0, /* unused */
+						 size);
+
+
+			if (rc != OPAL_SUCCESS) {
+				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
+					win, rc);
+				goto m64_failed;
+			}
+
+			if (iov->m64_single_mode)
+				rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 2);
+			else
+				rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 1);
+
+			if (rc != OPAL_SUCCESS) {
+				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
+					win, rc);
+				goto m64_failed;
+			}
+		}
+	}
+	return 0;
+
+m64_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+	return -EBUSY;
+}
+
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe, *pe_n;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+
+	if (!pdev->is_physfn)
+		return;
+
+	/* FIXME: Use pnv_ioda_release_pe()? */
+	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+		if (pe->parent_dev != pdev)
+			continue;
+
+		pnv_pci_ioda2_release_pe_dma(pe);
+
+		/* Remove from list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_del(&pe->list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		pnv_ioda_deconfigure_pe(phb, pe);
+
+		pnv_ioda_free_pe(pe);
+	}
+}
+
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+	struct resource *res, res2;
+	struct pnv_iov_data *iov;
+	resource_size_t size;
+	u16 num_vfs;
+	int i;
+
+	if (!dev->is_physfn)
+		return -EINVAL;
+	iov = pnv_iov_get(dev);
+
+	/*
+	 * "offset" is in VFs.  The M64 windows are sized so that when they
+	 * are segmented, each segment is the same size as the IOV BAR.
+	 * Each segment is in a separate PE, and the high order bits of the
+	 * address are the PE number.  Therefore, each VF's BAR is in a
+	 * separate PE, and changing the IOV BAR start address changes the
+	 * range of PEs the VFs are in.
+	 */
+	num_vfs = iov->num_vfs;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		/*
+		 * The actual IOV BAR range is determined by the start address
+		 * and the actual size for num_vfs VFs BAR.  This check is to
+		 * make sure that after shifting, the range will not overlap
+		 * with another device.
+		 */
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2.flags = res->flags;
+		res2.start = res->start + (size * offset);
+		res2.end = res2.start + (size * num_vfs) - 1;
+
+		if (res2.end > res->end) {
+			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+				i, &res2, res, num_vfs, offset);
+			return -EBUSY;
+		}
+	}
+
+	/*
+	 * Since M64 BAR shares segments among all possible 256 PEs,
+	 * we have to shift the beginning of PF IOV BAR to make it start from
+	 * the segment which belongs to the PE number assigned to the first VF.
+	 * This creates a "hole" in the /proc/iomem which could be used for
+	 * allocating other resources so we reserve this area below and
+	 * release when IOV is released.
+	 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2 = *res;
+		res->start += size * offset;
+
+		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
+			 i, &res2, res, (offset > 0) ? "En" : "Dis",
+			 num_vfs, offset);
+
+		if (offset < 0) {
+			devm_release_resource(&dev->dev, &iov->holes[i]);
+			memset(&iov->holes[i], 0, sizeof(iov->holes[i]));
+		}
+
+		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+		if (offset > 0) {
+			iov->holes[i].start = res2.start;
+			iov->holes[i].end = res2.start + size * offset - 1;
+			iov->holes[i].flags = IORESOURCE_BUS;
+			iov->holes[i].name = "pnv_iov_reserved";
+			devm_request_resource(&dev->dev, res->parent,
+					&iov->holes[i]);
+		}
+	}
+	return 0;
+}
+
+static void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe;
+	struct pnv_iov_data   *iov;
+	u16                    num_vfs, i;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+	num_vfs = iov->num_vfs;
+
+	/* Release VF PEs */
+	pnv_ioda_release_vf_PE(pdev);
+
+	if (phb->type == PNV_PHB_IODA2) {
+		if (!iov->m64_single_mode)
+			pnv_pci_vf_resource_shift(pdev, -*iov->pe_num_map);
+
+		/* Release M64 windows */
+		pnv_pci_vf_release_m64(pdev, num_vfs);
+
+		/* Release PE numbers */
+		if (iov->m64_single_mode) {
+			for (i = 0; i < num_vfs; i++) {
+				if (iov->pe_num_map[i] == IODA_INVALID_PE)
+					continue;
+
+				pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
+				pnv_ioda_free_pe(pe);
+			}
+		} else
+			bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
+		/* Releasing pe_num_map */
+		kfree(iov->pe_num_map);
+	}
+}
+
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe;
+	int                    pe_num;
+	u16                    vf_index;
+	struct pnv_iov_data   *iov;
+	struct pci_dn         *pdn;
+
+	if (!pdev->is_physfn)
+		return;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	pdn = pci_get_pdn(pdev);
+	iov = pnv_iov_get(pdev);
+
+	/* Reserve PE for each VF */
+	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
+		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
+		struct pci_dn *vf_pdn;
+
+		if (iov->m64_single_mode)
+			pe_num = iov->pe_num_map[vf_index];
+		else
+			pe_num = *iov->pe_num_map + vf_index;
+
+		pe = &phb->ioda.pe_array[pe_num];
+		pe->pe_number = pe_num;
+		pe->phb = phb;
+		pe->flags = PNV_IODA_PE_VF;
+		pe->pbus = NULL;
+		pe->parent_dev = pdev;
+		pe->mve_number = -1;
+		pe->rid = (vf_bus << 8) | vf_devfn;
+
+		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
+			pci_domain_nr(pdev->bus), pdev->bus->number,
+			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
+
+		if (pnv_ioda_configure_pe(phb, pe)) {
+			/* XXX What do we do here ? */
+			pnv_ioda_free_pe(pe);
+			pe->pdev = NULL;
+			continue;
+		}
+
+		/* Put PE to the list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_add_tail(&pe->list, &phb->ioda.pe_list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		/* associate this pe to it's pdn */
+		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
+			if (vf_pdn->busno == vf_bus &&
+			    vf_pdn->devfn == vf_devfn) {
+				vf_pdn->pe_number = pe_num;
+				break;
+			}
+		}
+
+		pnv_pci_ioda2_setup_dma_pe(phb, pe);
+	}
+}
+
+static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe;
+	int                    ret;
+	u16                    i;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	if (phb->type == PNV_PHB_IODA2) {
+		if (!iov->vfs_expanded) {
+			dev_info(&pdev->dev, "don't support this SRIOV device"
+				" with non 64bit-prefetchable IOV BAR\n");
+			return -ENOSPC;
+		}
+
+		/*
+		 * When M64 BARs functions in Single PE mode, the number of VFs
+		 * could be enabled must be less than the number of M64 BARs.
+		 */
+		if (iov->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
+			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
+			return -EBUSY;
+		}
+
+		/* Allocating pe_num_map */
+		if (iov->m64_single_mode)
+			iov->pe_num_map = kmalloc_array(num_vfs,
+							sizeof(*iov->pe_num_map),
+							GFP_KERNEL);
+		else
+			iov->pe_num_map = kmalloc(sizeof(*iov->pe_num_map), GFP_KERNEL);
+
+		if (!iov->pe_num_map)
+			return -ENOMEM;
+
+		if (iov->m64_single_mode)
+			for (i = 0; i < num_vfs; i++)
+				iov->pe_num_map[i] = IODA_INVALID_PE;
+
+		/* Calculate available PE for required VFs */
+		if (iov->m64_single_mode) {
+			for (i = 0; i < num_vfs; i++) {
+				pe = pnv_ioda_alloc_pe(phb);
+				if (!pe) {
+					ret = -EBUSY;
+					goto m64_failed;
+				}
+
+				iov->pe_num_map[i] = pe->pe_number;
+			}
+		} else {
+			mutex_lock(&phb->ioda.pe_alloc_mutex);
+			*iov->pe_num_map = bitmap_find_next_zero_area(
+				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
+				0, num_vfs, 0);
+			if (*iov->pe_num_map >= phb->ioda.total_pe_num) {
+				mutex_unlock(&phb->ioda.pe_alloc_mutex);
+				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+				kfree(iov->pe_num_map);
+				return -EBUSY;
+			}
+			bitmap_set(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
+			mutex_unlock(&phb->ioda.pe_alloc_mutex);
+		}
+		iov->num_vfs = num_vfs;
+
+		/* Assign M64 window accordingly */
+		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
+		if (ret) {
+			dev_info(&pdev->dev, "Not enough M64 window resources\n");
+			goto m64_failed;
+		}
+
+		/*
+		 * When using one M64 BAR to map one IOV BAR, we need to shift
+		 * the IOV BAR according to the PE# allocated to the VFs.
+		 * Otherwise, the PE# for the VF will conflict with others.
+		 */
+		if (!iov->m64_single_mode) {
+			ret = pnv_pci_vf_resource_shift(pdev, *iov->pe_num_map);
+			if (ret)
+				goto m64_failed;
+		}
+	}
+
+	/* Setup VF PEs */
+	pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+	return 0;
+
+m64_failed:
+	if (iov->m64_single_mode) {
+		for (i = 0; i < num_vfs; i++) {
+			if (iov->pe_num_map[i] == IODA_INVALID_PE)
+				continue;
+
+			pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
+			pnv_ioda_free_pe(pe);
+		}
+	} else
+		bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
+
+	/* Releasing pe_num_map */
+	kfree(iov->pe_num_map);
+
+	return ret;
+}
+
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	pnv_pci_sriov_disable(pdev);
+
+	/* Release PCI data */
+	remove_sriov_vf_pdns(pdev);
+	return 0;
+}
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	/* Allocate PCI data */
+	add_sriov_vf_pdns(pdev);
+
+	return pnv_pci_sriov_enable(pdev, num_vfs);
+}
+
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 6aa6aefb637d..0156d7d17f7d 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -194,6 +194,80 @@ struct pnv_phb {
 	u8			*diag_data;
 };
 
+
+/* IODA PE management */
+
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
+{
+	/*
+	 * WARNING: We cannot rely on the resource flags. The Linux PCI
+	 * allocation code sometimes decides to put a 64-bit prefetchable
+	 * BAR in the 32-bit window, so we have to compare the addresses.
+	 *
+	 * For simplicity we only test resource start.
+	 */
+	return (r->start >= phb->ioda.m64_base &&
+		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
+}
+
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+
+	return (resource_flags & flags) == flags;
+}
+
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
+
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb);
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * For SR-IOV we want to put each VF's MMIO resource into a separate PE.
+ * This requires a bit of acrobatics with the MMIO -> PE configuration
+ * and this structure is used to keep track of it all.
+ */
+struct pnv_iov_data {
+	/* number of VFs the IOV BAR was expanded to. FIXME: rename this to something less bad */
+	u16     vfs_expanded;
+
+	/* number of VFs enabled */
+	u16     num_vfs;
+	unsigned int *pe_num_map;	/* PE# for the first VF PE or array */
+
+	/* Did we map the VF BARs with single-PE IODA BARs? */
+	bool    m64_single_mode;
+
+	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
+#define IODA_INVALID_M64        (-1)
+
+	/*
+	 * If we map the SR-IOV BARs with a segmented window then
+	 * parts of that window will be "claimed" by other PEs.
+	 *
+	 * "holes" here is used to reserve the leading portion
+	 * of the window that is used by other (non VF) PEs.
+	 */
+	struct resource holes[PCI_SRIOV_NUM_BARS];
+};
+
+static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
+{
+	return pdev->dev.archdata.iov_data;
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev);
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno);
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
+
 extern struct pci_ops pnv_pci_ops;
 
 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (4 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  0:40   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov Oliver O'Halloran
                   ` (9 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

SR-IOV support on PowerNV is a byzantine maze of hooks. I have no idea
how anyone is supposed to know how it works except through a lot of
suffering. Write up some docs about the overall story to help out
the next sucker^Wperson who needs to tinker with it.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 130 +++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 080ea39f5a83..f4c74ab1284d 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -12,6 +12,136 @@
 /* for pci_dev_is_added() */
 #include "../../../../drivers/pci/pci.h"
 
+/*
+ * The majority of the complexity in supporting SR-IOV on PowerNV comes from
+ * the need to put the MMIO space for each VF into a separate PE. Internally
+ * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table".
+ * The MBT historically only applied to the 64bit MMIO window of the PHB
+ * so it's common to see it referred to as the "M64BT".
+ *
+ * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces
+ * the address range that we want to map to be power-of-two sized and aligned.
+ * For conventional PCI devices this isn't really an issue since PCI device BARs
+ * have the same requirement.
+ *
+ * For an SR-IOV BAR things are a little more awkward since size and alignment
+ * are not coupled. The alignment is set based on the per-VF BAR size, but
+ * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs
+ * isn't necessarily a power of two, so neither is the total size. To fix that
+ * we need to finesse (read: hack) the Linux BAR allocator so that it will
+ * allocate the SR-IOV BARs in a way that lets us map them using the MBT.
+ *
+ * The changes to size and alignment that we need to do depend on the "mode"
+ * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above,
+ * so as a baseline we can assume that we have the following BAR modes
+ * available:
+ *
+ *   NB: $PE_COUNT is the number of PEs that the PHB supports.
+ *
+ * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized
+ *    segments. The n'th segment is mapped to the n'th PE.
+ * b) An un-segmented BAR that maps the whole address range to a specific PE.
+ *
+ *
+ * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR.
+ * For comparison, b) requires one entry per-VF per-BAR, or:
+ * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment
+ * to equal the size of the per-VF BAR area. So:
+ *
+ *	new_size = per-vf-size * number-of-PEs
+ *
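+ * For example (illustrative numbers rather than any particular device): a 1MB
+ * per-VF BAR on a PHB with 256 PEs is expanded to 1MB * 256 = 256MB, and that
+ * 256MB also becomes the required alignment for the BAR, no matter how many
+ * VFs are actually enabled.
+ *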
+ * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size
+ * to "new_size", calculated above. Implementing this is a convoluted process
+ * which requires several hooks in the PCI core:
+ *
+ * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov().
+ *
+ *    At this point the device has been probed and the device's BARs are sized,
+ *    but no resource allocations have been done. The SR-IOV BARs are sized
+ *    based on the maximum number of VFs supported by the device and we need
+ *    to increase that to new_size.
+ *
+ * 2. Later, when Linux actually assigns resources it tries to make the resource
+ *    allocations for each PCI bus as compact as possible. As a part of that it
+ *    sorts the BARs on a bus by their required alignment, which is calculated
+ *    using pci_resource_alignment().
+ *
+ *    For IOV resources this goes:
+ *    pci_resource_alignment()
+ *        pci_sriov_resource_alignment()
+ *            pcibios_sriov_resource_alignment()
+ *                pnv_pci_iov_resource_alignment()
+ *
+ *    Our hook overrides the default alignment, equal to the per-vf-size, with
+ *    new_size computed above.
+ *
+ * 3. When userspace enables VFs for a device:
+ *
+ *    sriov_enable()
+ *       pcibios_sriov_enable()
+ *           pnv_pcibios_sriov_enable()
+ *
+ *    This is where we actually allocate PE numbers for each VF and setup the
+ *    MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena"
+ *    where each MBT segment is equal in size to the VF BAR so we can shift
+ *    around the actual SR-IOV BAR location within this arena. We need this
+ *    ability because the PE space is shared by all devices on the same PHB.
+ *    When using mode a) described above, segment 0 maps to PE#0, which might
+ *    already be in use by another device on the PHB.
+ *
+ *    As a result we need to allocate a contiguous range of PE numbers, then shift
+ *    the address programmed into the SR-IOV BAR of the PF so that the address
+ *    of VF0 matches up with the segment corresponding to the first allocated
+ *    PE number. This is handled in pnv_pci_vf_resource_shift().
+ *
+ *    Once all that is done we return to the PCI core which then enables VFs,
+ *    scans them and creates pci_devs for each. The init process for a VF is
+ *    largely the same as a normal device, but the VF is inserted into the IODA
+ *    PE that we allocated for it rather than the PE associated with the bus.
+ *
+ * 4. When userspace disables VFs we unwind the above in
+ *    pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since
+ *    we don't need to validate anything, just tear down the mappings and
+ *    move the SR-IOV resources back to their "proper" location.
+ *
+ * That's how mode a) works. In theory mode b) (single PE mapping) is less work
+ * since we can map each individual VF with a separate BAR. However, there's a
+ * few limitations:
+ *
+ * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes
+ *    it only usable for devices with very large per-VF BARs. Such devices are
+ *    similar to Big Foot. They definitely exist, but I've never seen one.
+ *
+ * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only
+ *    have 16 in total and some are needed for other purposes. Most SR-IOV
+ *    capable network cards can support more than 16 VFs on each port.
+ *
+ * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO
+ * window of the PHB.
+ *
+ *
+ *
+ * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It
+ * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows
+ * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since
+ * the Linux BAR allocation will place any BAR marked as non-prefetchable into
+ * the non-prefetchable bridge window, which is 32bit only. It also added two
+ * new modes:
+ *
+ * c) A segmented BAR similar to a), but each segment can be individually
+ *    mapped to any PE. This is matches how the 32bit MMIO window worked on
+ *    mapped to any PE. This matches how the 32bit MMIO window worked on
+ *
+ * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a),
+ *    but with fewer segments and configurable base PE.
+ *
+ *    i.e. The n'th segment maps to the (n + base)'th PE.
+ *
+ *    The base PE is also required to be a multiple of the window size.
+ *
+ * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us
+ * to exploit any of the IODA3 features.
+ */
 
 static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 {
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (5 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  0:46   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking Oliver O'Halloran
                   ` (8 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

The truncate_iov path prevents SR-IOV from being used by making the SR-IOV
BAR resources unallocatable. Rename it to reflect what it actually does.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index f4c74ab1284d..216ceeff69b0 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -155,7 +155,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 
 	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
 	if (!iov)
-		goto truncate_iov;
+		goto disable_iov;
 	pdev->dev.archdata.iov_data = iov;
 
 	total_vfs = pci_sriov_get_totalvfs(pdev);
@@ -170,7 +170,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 			dev_warn(&pdev->dev, "Don't support SR-IOV with"
 					" non M64 VF BAR%d: %pR. \n",
 				 i, res);
-			goto truncate_iov;
+			goto disable_iov;
 		}
 
 		total_vf_bar_sz += pci_iov_resource_size(pdev,
@@ -209,7 +209,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 		 * mode is 32MB.
 		 */
 		if (iov->m64_single_mode && (size < SZ_32M))
-			goto truncate_iov;
+			goto disable_iov;
+
 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
 		res->end = res->start + size * mul - 1;
 		dev_dbg(&pdev->dev, "                       %pR\n", res);
@@ -220,8 +221,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 
 	return;
 
-truncate_iov:
-	/* To save MMIO space, IOV BAR is truncated. */
+disable_iov:
+	/* Save ourselves some MMIO space by disabling the unusable BARs */
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
 		res->flags = 0;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (6 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  1:34   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup Oliver O'Halloran
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

No need for the multi-dimensional arrays, just use a bitmap.
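
The gist of it is one bit per PHB M64 window (a sketch of the approach, using
the names added in this patch):

	/* in struct pnv_iov_data */
	DECLARE_BITMAP(used_m64_bar_mask, 64);

	/* assign path: record each window we grab */
	set_bit(win, iov->used_m64_bar_mask);

	/* release path: walk the mask rather than the per-BAR array */
	for_each_set_bit(window_id, iov->used_m64_bar_mask, 64) {
		opal_pci_phb_mmio_enable(phb->opal_id, OPAL_M64_WINDOW_TYPE,
					 window_id, 0);
		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
	}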

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 48 +++++++---------------
 arch/powerpc/platforms/powernv/pci.h       |  7 +++-
 2 files changed, 20 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 216ceeff69b0..e4c65cb49757 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -303,28 +303,20 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
 {
 	struct pnv_iov_data   *iov;
 	struct pnv_phb        *phb;
-	int                    i, j;
-	int                    m64_bars;
+	int window_id;
 
 	phb = pci_bus_to_pnvhb(pdev->bus);
 	iov = pnv_iov_get(pdev);
 
-	if (iov->m64_single_mode)
-		m64_bars = num_vfs;
-	else
-		m64_bars = 1;
+	for_each_set_bit(window_id, iov->used_m64_bar_mask, 64) {
+		opal_pci_phb_mmio_enable(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
 
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-		for (j = 0; j < m64_bars; j++) {
-			if (iov->m64_map[j][i] == IODA_INVALID_M64)
-				continue;
-			opal_pci_phb_mmio_enable(phb->opal_id,
-				OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 0);
-			clear_bit(iov->m64_map[j][i], &phb->ioda.m64_bar_alloc);
-			iov->m64_map[j][i] = IODA_INVALID_M64;
-		}
+		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
+	}
 
-	kfree(iov->m64_map);
 	return 0;
 }
 
@@ -350,23 +342,14 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	else
 		m64_bars = 1;
 
-	iov->m64_map = kmalloc_array(m64_bars,
-				     sizeof(*iov->m64_map),
-				     GFP_KERNEL);
-	if (!iov->m64_map)
-		return -ENOMEM;
-	/* Initialize the m64_map to IODA_INVALID_M64 */
-	for (i = 0; i < m64_bars ; i++)
-		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
-			iov->m64_map[i][j] = IODA_INVALID_M64;
-
-
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || !res->parent)
 			continue;
 
 		for (j = 0; j < m64_bars; j++) {
+
+			/* allocate a window ID for this BAR */
 			do {
 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
 						phb->ioda.m64_bar_idx + 1, 0);
@@ -374,8 +357,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 				if (win >= phb->ioda.m64_bar_idx + 1)
 					goto m64_failed;
 			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
-
-			iov->m64_map[j][i] = win;
+			set_bit(win, iov->used_m64_bar_mask);
 
 			if (iov->m64_single_mode) {
 				size = pci_iov_resource_size(pdev,
@@ -391,12 +373,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 				pe_num = iov->pe_num_map[j];
 				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
 						pe_num, OPAL_M64_WINDOW_TYPE,
-						iov->m64_map[j][i], 0);
+						win, 0);
 			}
 
 			rc = opal_pci_set_phb_mem_window(phb->opal_id,
 						 OPAL_M64_WINDOW_TYPE,
-						 iov->m64_map[j][i],
+						 win,
 						 start,
 						 0, /* unused */
 						 size);
@@ -410,10 +392,10 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 
 			if (iov->m64_single_mode)
 				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 2);
+				     OPAL_M64_WINDOW_TYPE, win, 2);
 			else
 				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 1);
+				     OPAL_M64_WINDOW_TYPE, win, 1);
 
 			if (rc != OPAL_SUCCESS) {
 				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 0156d7d17f7d..58c97e60c3db 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -243,8 +243,11 @@ struct pnv_iov_data {
 	/* Did we map the VF BARs with single-PE IODA BARs? */
 	bool    m64_single_mode;
 
-	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
-#define IODA_INVALID_M64        (-1)
+	/*
+	 * Bit mask used to track which m64 windows we used to map the
+	 * SR-IOV BARs for this device.
+	 */
+	DECLARE_BITMAP(used_m64_bar_mask, 64);
 
 	/*
 	 * If we map the SR-IOV BARs with a segmented window then
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (7 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  2:09   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe() Oliver O'Halloran
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

The sequence required to use the single PE BAR mode is kinda janky and
requires a little explanation. The API was designed with P7-IOC style
windows where the setup process is something like:

1. Configure the window start / end address
2. Enable the window
3. Map the segments of each window to the PE

For Single PE BARs the process is:

1. Set the PE for segment zero on a disabled window
2. Set the range
3. Enable the window

Move the OPAL calls into their own helper functions where the quirks can be
contained.
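
For reference, the Single PE path boils down to the following order of OPAL
calls (a sketch mirroring the new pnv_ioda_map_m64_single() helper, with error
handling omitted):

	/* 1. bind segment zero of the still-disabled window to the PE */
	opal_pci_map_pe_mmio_window(phb->opal_id, pe_num,
				    OPAL_M64_WINDOW_TYPE, window_id, 0);

	/* 2. set the window's address range */
	opal_pci_set_phb_mem_window(phb->opal_id, OPAL_M64_WINDOW_TYPE,
				    window_id, start, 0 /* unused */, size);

	/* 3. enable the window in non-split (single PE) mode */
	opal_pci_phb_mmio_enable(phb->opal_id, OPAL_M64_WINDOW_TYPE,
				 window_id, OPAL_ENABLE_M64_NON_SPLIT);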

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 132 ++++++++++++++++-----
 1 file changed, 103 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index e4c65cb49757..d53a85ccb538 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -320,6 +320,102 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
 	return 0;
 }
 
+
+/*
+ * PHB3 and beyond support "accordion" windows. The window's address range
+ * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1
+ * mapping between PEs and segments.
+ *
+ * They're called that because as the window size changes the segment sizes
+ * change with it. Sort of like an accordion, sort of.
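+ *
+ * e.g. a 256MB window on a PHB with 256 PEs is split into 1MB segments, with
+ * the n'th segment owned by PE#n. Resizing the window to 512MB grows every
+ * segment to 2MB.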
+ */
+static int64_t pnv_ioda_map_m64_accordion(struct pnv_phb *phb,
+					  int window_id,
+					  resource_size_t start,
+					  resource_size_t size)
+{
+	int64_t rc;
+
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* unused */
+					 size);
+	if (rc)
+		goto out;
+
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      window_id,
+				      OPAL_ENABLE_M64_SPLIT);
+out:
+	if (rc)
+		pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc);
+
+	return rc;
+}
+
+static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
+				       int window_id,
+				       int pe_num,
+				       resource_size_t start,
+				       resource_size_t size)
+{
+	int64_t rc;
+
+	/*
+	 * The API for setting up m64 mmio windows seems to have been designed
+	 * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed
+	 * split of 8 equally sized segments, each of which could be
+	 * individually assigned to a PE.
+	 *
+	 * The problem with this is that the API doesn't have any way to
+	 * communicate the number of segments we want on a BAR. This wasn't
+	 * a problem for p7-ioc since you didn't have a choice, but the
+	 * single PE windows added in PHB3 don't map cleanly to this API.
+	 *
+	 * As a result we've got this slightly awkward process where we
+	 * call opal_pci_map_pe_mmio_window() to put the window in single
+	 * PE mode, and set the PE for the window before setting the address
+	 * bounds. We need to do it this way because the single PE windows
+	 * have different alignment requirements on PHB3.
+	 */
+	rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+					 pe_num,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
+	if (rc)
+		goto out;
+
+	/*
+	 * NB: In single PE mode the window needs to be aligned to 32MB
+	 */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* ignored by FW, m64 is 1-1 */
+					 size);
+	if (rc)
+		goto out;
+
+	/*
+	 * Now actually enable it. We specified the BAR should be in "non-split"
+	 * mode so FW will validate that the BAR is in single PE mode.
+	 */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      window_id,
+				      OPAL_ENABLE_M64_NON_SPLIT);
+out:
+	if (rc)
+		pr_err("Error mapping single PE BAR\n");
+
+	return rc;
+}
+
 static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 {
 	struct pnv_iov_data   *iov;
@@ -330,7 +426,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	int64_t                rc;
 	int                    total_vfs;
 	resource_size_t        size, start;
-	int                    pe_num;
 	int                    m64_bars;
 
 	phb = pci_bus_to_pnvhb(pdev->bus);
@@ -359,49 +454,28 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
 			set_bit(win, iov->used_m64_bar_mask);
 
+
 			if (iov->m64_single_mode) {
 				size = pci_iov_resource_size(pdev,
 							PCI_IOV_RESOURCES + i);
 				start = res->start + size * j;
+				rc = pnv_ioda_map_m64_single(phb, win,
+							     iov->pe_num_map[j],
+							     start,
+							     size);
 			} else {
 				size = resource_size(res);
 				start = res->start;
-			}
 
-			/* Map the M64 here */
-			if (iov->m64_single_mode) {
-				pe_num = iov->pe_num_map[j];
-				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-						pe_num, OPAL_M64_WINDOW_TYPE,
-						win, 0);
+				rc = pnv_ioda_map_m64_accordion(phb, win, start,
+								size);
 			}
 
-			rc = opal_pci_set_phb_mem_window(phb->opal_id,
-						 OPAL_M64_WINDOW_TYPE,
-						 win,
-						 start,
-						 0, /* unused */
-						 size);
-
-
 			if (rc != OPAL_SUCCESS) {
 				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
 					win, rc);
 				goto m64_failed;
 			}
-
-			if (iov->m64_single_mode)
-				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, win, 2);
-			else
-				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, win, 1);
-
-			if (rc != OPAL_SUCCESS) {
-				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
-					win, rc);
-				goto m64_failed;
-			}
 		}
 	}
 	return 0;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (8 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  2:29   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[] Oliver O'Halloran
                   ` (5 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Rework the PE allocation logic to allow allocating blocks of PEs rather
than one at a time. We'll use this to allocate contiguous blocks of PEs for
the SR-IOV VFs.
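
Existing callers just pass a count of 1. The SR-IOV path can then do something
like the following (a sketch of the intended usage, based on the later patches
in this series):

	/* one PE per VF, allocated as a single contiguous block */
	base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
	if (!base_pe)
		return -EBUSY;

	/* VF n then lands in PE number base_pe->pe_number + n */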

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 41 ++++++++++++++++++-----
 arch/powerpc/platforms/powernv/pci.h      |  2 +-
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2d36a9ebf0e9..c9c25fb0783c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -145,23 +145,45 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 		return;
 	}
 
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
 	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
 		pr_debug("%s: PE %x was reserved on PHB#%x\n",
 			 __func__, pe_no, phb->hose->global_number);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
 
 	pnv_ioda_init_pe(phb, pe_no);
 }
 
-struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
 {
-	long pe;
+	struct pnv_ioda_pe *ret = NULL;
+	int run = 0, pe, i;
 
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+
+	/* scan backwards for a run of @count cleared bits */
 	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
-		if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
-			return pnv_ioda_init_pe(phb, pe);
+		if (test_bit(pe, phb->ioda.pe_alloc)) {
+			run = 0;
+			continue;
+		}
+
+		run++;
+		if (run == count)
+			break;
 	}
+	if (run != count)
+		goto out;
 
-	return NULL;
+	for (i = pe; i < pe + count; i++) {
+		set_bit(i, phb->ioda.pe_alloc);
+		pnv_ioda_init_pe(phb, i);
+	}
+	ret = &phb->ioda.pe_array[pe];
+
+out:
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+	return ret;
 }
 
 void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
@@ -173,7 +195,10 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
 	kfree(pe->npucomp);
 	memset(pe, 0, sizeof(struct pnv_ioda_pe));
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
 	clear_bit(pe_num, phb->ioda.pe_alloc);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
 }
 
 /* The default M64 BAR is shared by all PEs */
@@ -976,7 +1001,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 	if (pdn->pe_number != IODA_INVALID_PE)
 		return NULL;
 
-	pe = pnv_ioda_alloc_pe(phb);
+	pe = pnv_ioda_alloc_pe(phb, 1);
 	if (!pe) {
 		pr_warn("%s: Not enough PE# available, disabling device\n",
 			pci_name(dev));
@@ -1047,7 +1072,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 
 	/* The PE number isn't pinned by M64 */
 	if (!pe)
-		pe = pnv_ioda_alloc_pe(phb);
+		pe = pnv_ioda_alloc_pe(phb, 1);
 
 	if (!pe) {
 		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
@@ -3065,7 +3090,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
 	} else {
 		/* otherwise just allocate one */
-		root_pe = pnv_ioda_alloc_pe(phb);
+		root_pe = pnv_ioda_alloc_pe(phb, 1);
 		phb->ioda.root_pe_idx = root_pe->pe_number;
 	}
 
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 58c97e60c3db..b4c9bdba7217 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -223,7 +223,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
 void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
 void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
 
-struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb);
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
 void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
 
 #ifdef CONFIG_PCI_IOV
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[]
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (9 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe() Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  3:31   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown Oliver O'Halloran
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Currently the iov->pe_num_map[] does one of two things depending on
whether single PE mode is being used or not. When it is, this contains an
array which maps a vf_index to the corresponding PE number. When single PE
mode is not being used this contains a scalar which is the base PE for the
set of enabled VFs (so for VFn it is base + n).

The array was necessary because when calling pnv_ioda_alloc_pe() there is
no guarantee that the allocated PEs would be contiguous. We can now
allocate contiguous blocks of PEs so this is no longer an issue. This
allows us to drop the if (single_mode) {} .. else {} blocks scattered
through the SR-IOV code, which is a nice cleanup.
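
In effect the VF PE lookup collapses from (sketch, not a literal diff):

	pe_num = iov->m64_single_mode ? iov->pe_num_map[vf_index]
				      : *iov->pe_num_map + vf_index;
	pe = &phb->ioda.pe_array[pe_num];

to:

	pe = &iov->vf_pe_arr[vf_index];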

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 109 +++++----------------
 arch/powerpc/platforms/powernv/pci.h       |   4 +-
 2 files changed, 25 insertions(+), 88 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index d53a85ccb538..08f88187d65a 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -456,11 +456,13 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 
 
 			if (iov->m64_single_mode) {
+				int pe_num = iov->vf_pe_arr[j].pe_number;
+
 				size = pci_iov_resource_size(pdev,
 							PCI_IOV_RESOURCES + i);
 				start = res->start + size * j;
 				rc = pnv_ioda_map_m64_single(phb, win,
-							     iov->pe_num_map[j],
+							     pe_num,
 							     start,
 							     size);
 			} else {
@@ -599,38 +601,24 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 
 static void pnv_pci_sriov_disable(struct pci_dev *pdev)
 {
+	u16                    num_vfs, base_pe;
 	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
 	struct pnv_iov_data   *iov;
-	u16                    num_vfs, i;
 
 	phb = pci_bus_to_pnvhb(pdev->bus);
 	iov = pnv_iov_get(pdev);
 	num_vfs = iov->num_vfs;
+	base_pe = iov->vf_pe_arr[0].pe_number;
 
 	/* Release VF PEs */
 	pnv_ioda_release_vf_PE(pdev);
 
 	if (phb->type == PNV_PHB_IODA2) {
 		if (!iov->m64_single_mode)
-			pnv_pci_vf_resource_shift(pdev, -*iov->pe_num_map);
+			pnv_pci_vf_resource_shift(pdev, -base_pe);
 
 		/* Release M64 windows */
 		pnv_pci_vf_release_m64(pdev, num_vfs);
-
-		/* Release PE numbers */
-		if (iov->m64_single_mode) {
-			for (i = 0; i < num_vfs; i++) {
-				if (iov->pe_num_map[i] == IODA_INVALID_PE)
-					continue;
-
-				pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
-				pnv_ioda_free_pe(pe);
-			}
-		} else
-			bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
-		/* Releasing pe_num_map */
-		kfree(iov->pe_num_map);
 	}
 }
 
@@ -656,13 +644,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
 		struct pci_dn *vf_pdn;
 
-		if (iov->m64_single_mode)
-			pe_num = iov->pe_num_map[vf_index];
-		else
-			pe_num = *iov->pe_num_map + vf_index;
-
-		pe = &phb->ioda.pe_array[pe_num];
-		pe->pe_number = pe_num;
+		pe = &iov->vf_pe_arr[vf_index];
 		pe->phb = phb;
 		pe->flags = PNV_IODA_PE_VF;
 		pe->pbus = NULL;
@@ -670,6 +652,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 		pe->mve_number = -1;
 		pe->rid = (vf_bus << 8) | vf_devfn;
 
+		pe_num = pe->pe_number;
 		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
 			pci_domain_nr(pdev->bus), pdev->bus->number,
 			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
@@ -701,9 +684,9 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 
 static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 {
+	struct pnv_ioda_pe    *base_pe;
 	struct pnv_iov_data   *iov;
 	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
 	int                    ret;
 	u16                    i;
 
@@ -717,55 +700,14 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 			return -ENOSPC;
 		}
 
-		/*
-		 * When M64 BARs functions in Single PE mode, the number of VFs
-		 * could be enabled must be less than the number of M64 BARs.
-		 */
-		if (iov->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
-			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
+		/* allocate a contiguous block of PEs for our VFs */
+		base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
+		if (!base_pe) {
+			pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
 			return -EBUSY;
 		}
 
-		/* Allocating pe_num_map */
-		if (iov->m64_single_mode)
-			iov->pe_num_map = kmalloc_array(num_vfs,
-							sizeof(*iov->pe_num_map),
-							GFP_KERNEL);
-		else
-			iov->pe_num_map = kmalloc(sizeof(*iov->pe_num_map), GFP_KERNEL);
-
-		if (!iov->pe_num_map)
-			return -ENOMEM;
-
-		if (iov->m64_single_mode)
-			for (i = 0; i < num_vfs; i++)
-				iov->pe_num_map[i] = IODA_INVALID_PE;
-
-		/* Calculate available PE for required VFs */
-		if (iov->m64_single_mode) {
-			for (i = 0; i < num_vfs; i++) {
-				pe = pnv_ioda_alloc_pe(phb);
-				if (!pe) {
-					ret = -EBUSY;
-					goto m64_failed;
-				}
-
-				iov->pe_num_map[i] = pe->pe_number;
-			}
-		} else {
-			mutex_lock(&phb->ioda.pe_alloc_mutex);
-			*iov->pe_num_map = bitmap_find_next_zero_area(
-				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
-				0, num_vfs, 0);
-			if (*iov->pe_num_map >= phb->ioda.total_pe_num) {
-				mutex_unlock(&phb->ioda.pe_alloc_mutex);
-				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
-				kfree(iov->pe_num_map);
-				return -EBUSY;
-			}
-			bitmap_set(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
-			mutex_unlock(&phb->ioda.pe_alloc_mutex);
-		}
+		iov->vf_pe_arr = base_pe;
 		iov->num_vfs = num_vfs;
 
 		/* Assign M64 window accordingly */
@@ -781,9 +723,10 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 		 * Otherwise, the PE# for the VF will conflict with others.
 		 */
 		if (!iov->m64_single_mode) {
-			ret = pnv_pci_vf_resource_shift(pdev, *iov->pe_num_map);
+			ret = pnv_pci_vf_resource_shift(pdev,
+							base_pe->pe_number);
 			if (ret)
-				goto m64_failed;
+				goto shift_failed;
 		}
 	}
 
@@ -792,20 +735,12 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 
 	return 0;
 
-m64_failed:
-	if (iov->m64_single_mode) {
-		for (i = 0; i < num_vfs; i++) {
-			if (iov->pe_num_map[i] == IODA_INVALID_PE)
-				continue;
-
-			pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
-			pnv_ioda_free_pe(pe);
-		}
-	} else
-		bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
+shift_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
 
-	/* Releasing pe_num_map */
-	kfree(iov->pe_num_map);
+m64_failed:
+	for (i = 0; i < num_vfs; i++)
+		pnv_ioda_free_pe(&iov->vf_pe_arr[i]);
 
 	return ret;
 }
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b4c9bdba7217..13555bc549f4 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -238,7 +238,9 @@ struct pnv_iov_data {
 
 	/* number of VFs enabled */
 	u16     num_vfs;
-	unsigned int *pe_num_map;	/* PE# for the first VF PE or array */
+
+	/* pointer to the array of VF PEs, num_vfs long */
+	struct pnv_ioda_pe *vf_pe_arr;
 
 	/* Did we map the VF BARs with single-PE IODA BARs? */
 	bool    m64_single_mode;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (10 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[] Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  4:00   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper Oliver O'Halloran
                   ` (3 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Remove the IODA2 PHB checks. We already assume IODA2 in several places so
there's not much point in wrapping most of the setup and teardown process
in an if block.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 86 ++++++++++++----------
 1 file changed, 49 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 08f88187d65a..d5699cd2ab7a 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -610,16 +610,18 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev)
+	if (WARN_ON(!iov))
+		return;
+
 	num_vfs = iov->num_vfs;
 	base_pe = iov->vf_pe_arr[0].pe_number;
 
 	/* Release VF PEs */
 	pnv_ioda_release_vf_PE(pdev);
 
-	if (phb->type == PNV_PHB_IODA2) {
-		if (!iov->m64_single_mode)
-			pnv_pci_vf_resource_shift(pdev, -base_pe);
+	/* Un-shift the IOV BAR resources */
+	if (!iov->m64_single_mode)
+		pnv_pci_vf_resource_shift(pdev, -base_pe);
 
-		/* Release M64 windows */
-		pnv_pci_vf_release_m64(pdev, num_vfs);
-	}
+	/* Release M64 windows */
+	pnv_pci_vf_release_m64(pdev, num_vfs);
 }
 
 static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -693,41 +695,51 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	phb = pci_bus_to_pnvhb(pdev->bus);
 	iov = pnv_iov_get(pdev);
 
-	if (phb->type == PNV_PHB_IODA2) {
-		if (!iov->vfs_expanded) {
-			dev_info(&pdev->dev, "don't support this SRIOV device"
-				" with non 64bit-prefetchable IOV BAR\n");
-			return -ENOSPC;
-		}
+	/*
+	 * There are calls to IODA2 PE setup code littered throughout. We could
+	 * probably fix that, but we'd still have problems due to the
+	 * restrictions inherent to IODA1 PHBs.
+	 *
+	 * NB: We class IODA3 as IODA2 since they're very similar.
+	 */
+	if (phb->type != PNV_PHB_IODA2) {
+		pci_err(pdev, "SR-IOV is not supported on this PHB\n");
+		return -ENXIO;
+	}
 
-		/* allocate a contiguous block of PEs for our VFs */
-		base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
-		if (!base_pe) {
-			pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
-			return -EBUSY;
-		}
+	if (!iov->vfs_expanded) {
+		dev_info(&pdev->dev, "don't support this SRIOV device"
+			" with non 64bit-prefetchable IOV BAR\n");
+		return -ENOSPC;
+	}
 
-		iov->vf_pe_arr = base_pe;
-		iov->num_vfs = num_vfs;
+	/* allocate a contiguous block of PEs for our VFs */
+	base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
+	if (!base_pe) {
+		pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
+		return -EBUSY;
+	}
 
-		/* Assign M64 window accordingly */
-		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
-		if (ret) {
-			dev_info(&pdev->dev, "Not enough M64 window resources\n");
-			goto m64_failed;
-		}
+	iov->vf_pe_arr = base_pe;
+	iov->num_vfs = num_vfs;
 
-		/*
-		 * When using one M64 BAR to map one IOV BAR, we need to shift
-		 * the IOV BAR according to the PE# allocated to the VFs.
-		 * Otherwise, the PE# for the VF will conflict with others.
-		 */
-		if (!iov->m64_single_mode) {
-			ret = pnv_pci_vf_resource_shift(pdev,
-							base_pe->pe_number);
-			if (ret)
-				goto shift_failed;
-		}
+	/* Assign M64 window accordingly */
+	ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
+	if (ret) {
+		dev_info(&pdev->dev, "Not enough M64 window resources\n");
+		goto m64_failed;
+	}
+
+	/*
+	 * When using one M64 BAR to map one IOV BAR, we need to shift
+	 * the IOV BAR according to the PE# allocated to the VFs.
+	 * Otherwise, the PE# for the VF will conflict with others.
+	 */
+	if (!iov->m64_single_mode) {
+		ret = pnv_pci_vf_resource_shift(pdev,
+						base_pe->pe_number);
+		if (ret)
+			goto shift_failed;
 	}
 
 	/* Setup VF PEs */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (11 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  4:02   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup Oliver O'Halloran
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

I want to refactor the loop this code is currently inside of. Hoist it on
out.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 31 ++++++++++++++--------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index d5699cd2ab7a..2f967aa4fbf5 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -416,6 +416,23 @@ static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
 	return rc;
 }
 
+static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov)
+{
+	int win;
+
+	do {
+		win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+				phb->ioda.m64_bar_idx + 1, 0);
+
+		if (win >= phb->ioda.m64_bar_idx + 1)
+			return -1;
+	} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+	set_bit(win, iov->used_m64_bar_mask);
+
+	return win;
+}
+
 static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 {
 	struct pnv_iov_data   *iov;
@@ -443,17 +460,9 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 			continue;
 
 		for (j = 0; j < m64_bars; j++) {
-
-			/* allocate a window ID for this BAR */
-			do {
-				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
-						phb->ioda.m64_bar_idx + 1, 0);
-
-				if (win >= phb->ioda.m64_bar_idx + 1)
-					goto m64_failed;
-			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
-			set_bit(win, iov->used_m64_bar_mask);
-
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
 
 			if (iov->m64_single_mode) {
 				int pe_num = iov->vf_pe_arr[j].pe_number;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (12 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  4:50   ` Alexey Kardashevskiy
  2020-07-10  5:23 ` [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting Oliver O'Halloran
  2020-07-10  6:45   ` Christoph Hellwig
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Split up the logic so that we have one branch that handles setting up a
segmented window and another that handles setting up single PE windows for
each VF.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
This patch could be folded into the previous one. I've kept it
separate mainly because the diff is *horrific* when they're merged.
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 57 ++++++++++------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 2f967aa4fbf5..8de03636888a 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -441,52 +441,49 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 	struct resource       *res;
 	int                    i, j;
 	int64_t                rc;
-	int                    total_vfs;
 	resource_size_t        size, start;
-	int                    m64_bars;
+	int                    base_pe_num;
 
 	phb = pci_bus_to_pnvhb(pdev->bus);
 	iov = pnv_iov_get(pdev);
-	total_vfs = pci_sriov_get_totalvfs(pdev);
-
-	if (iov->m64_single_mode)
-		m64_bars = num_vfs;
-	else
-		m64_bars = 1;
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || !res->parent)
 			continue;
 
-		for (j = 0; j < m64_bars; j++) {
+		/* don't need single mode? map everything in one go! */
+		if (!iov->m64_single_mode) {
 			win = pnv_pci_alloc_m64_bar(phb, iov);
 			if (win < 0)
 				goto m64_failed;
 
-			if (iov->m64_single_mode) {
-				int pe_num = iov->vf_pe_arr[j].pe_number;
-
-				size = pci_iov_resource_size(pdev,
-							PCI_IOV_RESOURCES + i);
-				start = res->start + size * j;
-				rc = pnv_ioda_map_m64_single(phb, win,
-							     pe_num,
-							     start,
-							     size);
-			} else {
-				size = resource_size(res);
-				start = res->start;
-
-				rc = pnv_ioda_map_m64_accordion(phb, win, start,
-								size);
-			}
+			size = resource_size(res);
+			start = res->start;
 
-			if (rc != OPAL_SUCCESS) {
-				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
-					win, rc);
+			rc = pnv_ioda_map_m64_accordion(phb, win, start, size);
+			if (rc)
+				goto m64_failed;
+
+			continue;
+		}
+
+		/* otherwise map each VF with single PE BARs */
+		size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);
+		base_pe_num = iov->vf_pe_arr[0].pe_number;
+
+		for (j = 0; j < num_vfs; j++) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			start = res->start + size * j;
+			rc = pnv_ioda_map_m64_single(phb, win,
+						     base_pe_num + j,
+						     start,
+						     size);
+			if (rc)
 				goto m64_failed;
-			}
 		}
 	}
 	return 0;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
                   ` (13 preceding siblings ...)
  2020-07-10  5:23 ` [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup Oliver O'Halloran
@ 2020-07-10  5:23 ` Oliver O'Halloran
  2020-07-15  5:24   ` Alexey Kardashevskiy
  2020-07-10  6:45   ` Christoph Hellwig
  15 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10  5:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Using single PE BARs to map an SR-IOV BAR is really a choice about what
strategy to use when mapping a BAR. It doesn't make much sense for this to
be a global setting since a device might have one large BAR which needs to
be mapped with single PE windows and another smaller BAR that can be mapped
with a regular segmented window. Make the segmented vs single decision a
per-BAR setting and clean up the logic that decides which mode to use.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/pci-sriov.c | 131 +++++++++++----------
 arch/powerpc/platforms/powernv/pci.h       |  10 +-
 2 files changed, 75 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 8de03636888a..87377d95d648 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -146,10 +146,9 @@
 static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 {
 	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
-	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
 	struct resource *res;
 	int i;
-	resource_size_t size, total_vf_bar_sz;
+	resource_size_t vf_bar_sz;
 	struct pnv_iov_data *iov;
 	int mul, total_vfs;
 
@@ -158,9 +157,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 		goto disable_iov;
 	pdev->dev.archdata.iov_data = iov;
 
+	/* FIXME: totalvfs > phb->ioda.total_pe_num is going to be a problem */
 	total_vfs = pci_sriov_get_totalvfs(pdev);
 	mul = phb->ioda.total_pe_num;
-	total_vf_bar_sz = 0;
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
@@ -173,50 +172,51 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 			goto disable_iov;
 		}
 
-		total_vf_bar_sz += pci_iov_resource_size(pdev,
-				i + PCI_IOV_RESOURCES);
+		vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
 
 		/*
-		 * If bigger than quarter of M64 segment size, just round up
-		 * power of two.
+		 * Generally, one segmented M64 BAR maps one IOV BAR. However,
+		 * if a VF BAR is too large we end up wasting a lot of space.
+		 * If we've got a BAR that's larger than 1/4 of the
+		 * default window's segment size then switch to using single PE
+		 * windows. This limits the total number of VFs we can support.
 		 *
-		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
-		 * with other devices, IOV BAR size is expanded to be
-		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
-		 * segment size , the expanded size would equal to half of the
-		 * whole M64 space size, which will exhaust the M64 Space and
-		 * limit the system flexibility.  This is a design decision to
-		 * set the boundary to quarter of the M64 segment size.
+		 * The 1/4 limit is arbitrary and can be tweaked.
 		 */
-		if (total_vf_bar_sz > gate) {
-			mul = roundup_pow_of_two(total_vfs);
-			dev_info(&pdev->dev,
-				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
-				total_vf_bar_sz, gate, mul);
-			iov->m64_single_mode = true;
-			break;
-		}
-	}
+		if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
+			/*
+			 * On PHB3, the minimum size alignment of M64 BAR in
+			 * single mode is 32MB. If this VF BAR is smaller than
+			 * 32MB, but still too large for a segmented window
+			 * then we can't map it and need to disable SR-IOV for
+			 * this device.
+			 */
+			if (vf_bar_sz < SZ_32M) {
+				pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n",
+					i, res);
+				goto disable_iov;
+			}
 
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || res->parent)
+			iov->m64_single_mode[i] = true;
 			continue;
+		}
+
 
-		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
 		/*
-		 * On PHB3, the minimum size alignment of M64 BAR in single
-		 * mode is 32MB.
+		 * This BAR can be mapped with one segmented window, so adjust
+		 * the resource size to accommodate.
 		 */
-		if (iov->m64_single_mode && (size < SZ_32M))
-			goto disable_iov;
+		pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res);
+		res->end = res->start + vf_bar_sz * mul - 1;
+		pci_dbg(pdev, "                       %pR\n", res);
 
-		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
-		res->end = res->start + size * mul - 1;
-		dev_dbg(&pdev->dev, "                       %pR\n", res);
-		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
+		pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
 			 i, res, mul);
+
+		iov->need_shift = true;
 	}
+
+	// what should this be?
 	iov->vfs_expanded = mul;
 
 	return;
@@ -260,42 +260,42 @@ void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
 resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
 						      int resno)
 {
-	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pnv_iov_data *iov = pnv_iov_get(pdev);
 	resource_size_t align;
 
-	/*
-	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
-	 * SR-IOV. While from hardware perspective, the range mapped by M64
-	 * BAR should be size aligned.
-	 *
-	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
-	 * powernv-specific hardware restriction is gone. But if just use the
-	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
-	 * in one segment of M64 #15, which introduces the PE conflict between
-	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
-	 * m64_segsize.
-	 *
-	 * This function returns the total IOV BAR size if M64 BAR is in
-	 * Shared PE mode or just VF BAR size if not.
-	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
-	 * M64 segment size if IOV BAR size is less.
-	 */
-	align = pci_iov_resource_size(pdev, resno);
+	int bar_no = resno - PCI_IOV_RESOURCES;
 
 	/*
 	 * iov can be null if we have an SR-IOV device with IOV BAR that can't
 	 * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
-	 * In that case we don't allow VFs to be enabled so just return the
-	 * default alignment.
+	 * In that case we don't allow VFs to be enabled since one of their
+	 * BARs would not be placed in the correct PE.
 	 */
 	if (!iov)
 		return align;
 	if (!iov->vfs_expanded)
 		return align;
-	if (iov->m64_single_mode)
-		return max(align, (resource_size_t)phb->ioda.m64_segsize);
 
+	align = pci_iov_resource_size(pdev, resno);
+
+	/*
+	 * If we're using single mode then we can just use the native VF BAR
+	 * alignment. We validated that it's possible to use a single PE
+	 * window above when we did the fixup.
+	 */
+	if (iov->m64_single_mode[bar_no])
+		return align;
+
+	/*
+	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+	 * SR-IOV. While from hardware perspective, the range mapped by M64
+	 * BAR should be size aligned.
+	 *
+	 * This function returns the total IOV BAR size if M64 BAR is in
+	 * Shared PE mode or just VF BAR size if not.
+	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
+	 * M64 segment size if IOV BAR size is less.
+	 */
 	return iov->vfs_expanded * align;
 }
 
@@ -453,7 +453,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 			continue;
 
 		/* don't need single mode? map everything in one go! */
-		if (!iov->m64_single_mode) {
+		if (!iov->m64_single_mode[i]) {
 			win = pnv_pci_alloc_m64_bar(phb, iov);
 			if (win < 0)
 				goto m64_failed;
@@ -546,6 +546,8 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		res = &dev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || !res->parent)
 			continue;
+		if (iov->m64_single_mode[i])
+			continue;
 
 		/*
 		 * The actual IOV BAR range is determined by the start address
@@ -577,6 +579,8 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		res = &dev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || !res->parent)
 			continue;
+		if (iov->m64_single_mode[i])
+			continue;
 
 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
 		res2 = *res;
@@ -622,8 +626,8 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev)
 	/* Release VF PEs */
 	pnv_ioda_release_vf_PE(pdev);
 
-	/* Un-shift the IOV BAR resources */
-	if (!iov->m64_single_mode)
+	/* Un-shift the IOV BARs if we need to */
+	if (iov->need_shift)
 		pnv_pci_vf_resource_shift(pdev, -base_pe);
 
 	/* Release M64 windows */
@@ -741,9 +745,8 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	 * the IOV BAR according to the PE# allocated to the VFs.
 	 * Otherwise, the PE# for the VF will conflict with others.
 	 */
-	if (!iov->m64_single_mode) {
-		ret = pnv_pci_vf_resource_shift(pdev,
-						base_pe->pe_number);
+	if (iov->need_shift) {
+		ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number);
 		if (ret)
 			goto shift_failed;
 	}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 13555bc549f4..a78d1feb8fb8 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -236,14 +236,20 @@ struct pnv_iov_data {
 	/* number of VFs IOV BAR expanded. FIXME: rename this to something less bad */
 	u16     vfs_expanded;
 
+	/*
+	 * indicates if we need to move our IOV BAR to account for our
+	 * allocated PE number when enabling VFs.
+	 */
+	bool    need_shift;
+
 	/* number of VFs enabled */
 	u16     num_vfs;
 
 	/* pointer to the array of VF PEs. num_vfs long*/
 	struct pnv_ioda_pe *vf_pe_arr;
 
-	/* Did we map the VF BARs with single-PE IODA BARs? */
-	bool    m64_single_mode;
+	/* Did we map the VF BAR with single-PE IODA BARs? */
+	bool    m64_single_mode[PCI_SRIOV_NUM_BARS];
 
 	/*
 	 * Bit mask used to track which m64 windows that we used to map the
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: PowerNV PCI & SR-IOV cleanups
  2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
@ 2020-07-10  6:45   ` Christoph Hellwig
  2020-07-10  5:23 ` [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release Oliver O'Halloran
                     ` (14 subsequent siblings)
  15 siblings, 0 replies; 57+ messages in thread
From: Christoph Hellwig @ 2020-07-10  6:45 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev, linux-pci

On Fri, Jul 10, 2020 at 03:23:25PM +1000, Oliver O'Halloran wrote:
> This is largely prep work for supporting VFs in the 32bit MMIO window.
> This is an unfortunate necessity due to how the Linux BAR allocator
> handles BARs marked as non-prefetchable. The distinction
> between prefetch and non-prefetchable BARs was made largely irrelevant
> with the introduction of PCIe, but the BAR allocator is overly
> conservative. It will always place non-pref bars in the prefetchable
> window, which is 32bit only. This results in us being unable to use VFs
> from NVMe drives and a few different RAID cards.

How about fixing that in the core PCI code?

(nothing against this series though, as it seems like a massive
cleanup)

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: PowerNV PCI & SR-IOV cleanups
  2020-07-10  6:45   ` Christoph Hellwig
@ 2020-07-10 12:45     ` Oliver O'Halloran
  -1 siblings, 0 replies; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-10 12:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linuxppc-dev, linux-pci

On Fri, Jul 10, 2020 at 4:45 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Fri, Jul 10, 2020 at 03:23:25PM +1000, Oliver O'Halloran wrote:
> > This is largely prep work for supporting VFs in the 32bit MMIO window.
> > This is an unfortunate necessity due to how the Linux BAR allocator
> > handles BARs marked as non-prefetchable. The distinction
> > between prefetch and non-prefetchable BARs was made largely irrelevant
> > with the introduction of PCIe, but the BAR allocator is overly
> > conservative. It will always place non-pref bars in the prefetchable
> > window, which is 32bit only. This results in us being unable to use VFs
> > from NVMe drives and a few different RAID cards.
>
> How about fixing that in the core PCI code?

I've been kicking around the idea but I've never managed to convince
myself that ignoring the non-prefetchable bit is a safe thing to do in
generic code. Since Gen3 at least the PCIe Base spec has provided some
guidance about when you can put non-prefetchable BARs in the
prefetchable window and as of the Gen5 spec it lists these conditions:

> 1) The entire path from the host to the adapter is over PCI Express.
> 2) No conventional PCI or PCI-X devices do peer-to-peer reads to the range mapped by the BAR.
> 3) The PCI Express Host Bridge does no byte merging. (This is believed to be true on most platforms.)
> 4) Any locations with read side-effects are never the target of Memory Reads with the TH bit Set.
> 5) The range mapped by the BAR is never the target of a speculative Memory Read, either Host initiated or peer-to-peer.

1) Is easy enough to verify.
2) Is probably true, but who knows.
3) I know this is true for the platforms I'm looking at since the HW
designers assure me there is no merging happening at the host-bridge
level. Merging of MMIO ops does seem like an insane thing to do so
it's probably true in general too, but there's no real way to tell.
4) Is also *probably* true since the TH bit is only set when it's
explicitly enabled via the TLP Processing Hints extended capability in
config space. I guess it's possible firmware might enable that without
Linux realising, but in that case Linux is probably not doing BAR
allocation.
5) I have no idea about, but it seems difficult to make any kind of
general statement about.

I might just be being paranoid.
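
For what it's worth, 1) is about the only condition that could be checked
mechanically. A rough sketch, using only the generic pci_is_pcie() /
pci_upstream_bridge() accessors (the helper name is made up):

/* true if every device between the root and dev is PCI Express */
static bool pci_path_is_all_pcie(struct pci_dev *dev)
{
	struct pci_dev *bridge;

	if (!pci_is_pcie(dev))
		return false;

	/* walk up through every bridge between dev and the root bus */
	for (bridge = pci_upstream_bridge(dev); bridge;
	     bridge = pci_upstream_bridge(bridge)) {
		if (!pci_is_pcie(bridge))
			return false;
	}

	return true;
}

The other four you'd pretty much have to take on faith, so if it did go
into generic code I'd want it behind some kind of opt-in.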

Oliver

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper
  2020-07-10  5:23 ` [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper Oliver O'Halloran
@ 2020-07-13  8:28   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-13  8:28 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Add a helper to go from a pci_bus structure to the pnv_phb that hosts that
> bus. There's a lot of instances of the following pattern:
> 
> 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> 	struct pnv_phb *phb = hose->private_data;
> 
> Without any other uses of the pci_controller inside the function. This is
> hard to read since it requires you to memorise the contents of the
> private data fields and kind of error prone since it involves blindly
> assigning a void pointer. Add a helper to make it more concise and
> explicit.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++----------------
>  arch/powerpc/platforms/powernv/pci.c      | 14 ++--
>  arch/powerpc/platforms/powernv/pci.h      | 10 +++
>  3 files changed, 38 insertions(+), 74 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 31c3e6d58c41..687919db0347 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -252,8 +252,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
>  static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
>  					 unsigned long *pe_bitmap)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct resource *r;
>  	resource_size_t base, sgsz, start, end;
>  	int segno, i;
> @@ -351,8 +350,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
>  
>  static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
>  	struct pnv_ioda_pe *master_pe, *pe;
>  	unsigned long size, *pe_alloc;
>  	int i;
> @@ -673,8 +671,7 @@ struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn)
>  
>  struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(dev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
>  	struct pci_dn *pdn = pci_get_pdn(dev);
>  
>  	if (!pdn)
> @@ -1069,8 +1066,7 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>  
>  static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(dev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
>  	struct pci_dn *pdn = pci_get_pdn(dev);
>  	struct pnv_ioda_pe *pe;
>  
> @@ -1129,8 +1125,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>   */
>  static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
>  	struct pnv_ioda_pe *pe = NULL;
>  	unsigned int pe_num;
>  
> @@ -1196,8 +1191,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
>  	struct pnv_ioda_pe *pe;
>  	struct pci_dev *gpu_pdev;
>  	struct pci_dn *npu_pdn;
> -	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
>  
>  	/*
>  	 * Intentionally leak a reference on the npu device (for
> @@ -1300,16 +1294,12 @@ static void pnv_pci_ioda_setup_nvlink(void)
>  #ifdef CONFIG_PCI_IOV
>  static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
> -	struct pci_bus        *bus;
> -	struct pci_controller *hose;
>  	struct pnv_phb        *phb;
>  	struct pci_dn         *pdn;
>  	int                    i, j;
>  	int                    m64_bars;
>  
> -	bus = pdev->bus;
> -	hose = pci_bus_to_host(bus);
> -	phb = hose->private_data;
> +	phb = pci_bus_to_pnvhb(pdev->bus);
>  	pdn = pci_get_pdn(pdev);
>  
>  	if (pdn->m64_single_mode)
> @@ -1333,8 +1323,6 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  
>  static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
> -	struct pci_bus        *bus;
> -	struct pci_controller *hose;
>  	struct pnv_phb        *phb;
>  	struct pci_dn         *pdn;
>  	unsigned int           win;
> @@ -1346,9 +1334,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	int                    pe_num;
>  	int                    m64_bars;
>  
> -	bus = pdev->bus;
> -	hose = pci_bus_to_host(bus);
> -	phb = hose->private_data;
> +	phb = pci_bus_to_pnvhb(pdev->bus);
>  	pdn = pci_get_pdn(pdev);
>  	total_vfs = pci_sriov_get_totalvfs(pdev);
>  
> @@ -1459,15 +1445,11 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
>  
>  static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
>  {
> -	struct pci_bus        *bus;
> -	struct pci_controller *hose;
>  	struct pnv_phb        *phb;
>  	struct pnv_ioda_pe    *pe, *pe_n;
>  	struct pci_dn         *pdn;
>  
> -	bus = pdev->bus;
> -	hose = pci_bus_to_host(bus);
> -	phb = hose->private_data;
> +	phb = pci_bus_to_pnvhb(pdev->bus);
>  	pdn = pci_get_pdn(pdev);
>  
>  	if (!pdev->is_physfn)
> @@ -1492,16 +1474,12 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
>  
>  static void pnv_pci_sriov_disable(struct pci_dev *pdev)
>  {
> -	struct pci_bus        *bus;
> -	struct pci_controller *hose;
>  	struct pnv_phb        *phb;
>  	struct pnv_ioda_pe    *pe;
>  	struct pci_dn         *pdn;
>  	u16                    num_vfs, i;
>  
> -	bus = pdev->bus;
> -	hose = pci_bus_to_host(bus);
> -	phb = hose->private_data;
> +	phb = pci_bus_to_pnvhb(pdev->bus);
>  	pdn = pci_get_pdn(pdev);
>  	num_vfs = pdn->num_vfs;
>  
> @@ -1535,17 +1513,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>  				       struct pnv_ioda_pe *pe);
>  static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  {
> -	struct pci_bus        *bus;
> -	struct pci_controller *hose;
>  	struct pnv_phb        *phb;
>  	struct pnv_ioda_pe    *pe;
>  	int                    pe_num;
>  	u16                    vf_index;
>  	struct pci_dn         *pdn;
>  
> -	bus = pdev->bus;
> -	hose = pci_bus_to_host(bus);
> -	phb = hose->private_data;
> +	phb = pci_bus_to_pnvhb(pdev->bus);
>  	pdn = pci_get_pdn(pdev);
>  
>  	if (!pdev->is_physfn)
> @@ -1572,7 +1546,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  		pe->rid = (vf_bus << 8) | vf_devfn;
>  
>  		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
> -			hose->global_number, pdev->bus->number,
> +			pci_domain_nr(pdev->bus), pdev->bus->number,
>  			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
>  
>  		if (pnv_ioda_configure_pe(phb, pe)) {
> @@ -1602,17 +1576,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  
>  static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  {
> -	struct pci_bus        *bus;
> -	struct pci_controller *hose;
>  	struct pnv_phb        *phb;
>  	struct pnv_ioda_pe    *pe;
>  	struct pci_dn         *pdn;
>  	int                    ret;
>  	u16                    i;
>  
> -	bus = pdev->bus;
> -	hose = pci_bus_to_host(bus);
> -	phb = hose->private_data;
> +	phb = pci_bus_to_pnvhb(pdev->bus);
>  	pdn = pci_get_pdn(pdev);
>  
>  	if (phb->type == PNV_PHB_IODA2) {
> @@ -1735,8 +1705,7 @@ static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  
>  static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct pci_dn *pdn = pci_get_pdn(pdev);
>  	struct pnv_ioda_pe *pe;
>  
> @@ -1847,8 +1816,7 @@ static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
>  static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
>  		u64 dma_mask)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct pci_dn *pdn = pci_get_pdn(pdev);
>  	struct pnv_ioda_pe *pe;
>  
> @@ -2766,8 +2734,7 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  #ifdef CONFIG_PCI_IOV
>  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
>  	struct resource *res;
>  	int i;
> @@ -3101,10 +3068,9 @@ static void pnv_pci_ioda_fixup(void)
>  static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
>  						unsigned long type)
>  {
> -	struct pci_dev *bridge;
> -	struct pci_controller *hose = pci_bus_to_host(bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
>  	int num_pci_bridges = 0;
> +	struct pci_dev *bridge;
>  
>  	bridge = bus->self;
>  	while (bridge) {
> @@ -3190,8 +3156,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
>  
>  static void pnv_pci_configure_bus(struct pci_bus *bus)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
>  	struct pci_dev *bridge = bus->self;
>  	struct pnv_ioda_pe *pe;
>  	bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
> @@ -3237,8 +3202,7 @@ static resource_size_t pnv_pci_default_alignment(void)
>  static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
>  						      int resno)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct pci_dn *pdn = pci_get_pdn(pdev);
>  	resource_size_t align;
>  
> @@ -3274,8 +3238,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
>   */
>  static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(dev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
>  	struct pci_dn *pdn;
>  
>  	/* The function is probably called while the PEs have
> @@ -3488,8 +3451,7 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
>  
>  static void pnv_pci_release_device(struct pci_dev *pdev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct pci_dn *pdn = pci_get_pdn(pdev);
>  	struct pnv_ioda_pe *pe;
>  
> @@ -3534,8 +3496,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
>  
>  static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
>  {
> -	struct pci_controller *hose = bus->sysdata;
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
>  	struct pnv_ioda_pe *pe;
>  
>  	list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> @@ -3873,8 +3834,7 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
>  
>  static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(dev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
>  
>  	if (!machine_is(powernv))
>  		return;
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 091fe1cf386b..9b9bca169275 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -162,8 +162,7 @@ EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
>  
>  int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct msi_desc *entry;
>  	struct msi_msg msg;
>  	int hwirq;
> @@ -211,8 +210,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
>  
>  void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>  {
> -	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct msi_desc *entry;
>  	irq_hw_number_t hwirq;
>  
> @@ -824,10 +822,9 @@ EXPORT_SYMBOL(pnv_pci_get_phb_node);
>  
>  int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
>  {
> -	__be64 val;
> -	struct pci_controller *hose;
> -	struct pnv_phb *phb;
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
>  	u64 tunnel_bar;
> +	__be64 val;
>  	int rc;
>  
>  	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
> @@ -835,9 +832,6 @@ int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
>  	if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
>  		return -ENXIO;
>  
> -	hose = pci_bus_to_host(dev->bus);
> -	phb = hose->private_data;
> -
>  	mutex_lock(&tunnel_mutex);
>  	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
>  	if (rc != OPAL_SUCCESS) {
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 51c254f2f3cb..0727dec9a0d1 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -260,4 +260,14 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
>  
>  extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
>  
> +static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
> +{
> +	struct pci_controller *hose = bus->sysdata;
> +
> +	if (hose)
> +		return hose->private_data;


Since it is powernv, private_data should not ever be NULL, so maybe we
want a BUG_ON here?
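
Something like this, maybe (untested sketch, just to illustrate):

static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
{
	struct pci_controller *hose = bus->sysdata;

	/* on powernv every host bridge is ours, so this should never trip */
	BUG_ON(!hose || !hose->private_data);

	return hose->private_data;
}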


> +
> +	return NULL;
> +}
> +
>  #endif /* __POWERNV_PCI_H */
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release
  2020-07-10  5:23 ` [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release Oliver O'Halloran
@ 2020-07-13  8:30   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-13  8:30 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Currently we have these two functions:
> 
> 	pnv_pci_ioda2_release_dma_pe(), and
> 	pnv_pci_ioda2_release_pe_dma()
> 
> The first is used when tearing down VF PEs and the other is used for normal
> devices. There's very little difference between the two though. The latter
> (non-VF) will skip a call to pnv_pci_ioda2_unset_window() unless
> CONFIG_IOMMU_API=y is set. There's no real point in doing this so fold the
> two together.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>



Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 30 +++--------------------
>  1 file changed, 3 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 687919db0347..bfb40607aa0e 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1422,26 +1422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	return -EBUSY;
>  }
>  
> -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
> -		int num);
> -
> -static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
> -{
> -	struct iommu_table    *tbl;
> -	int64_t               rc;
> -
> -	tbl = pe->table_group.tables[0];
> -	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
> -	if (rc)
> -		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
> -
> -	pnv_pci_ioda2_set_bypass(pe, false);
> -	if (pe->table_group.group) {
> -		iommu_group_put(pe->table_group.group);
> -		BUG_ON(pe->table_group.group);
> -	}
> -	iommu_tce_table_put(tbl);
> -}
> +static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
>  
>  static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
>  {
> @@ -1455,11 +1436,12 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
>  	if (!pdev->is_physfn)
>  		return;
>  
> +	/* FIXME: Use pnv_ioda_release_pe()? */
>  	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
>  		if (pe->parent_dev != pdev)
>  			continue;
>  
> -		pnv_pci_ioda2_release_dma_pe(pdev, pe);
> +		pnv_pci_ioda2_release_pe_dma(pe);
>  
>  		/* Remove from list */
>  		mutex_lock(&phb->ioda.pe_list_mutex);
> @@ -2429,7 +2411,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
>  	return 0;
>  }
>  
> -#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
>  static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
>  		int num)
>  {
> @@ -2453,7 +2434,6 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
>  
>  	return ret;
>  }
> -#endif
>  
>  #ifdef CONFIG_IOMMU_API
>  unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
> @@ -3334,18 +3314,14 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
>  {
>  	struct iommu_table *tbl = pe->table_group.tables[0];
>  	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
> -#ifdef CONFIG_IOMMU_API
>  	int64_t rc;
> -#endif
>  
>  	if (!weight)
>  		return;
>  
> -#ifdef CONFIG_IOMMU_API
>  	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
>  	if (rc)
>  		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
> -#endif
>  
>  	pnv_pci_ioda2_set_bypass(pe, false);
>  	if (pe->table_group.group) {
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-10  5:23 ` [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state Oliver O'Halloran
@ 2020-07-14  5:37   ` Alexey Kardashevskiy
  2020-07-14  5:58     ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-14  5:37 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> There's an optimisation in the PE setup which skips performing DMA
> setup for a PE if we only have bridges in a PE. The assumption being
> that only "real" devices will DMA to system memory, which is probably
> fair. However, if we start off with only bridge devices in a PE then
> add a non-bridge device the new device won't be able to use DMA  because
> we never configured it.
> 
> Fix this (admittedly pretty weird) edge case by tracking whether we've done
> the DMA setup for the PE or not. If a non-bridge device is added to the PE
> (via rescan or hotplug, or whatever) we can set up DMA on demand.

So hotplug does not work on powernv then, right? I thought you tested it
a while ago, or is this patch the result of that attempt? If it is, then

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> This also means the only remaining user of the old "DMA Weight" code is
> the IODA1 DMA setup code that it was originally added for, which is good.


Is ditching IODA1 in the plan? :)

> 
> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
> Alexey, do we need to have the IOMMU API stuff set/clear this flag?


I'd say no as that API only cares if a device is in a PE and for those
the PE DMA setup  optimization is skipped. Thanks,




> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 48 ++++++++++++++---------
>  arch/powerpc/platforms/powernv/pci.h      |  7 ++++
>  2 files changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index bfb40607aa0e..bb9c1cc60c33 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -141,6 +141,7 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
>  
>  	phb->ioda.pe_array[pe_no].phb = phb;
>  	phb->ioda.pe_array[pe_no].pe_number = pe_no;
> +	phb->ioda.pe_array[pe_no].dma_setup_done = false;
>  
>  	/*
>  	 * Clear the PE frozen state as it might be put into frozen state
> @@ -1685,6 +1686,12 @@ static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  }
>  #endif /* CONFIG_PCI_IOV */
>  
> +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
> +				       struct pnv_ioda_pe *pe);
> +
> +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> +				       struct pnv_ioda_pe *pe);
> +
>  static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
>  {
>  	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> @@ -1713,6 +1720,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
>  		pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
>  	}
>  
> +	/*
> +	 * We assume that bridges *probably* don't need to do any DMA so we can
> +	 * skip allocating a TCE table, etc unless we get a non-bridge device.
> +	 */
> +	if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
> +		switch (phb->type) {
> +		case PNV_PHB_IODA1:
> +			pnv_pci_ioda1_setup_dma_pe(phb, pe);
> +			break;
> +		case PNV_PHB_IODA2:
> +			pnv_pci_ioda2_setup_dma_pe(phb, pe);
> +			break;
> +		default:
> +			pr_warn("%s: No DMA for PHB#%x (type %d)\n",
> +				__func__, phb->hose->global_number, phb->type);
> +		}
> +	}
> +
>  	if (pdn)
>  		pdn->pe_number = pe->pe_number;
>  	pe->device_count++;
> @@ -2222,6 +2247,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
>  	pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
>  	iommu_init_table(tbl, phb->hose->node, 0, 0);
>  
> +	pe->dma_setup_done = true;
>  	return;
>   fail:
>  	/* XXX Failure: Try to fallback to 64-bit only ? */
> @@ -2536,9 +2562,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>  {
>  	int64_t rc;
>  
> -	if (!pnv_pci_ioda_pe_dma_weight(pe))
> -		return;
> -
>  	/* TVE #1 is selected by PCI address bit 59 */
>  	pe->tce_bypass_base = 1ull << 59;
>  
> @@ -2563,6 +2586,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>  	iommu_register_group(&pe->table_group, phb->hose->global_number,
>  			     pe->pe_number);
>  #endif
> +	pe->dma_setup_done = true;
>  }
>  
>  int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
> @@ -3136,7 +3160,6 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
>  
>  static void pnv_pci_configure_bus(struct pci_bus *bus)
>  {
> -	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
>  	struct pci_dev *bridge = bus->self;
>  	struct pnv_ioda_pe *pe;
>  	bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
> @@ -3160,17 +3183,6 @@ static void pnv_pci_configure_bus(struct pci_bus *bus)
>  		return;
>  
>  	pnv_ioda_setup_pe_seg(pe);
> -	switch (phb->type) {
> -	case PNV_PHB_IODA1:
> -		pnv_pci_ioda1_setup_dma_pe(phb, pe);
> -		break;
> -	case PNV_PHB_IODA2:
> -		pnv_pci_ioda2_setup_dma_pe(phb, pe);
> -		break;
> -	default:
> -		pr_warn("%s: No DMA for PHB#%x (type %d)\n",
> -			__func__, phb->hose->global_number, phb->type);
> -	}
>  }
>  
>  static resource_size_t pnv_pci_default_alignment(void)
> @@ -3289,11 +3301,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
>  
>  static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
>  {
> -	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
>  	struct iommu_table *tbl = pe->table_group.tables[0];
>  	int64_t rc;
>  
> -	if (!weight)
> +	if (!pe->dma_setup_done)
>  		return;
>  
>  	rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
> @@ -3313,10 +3324,9 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
>  static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
>  {
>  	struct iommu_table *tbl = pe->table_group.tables[0];
> -	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
>  	int64_t rc;
>  
> -	if (!weight)
> +	if (!pe->dma_setup_done)
>  		return;
>  
>  	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 0727dec9a0d1..6aa6aefb637d 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -87,6 +87,13 @@ struct pnv_ioda_pe {
>  	bool			tce_bypass_enabled;
>  	uint64_t		tce_bypass_base;
>  
> +	/*
> +	 * Used to track whether we've done DMA setup for this PE or not. We
> +	 * want to defer allocating TCE tables, etc until we've added a
> +	 * non-bridge device to the PE.
> +	 */
> +	bool			dma_setup_done;
> +
>  	/* MSIs. MVE index is identical for for 32 and 64 bit MSI
>  	 * and -1 if not supported. (It's actually identical to the
>  	 * PE number)
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-14  5:37   ` Alexey Kardashevskiy
@ 2020-07-14  5:58     ` Oliver O'Halloran
  2020-07-14  7:21       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-14  5:58 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> > There's an optimisation in the PE setup which skips performing DMA
> > setup for a PE if we only have bridges in a PE. The assumption being
> > that only "real" devices will DMA to system memory, which is probably
> > fair. However, if we start off with only bridge devices in a PE then
> > add a non-bridge device the new device won't be able to use DMA  because
> > we never configured it.
> >
> > Fix this (admittedly pretty weird) edge case by tracking whether we've done
> > the DMA setup for the PE or not. If a non-bridge device is added to the PE
> > (via rescan or hotplug, or whatever) we can set up DMA on demand.
>
> So hotplug does not work on powernv then, right? I thought you tested it
> a while ago, or this patch is the result of that attempt? If it is, then

It mostly works. Just the really niche case of hot plugging a bridge,
then later on hot plugging a device into the same bus which wouldn't
work.

> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>
>
> > This also means the only remaining user of the old "DMA Weight" code is
> > the IODA1 DMA setup code that it was originally added for, which is good.
>
>
> Is ditching IODA1 in the plan? :)

That or separating out the pci_controller_ops for IODA1 and IODA2 so
we can stop any IODA2 specific changes from breaking it. For the most
part keeping around IODA1 support isn't hurting anyone, but I wanted
to re-work how the BDFN->PE assignment works so that we'd delay
assigning a BDFN to a PE until the device is probed. Right now when
we're configuring the PE for a bus we map all 255 devfn's to that PE.
This is mostly fine, but if you do a bus rescan and there's no device
present we'll get a spurious EEH on that PE since the PHB sees that
there's no device responding to the CFG cycle. We stop the spurious
EEH freeze today by only allowing config cycles if we can find a
pci_dn for that bdfn, but I want to get rid of pci_dn.

Mapping each BDFN to a PE after the device is probed is easy enough to
do on PHB3 and above since the mapping is handled by an in-memory
table which is indexed by the BDFN. Earlier PHBs (i.e. IODA1) use a
table of base & mask values which match on the BDFN, so assigning a
whole bus at once is easy, but adding individual BDFNs is hard. It's
still possible to do in the HW, but the way the OPAL API works makes
it impossible.
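
On PHB3 the per-device case is just the opal_pci_set_pe() call we already
make for single-device PEs, with exact bus/dev/fn compares. Roughly (sketch
only, helper name made up, constants as in opal-api.h):

/* map a single bdfn to a PE on a PHB3-class PHB (bdfn-indexed RTT) */
static int64_t pnv_map_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn, int pe_number)
{
	return opal_pci_set_pe(phb->opal_id, pe_number, bdfn,
			       OpalPciBusAll, /* compare the full bus number */
			       OPAL_COMPARE_RID_DEVICE_NUMBER,
			       OPAL_COMPARE_RID_FUNCTION_NUMBER,
			       OPAL_MAP_PE);
}

On P7IOC you'd need a dedicated base & mask entry per bdfn to get the same
effect, which is the part the OPAL API doesn't let us do.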

> >
> > Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> > Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> > ---
> > Alexey, do we need to have the IOMMU API stuff set/clear this flag?
>
>
> I'd say no as that API only cares if a device is in a PE and for those
> the PE DMA setup  optimization is skipped. Thanks,

Ok cool.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-14  5:58     ` Oliver O'Halloran
@ 2020-07-14  7:21       ` Alexey Kardashevskiy
  2020-07-15  0:23         ` Alexey Kardashevskiy
  2020-07-15  1:38         ` Oliver O'Halloran
  0 siblings, 2 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-14  7:21 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 14/07/2020 15:58, Oliver O'Halloran wrote:
> On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>> There's an optimisation in the PE setup which skips performing DMA
>>> setup for a PE if we only have bridges in a PE. The assumption being
>>> that only "real" devices will DMA to system memory, which is probably
>>> fair. However, if we start off with only bridge devices in a PE then
>>> add a non-bridge device the new device won't be able to use DMA  because
>>> we never configured it.
>>>
>>> Fix this (admittedly pretty weird) edge case by tracking whether we've done
>>> the DMA setup for the PE or not. If a non-bridge device is added to the PE
>>> (via rescan or hotplug, or whatever) we can set up DMA on demand.
>>
>> So hotplug does not work on powernv then, right? I thought you tested it
>> a while ago, or this patch is the result of that attempt? If it is, then
> 
> It mostly works. Just the really niche case of hot plugging a bridge,
> then later on hot plugging a device into the same bus which wouldn't
> work.

Don't you have to have a slot (which is a bridge) in the first place
to hotplug the bridge?

> 
>> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>>
>>> This also means the only remaining user of the old "DMA Weight" code is
>>> the IODA1 DMA setup code that it was originally added for, which is good.
>>
>>
>> Is ditching IODA1 in the plan? :)
> 
> That or separating out the pci_controller_ops for IODA1 and IODA2 so
> we can stop any IODA2 specific changes from breaking it.

Is IODA1 tested at all these days? Or is anyone running upstream
kernels anywhere who keeps shouting when it does not work on IODA1? Thanks,



> For the most
> part keeping around IODA1 support isn't hurting anyone, but I wanted
> to re-work how the BDFN->PE assignment works so that we'd delay
> assigning a BDFN to a PE until the device is probed. Right now when
> we're configuring the PE for a bus we map all 255 devfns to that PE.
> This is mostly fine, but if you do a bus rescan and there's no device
> present we'll get a spurious EEH on that PE since the PHB sees that
> there's no device responding to the CFG cycle. We stop the spurious
> EEH freeze today by only allowing config cycles if we can find a
> pci_dn for that bdfn, but I want to get rid of pci_dn.
> 
> Mapping each BDFN to a PE after the device is probed is easy enough to
> do on PHB3 and above since the mapping is handled by an in-memory
> table which is indexed by the BDFN. Earlier PHBs (i.e. IODA1) use a
> table of base & mask values which match on the BDFN, so assigning a
> whole bus at once is easy, but adding individual BDFNs is hard. It's
> still possible to do in the HW, but the way the OPAL API works makes
> it impossible.
> 
>>>
>>> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
>>> ---
>>> Alexey, do we need to have the IOMMU API stuff set/clear this flag?
>>
>>
>> I'd say no as that API only cares if a device is in a PE and for those
>> the PE DMA setup  optimization is skipped. Thanks,
> 
> Ok cool.
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread
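
For reference, the on-demand DMA setup discussed in the thread above boils
down to a pattern like the following. This is a simplified sketch with made-up
names (the actual change is in the patch itself, around
pnv_pci_ioda_dma_dev_setup()):

	#include <stdbool.h>

	struct example_pe {
		bool dma_setup_done;	/* has DMA/TCE state been configured for this PE? */
	};

	static void example_dma_dev_setup(struct example_pe *pe, bool dev_is_bridge)
	{
		/*
		 * Bridge-only PEs skip DMA setup. If a non-bridge device later
		 * lands in the same PE (rescan, hotplug), do the setup then.
		 */
		if (dev_is_bridge || pe->dma_setup_done)
			return;

		/* ... allocate TCE tables, program the PHB, and so on ... */
		pe->dma_setup_done = true;
	}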

* Re: [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window
  2020-07-10  5:23 ` [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window Oliver O'Halloran
@ 2020-07-14  7:39   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-14  7:39 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> We pre-configure the m64 window for IODA1 as a 1-1 segment-PE mapping,
> similar to PHB3. Currently the actual mapping of segments occurs in
> pnv_ioda_pick_m64_pe(), but we can move it into pnv_ioda1_init_m64() and
> drop the IODA1 specific code paths in the PE setup / teardown.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>



Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 55 +++++++++++------------
>  1 file changed, 25 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index bb9c1cc60c33..8fb17676d914 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -311,6 +311,28 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
>  		}
>  	}
>  
> +	for (index = 0; index < phb->ioda.total_pe_num; index++) {
> +		int64_t rc;
> +
> +		/*
> +		 * P7IOC supports M64DT, which helps mapping M64 segment
> +		 * to one particular PE#. However, PHB3 has fixed mapping
> +		 * between M64 segment and PE#. In order to have same logic
> +		 * for P7IOC and PHB3, we enforce fixed mapping between M64
> +		 * segment and PE# on P7IOC.
> +		 */
> +		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> +				index, OPAL_M64_WINDOW_TYPE,
> +				index / PNV_IODA1_M64_SEGS,
> +				index % PNV_IODA1_M64_SEGS);
> +		if (rc != OPAL_SUCCESS) {
> +			pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
> +				__func__, rc, phb->hose->global_number,
> +				index);
> +			goto fail;
> +		}
> +	}
> +
>  	/*
>  	 * Exclude the segments for reserved and root bus PE, which
>  	 * are first or last two PEs.
> @@ -402,26 +424,6 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
>  			pe->master = master_pe;
>  			list_add_tail(&pe->list, &master_pe->slaves);
>  		}
> -
> -		/*
> -		 * P7IOC supports M64DT, which helps mapping M64 segment
> -		 * to one particular PE#. However, PHB3 has fixed mapping
> -		 * between M64 segment and PE#. In order to have same logic
> -		 * for P7IOC and PHB3, we enforce fixed mapping between M64
> -		 * segment and PE# on P7IOC.
> -		 */
> -		if (phb->type == PNV_PHB_IODA1) {
> -			int64_t rc;
> -
> -			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -					pe->pe_number, OPAL_M64_WINDOW_TYPE,
> -					pe->pe_number / PNV_IODA1_M64_SEGS,
> -					pe->pe_number % PNV_IODA1_M64_SEGS);
> -			if (rc != OPAL_SUCCESS)
> -				pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
> -					__func__, rc, phb->hose->global_number,
> -					pe->pe_number);
> -		}
>  	}
>  
>  	kfree(pe_alloc);
> @@ -3354,14 +3356,8 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
>  		if (map[idx] != pe->pe_number)
>  			continue;
>  
> -		if (win == OPAL_M64_WINDOW_TYPE)
> -			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -					phb->ioda.reserved_pe_idx, win,
> -					idx / PNV_IODA1_M64_SEGS,
> -					idx % PNV_IODA1_M64_SEGS);
> -		else
> -			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -					phb->ioda.reserved_pe_idx, win, 0, idx);
> +		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> +				phb->ioda.reserved_pe_idx, win, 0, idx);
>  
>  		if (rc != OPAL_SUCCESS)
>  			pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
> @@ -3380,8 +3376,7 @@ static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
>  				     phb->ioda.io_segmap);
>  		pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
>  				     phb->ioda.m32_segmap);
> -		pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE,
> -				     phb->ioda.m64_segmap);
> +		/* M64 is pre-configured by pnv_ioda1_init_m64() */
>  	} else if (phb->type == PNV_PHB_IODA2) {
>  		pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
>  				     phb->ioda.m32_segmap);
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file
  2020-07-10  5:23 ` [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file Oliver O'Halloran
@ 2020-07-14  9:16   ` Alexey Kardashevskiy
  2020-07-22  5:01     ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-14  9:16 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> pci-ioda.c is getting a bit unwieldy due to the amount of stuff jammed in
> there. The SR-IOV support can be extracted easily enough and is mostly
> standalone, so move it into a separate file.
> 
> This patch also moves the PowerNV SR-IOV specific fields out of pci_dn and
> into a platform specific structure. I'm not sure how they ended up in there
> in the first place, but leaking platform specifics into common code has
> proven to be a terrible idea so far, so let's stop doing that.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
> The pci_dn change and the pci-sriov.c changes were originally separate patches.
> I accidentally squashed them together while rebasing and fixing that seemed
> like more pain than it was worth. I kind of like it this way though since
> they did cause a lot of churn on the same set of functions.
> 
> I'll split them up again if you really want (please don't want this).


Nah, it's not worth splitting it this way. However, it would be nice not
to have a (small?) functional change in the same patch; there is a small
new piece (below).


> ---
>  arch/powerpc/include/asm/device.h          |   3 +
>  arch/powerpc/platforms/powernv/Makefile    |   1 +
>  arch/powerpc/platforms/powernv/pci-ioda.c  | 673 +--------------------
>  arch/powerpc/platforms/powernv/pci-sriov.c | 642 ++++++++++++++++++++
>  arch/powerpc/platforms/powernv/pci.h       |  74 +++
>  5 files changed, 738 insertions(+), 655 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/pci-sriov.c
> 
> diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
> index 266542769e4b..4d8934db7ef5 100644
> --- a/arch/powerpc/include/asm/device.h
> +++ b/arch/powerpc/include/asm/device.h
> @@ -49,6 +49,9 @@ struct dev_archdata {
>  #ifdef CONFIG_CXL_BASE
>  	struct cxl_context	*cxl_ctx;
>  #endif
> +#ifdef CONFIG_PCI_IOV
> +	void *iov_data;
> +#endif
>  };
>  
>  struct pdev_archdata {
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index fe3f0fb5aeca..2eb6ae150d1f 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -11,6 +11,7 @@ obj-$(CONFIG_FA_DUMP)	+= opal-fadump.o
>  obj-$(CONFIG_PRESERVE_FA_DUMP)	+= opal-fadump.o
>  obj-$(CONFIG_OPAL_CORE)	+= opal-core.o
>  obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
> +obj-$(CONFIG_PCI_IOV)   += pci-sriov.o
>  obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
>  obj-$(CONFIG_EEH)	+= eeh-powernv.o
>  obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 8fb17676d914..2d36a9ebf0e9 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -115,26 +115,6 @@ static int __init pci_reset_phbs_setup(char *str)
>  
>  early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
>  
> -static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
> -{
> -	/*
> -	 * WARNING: We cannot rely on the resource flags. The Linux PCI
> -	 * allocation code sometimes decides to put a 64-bit prefetchable
> -	 * BAR in the 32-bit window, so we have to compare the addresses.
> -	 *
> -	 * For simplicity we only test resource start.
> -	 */
> -	return (r->start >= phb->ioda.m64_base &&
> -		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
> -}
> -
> -static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
> -{
> -	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
> -
> -	return (resource_flags & flags) == flags;
> -}
> -
>  static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
>  {
>  	s64 rc;
> @@ -172,7 +152,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
>  	pnv_ioda_init_pe(phb, pe_no);
>  }
>  
> -static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
> +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
>  {
>  	long pe;
>  
> @@ -184,7 +164,7 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
>  	return NULL;
>  }
>  
> -static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
> +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
>  {
>  	struct pnv_phb *phb = pe->phb;
>  	unsigned int pe_num = pe->pe_number;
> @@ -816,7 +796,7 @@ static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
>  		pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
>  }
>  
> -static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
> +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
>  {
>  	struct pci_dev *parent;
>  	uint8_t bcomp, dcomp, fcomp;
> @@ -887,7 +867,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
>  	return 0;
>  }
>  
> -static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
> +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
>  {
>  	struct pci_dev *parent;
>  	uint8_t bcomp, dcomp, fcomp;
> @@ -982,91 +962,6 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
>  	return 0;
>  }
>  
> -#ifdef CONFIG_PCI_IOV
> -static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
> -{
> -	struct pci_dn *pdn = pci_get_pdn(dev);
> -	int i;
> -	struct resource *res, res2;
> -	resource_size_t size;
> -	u16 num_vfs;
> -
> -	if (!dev->is_physfn)
> -		return -EINVAL;
> -
> -	/*
> -	 * "offset" is in VFs.  The M64 windows are sized so that when they
> -	 * are segmented, each segment is the same size as the IOV BAR.
> -	 * Each segment is in a separate PE, and the high order bits of the
> -	 * address are the PE number.  Therefore, each VF's BAR is in a
> -	 * separate PE, and changing the IOV BAR start address changes the
> -	 * range of PEs the VFs are in.
> -	 */
> -	num_vfs = pdn->num_vfs;
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &dev->resource[i + PCI_IOV_RESOURCES];
> -		if (!res->flags || !res->parent)
> -			continue;
> -
> -		/*
> -		 * The actual IOV BAR range is determined by the start address
> -		 * and the actual size for num_vfs VFs BAR.  This check is to
> -		 * make sure that after shifting, the range will not overlap
> -		 * with another device.
> -		 */
> -		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
> -		res2.flags = res->flags;
> -		res2.start = res->start + (size * offset);
> -		res2.end = res2.start + (size * num_vfs) - 1;
> -
> -		if (res2.end > res->end) {
> -			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
> -				i, &res2, res, num_vfs, offset);
> -			return -EBUSY;
> -		}
> -	}
> -
> -	/*
> -	 * Since M64 BAR shares segments among all possible 256 PEs,
> -	 * we have to shift the beginning of PF IOV BAR to make it start from
> -	 * the segment which belongs to the PE number assigned to the first VF.
> -	 * This creates a "hole" in the /proc/iomem which could be used for
> -	 * allocating other resources so we reserve this area below and
> -	 * release when IOV is released.
> -	 */
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &dev->resource[i + PCI_IOV_RESOURCES];
> -		if (!res->flags || !res->parent)
> -			continue;
> -
> -		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
> -		res2 = *res;
> -		res->start += size * offset;
> -
> -		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
> -			 i, &res2, res, (offset > 0) ? "En" : "Dis",
> -			 num_vfs, offset);
> -
> -		if (offset < 0) {
> -			devm_release_resource(&dev->dev, &pdn->holes[i]);
> -			memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
> -		}
> -
> -		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
> -
> -		if (offset > 0) {
> -			pdn->holes[i].start = res2.start;
> -			pdn->holes[i].end = res2.start + size * offset - 1;
> -			pdn->holes[i].flags = IORESOURCE_BUS;
> -			pdn->holes[i].name = "pnv_iov_reserved";
> -			devm_request_resource(&dev->dev, res->parent,
> -					&pdn->holes[i]);
> -		}
> -	}
> -	return 0;
> -}
> -#endif /* CONFIG_PCI_IOV */
> -
>  static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>  {
>  	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
> @@ -1294,406 +1189,9 @@ static void pnv_pci_ioda_setup_nvlink(void)
>  #endif
>  }
>  
> -#ifdef CONFIG_PCI_IOV
> -static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
> -{
> -	struct pnv_phb        *phb;
> -	struct pci_dn         *pdn;
> -	int                    i, j;
> -	int                    m64_bars;
> -
> -	phb = pci_bus_to_pnvhb(pdev->bus);
> -	pdn = pci_get_pdn(pdev);
> -
> -	if (pdn->m64_single_mode)
> -		m64_bars = num_vfs;
> -	else
> -		m64_bars = 1;
> -
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> -		for (j = 0; j < m64_bars; j++) {
> -			if (pdn->m64_map[j][i] == IODA_INVALID_M64)
> -				continue;
> -			opal_pci_phb_mmio_enable(phb->opal_id,
> -				OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
> -			clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
> -			pdn->m64_map[j][i] = IODA_INVALID_M64;
> -		}
> -
> -	kfree(pdn->m64_map);
> -	return 0;
> -}
> -
> -static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
> -{
> -	struct pnv_phb        *phb;
> -	struct pci_dn         *pdn;
> -	unsigned int           win;
> -	struct resource       *res;
> -	int                    i, j;
> -	int64_t                rc;
> -	int                    total_vfs;
> -	resource_size_t        size, start;
> -	int                    pe_num;
> -	int                    m64_bars;
> -
> -	phb = pci_bus_to_pnvhb(pdev->bus);
> -	pdn = pci_get_pdn(pdev);
> -	total_vfs = pci_sriov_get_totalvfs(pdev);
> -
> -	if (pdn->m64_single_mode)
> -		m64_bars = num_vfs;
> -	else
> -		m64_bars = 1;
> -
> -	pdn->m64_map = kmalloc_array(m64_bars,
> -				     sizeof(*pdn->m64_map),
> -				     GFP_KERNEL);
> -	if (!pdn->m64_map)
> -		return -ENOMEM;
> -	/* Initialize the m64_map to IODA_INVALID_M64 */
> -	for (i = 0; i < m64_bars ; i++)
> -		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
> -			pdn->m64_map[i][j] = IODA_INVALID_M64;
> -
> -
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> -		if (!res->flags || !res->parent)
> -			continue;
> -
> -		for (j = 0; j < m64_bars; j++) {
> -			do {
> -				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
> -						phb->ioda.m64_bar_idx + 1, 0);
> -
> -				if (win >= phb->ioda.m64_bar_idx + 1)
> -					goto m64_failed;
> -			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
> -
> -			pdn->m64_map[j][i] = win;
> -
> -			if (pdn->m64_single_mode) {
> -				size = pci_iov_resource_size(pdev,
> -							PCI_IOV_RESOURCES + i);
> -				start = res->start + size * j;
> -			} else {
> -				size = resource_size(res);
> -				start = res->start;
> -			}
> -
> -			/* Map the M64 here */
> -			if (pdn->m64_single_mode) {
> -				pe_num = pdn->pe_num_map[j];
> -				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -						pe_num, OPAL_M64_WINDOW_TYPE,
> -						pdn->m64_map[j][i], 0);
> -			}
> -
> -			rc = opal_pci_set_phb_mem_window(phb->opal_id,
> -						 OPAL_M64_WINDOW_TYPE,
> -						 pdn->m64_map[j][i],
> -						 start,
> -						 0, /* unused */
> -						 size);
> -
> -
> -			if (rc != OPAL_SUCCESS) {
> -				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
> -					win, rc);
> -				goto m64_failed;
> -			}
> -
> -			if (pdn->m64_single_mode)
> -				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
> -			else
> -				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
> -
> -			if (rc != OPAL_SUCCESS) {
> -				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
> -					win, rc);
> -				goto m64_failed;
> -			}
> -		}
> -	}
> -	return 0;
> -
> -m64_failed:
> -	pnv_pci_vf_release_m64(pdev, num_vfs);
> -	return -EBUSY;
> -}
> -
> -static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
> -
> -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
> -{
> -	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe, *pe_n;
> -	struct pci_dn         *pdn;
> -
> -	phb = pci_bus_to_pnvhb(pdev->bus);
> -	pdn = pci_get_pdn(pdev);
> -
> -	if (!pdev->is_physfn)
> -		return;
> -
> -	/* FIXME: Use pnv_ioda_release_pe()? */
> -	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
> -		if (pe->parent_dev != pdev)
> -			continue;
> -
> -		pnv_pci_ioda2_release_pe_dma(pe);
> -
> -		/* Remove from list */
> -		mutex_lock(&phb->ioda.pe_list_mutex);
> -		list_del(&pe->list);
> -		mutex_unlock(&phb->ioda.pe_list_mutex);
> -
> -		pnv_ioda_deconfigure_pe(phb, pe);
> -
> -		pnv_ioda_free_pe(pe);
> -	}
> -}
> -
> -static void pnv_pci_sriov_disable(struct pci_dev *pdev)
> -{
> -	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
> -	struct pci_dn         *pdn;
> -	u16                    num_vfs, i;
> -
> -	phb = pci_bus_to_pnvhb(pdev->bus);
> -	pdn = pci_get_pdn(pdev);
> -	num_vfs = pdn->num_vfs;
> -
> -	/* Release VF PEs */
> -	pnv_ioda_release_vf_PE(pdev);
> -
> -	if (phb->type == PNV_PHB_IODA2) {
> -		if (!pdn->m64_single_mode)
> -			pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
> -
> -		/* Release M64 windows */
> -		pnv_pci_vf_release_m64(pdev, num_vfs);
> -
> -		/* Release PE numbers */
> -		if (pdn->m64_single_mode) {
> -			for (i = 0; i < num_vfs; i++) {
> -				if (pdn->pe_num_map[i] == IODA_INVALID_PE)
> -					continue;
> -
> -				pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
> -				pnv_ioda_free_pe(pe);
> -			}
> -		} else
> -			bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
> -		/* Releasing pe_num_map */
> -		kfree(pdn->pe_num_map);
> -	}
> -}
> -
> -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> -				       struct pnv_ioda_pe *pe);
> -static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> -{
> -	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
> -	int                    pe_num;
> -	u16                    vf_index;
> -	struct pci_dn         *pdn;
> -
> -	phb = pci_bus_to_pnvhb(pdev->bus);
> -	pdn = pci_get_pdn(pdev);
> -
> -	if (!pdev->is_physfn)
> -		return;
> -
> -	/* Reserve PE for each VF */
> -	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
> -		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
> -		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
> -		struct pci_dn *vf_pdn;
> -
> -		if (pdn->m64_single_mode)
> -			pe_num = pdn->pe_num_map[vf_index];
> -		else
> -			pe_num = *pdn->pe_num_map + vf_index;
> -
> -		pe = &phb->ioda.pe_array[pe_num];
> -		pe->pe_number = pe_num;
> -		pe->phb = phb;
> -		pe->flags = PNV_IODA_PE_VF;
> -		pe->pbus = NULL;
> -		pe->parent_dev = pdev;
> -		pe->mve_number = -1;
> -		pe->rid = (vf_bus << 8) | vf_devfn;
> -
> -		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
> -			pci_domain_nr(pdev->bus), pdev->bus->number,
> -			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
> -
> -		if (pnv_ioda_configure_pe(phb, pe)) {
> -			/* XXX What do we do here ? */
> -			pnv_ioda_free_pe(pe);
> -			pe->pdev = NULL;
> -			continue;
> -		}
> -
> -		/* Put PE to the list */
> -		mutex_lock(&phb->ioda.pe_list_mutex);
> -		list_add_tail(&pe->list, &phb->ioda.pe_list);
> -		mutex_unlock(&phb->ioda.pe_list_mutex);
> -
> -		/* associate this pe to it's pdn */
> -		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
> -			if (vf_pdn->busno == vf_bus &&
> -			    vf_pdn->devfn == vf_devfn) {
> -				vf_pdn->pe_number = pe_num;
> -				break;
> -			}
> -		}
> -
> -		pnv_pci_ioda2_setup_dma_pe(phb, pe);
> -	}
> -}
> -
> -static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> -{
> -	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
> -	struct pci_dn         *pdn;
> -	int                    ret;
> -	u16                    i;
> -
> -	phb = pci_bus_to_pnvhb(pdev->bus);
> -	pdn = pci_get_pdn(pdev);
> -
> -	if (phb->type == PNV_PHB_IODA2) {
> -		if (!pdn->vfs_expanded) {
> -			dev_info(&pdev->dev, "don't support this SRIOV device"
> -				" with non 64bit-prefetchable IOV BAR\n");
> -			return -ENOSPC;
> -		}
> -
> -		/*
> -		 * When M64 BARs functions in Single PE mode, the number of VFs
> -		 * could be enabled must be less than the number of M64 BARs.
> -		 */
> -		if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
> -			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
> -			return -EBUSY;
> -		}
> -
> -		/* Allocating pe_num_map */
> -		if (pdn->m64_single_mode)
> -			pdn->pe_num_map = kmalloc_array(num_vfs,
> -							sizeof(*pdn->pe_num_map),
> -							GFP_KERNEL);
> -		else
> -			pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
> -
> -		if (!pdn->pe_num_map)
> -			return -ENOMEM;
> -
> -		if (pdn->m64_single_mode)
> -			for (i = 0; i < num_vfs; i++)
> -				pdn->pe_num_map[i] = IODA_INVALID_PE;
> -
> -		/* Calculate available PE for required VFs */
> -		if (pdn->m64_single_mode) {
> -			for (i = 0; i < num_vfs; i++) {
> -				pe = pnv_ioda_alloc_pe(phb);
> -				if (!pe) {
> -					ret = -EBUSY;
> -					goto m64_failed;
> -				}
> -
> -				pdn->pe_num_map[i] = pe->pe_number;
> -			}
> -		} else {
> -			mutex_lock(&phb->ioda.pe_alloc_mutex);
> -			*pdn->pe_num_map = bitmap_find_next_zero_area(
> -				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
> -				0, num_vfs, 0);
> -			if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
> -				mutex_unlock(&phb->ioda.pe_alloc_mutex);
> -				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
> -				kfree(pdn->pe_num_map);
> -				return -EBUSY;
> -			}
> -			bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
> -			mutex_unlock(&phb->ioda.pe_alloc_mutex);
> -		}
> -		pdn->num_vfs = num_vfs;
> -
> -		/* Assign M64 window accordingly */
> -		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
> -		if (ret) {
> -			dev_info(&pdev->dev, "Not enough M64 window resources\n");
> -			goto m64_failed;
> -		}
> -
> -		/*
> -		 * When using one M64 BAR to map one IOV BAR, we need to shift
> -		 * the IOV BAR according to the PE# allocated to the VFs.
> -		 * Otherwise, the PE# for the VF will conflict with others.
> -		 */
> -		if (!pdn->m64_single_mode) {
> -			ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
> -			if (ret)
> -				goto m64_failed;
> -		}
> -	}
> -
> -	/* Setup VF PEs */
> -	pnv_ioda_setup_vf_PE(pdev, num_vfs);
> -
> -	return 0;
> -
> -m64_failed:
> -	if (pdn->m64_single_mode) {
> -		for (i = 0; i < num_vfs; i++) {
> -			if (pdn->pe_num_map[i] == IODA_INVALID_PE)
> -				continue;
> -
> -			pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
> -			pnv_ioda_free_pe(pe);
> -		}
> -	} else
> -		bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
> -
> -	/* Releasing pe_num_map */
> -	kfree(pdn->pe_num_map);
> -
> -	return ret;
> -}
> -
> -static int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
> -{
> -	pnv_pci_sriov_disable(pdev);
> -
> -	/* Release PCI data */
> -	remove_sriov_vf_pdns(pdev);
> -	return 0;
> -}
> -
> -static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> -{
> -	/* Allocate PCI data */
> -	add_sriov_vf_pdns(pdev);
> -
> -	return pnv_pci_sriov_enable(pdev, num_vfs);
> -}
> -#endif /* CONFIG_PCI_IOV */
> -
>  static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
>  				       struct pnv_ioda_pe *pe);
>  
> -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> -				       struct pnv_ioda_pe *pe);
> -
>  static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
>  {
>  	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> @@ -2559,8 +2057,8 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
>  };
>  #endif
>  
> -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> -				       struct pnv_ioda_pe *pe)
> +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> +				struct pnv_ioda_pe *pe)
>  {
>  	int64_t rc;
>  
> @@ -2737,117 +2235,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  		count, phb->msi_base);
>  }
>  
> -#ifdef CONFIG_PCI_IOV
> -static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> -{
> -	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> -	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
> -	struct resource *res;
> -	int i;
> -	resource_size_t size, total_vf_bar_sz;
> -	struct pci_dn *pdn;
> -	int mul, total_vfs;
> -
> -	pdn = pci_get_pdn(pdev);
> -	pdn->vfs_expanded = 0;
> -	pdn->m64_single_mode = false;
> -
> -	total_vfs = pci_sriov_get_totalvfs(pdev);
> -	mul = phb->ioda.total_pe_num;
> -	total_vf_bar_sz = 0;
> -
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> -		if (!res->flags || res->parent)
> -			continue;
> -		if (!pnv_pci_is_m64_flags(res->flags)) {
> -			dev_warn(&pdev->dev, "Don't support SR-IOV with"
> -					" non M64 VF BAR%d: %pR. \n",
> -				 i, res);
> -			goto truncate_iov;
> -		}
> -
> -		total_vf_bar_sz += pci_iov_resource_size(pdev,
> -				i + PCI_IOV_RESOURCES);
> -
> -		/*
> -		 * If bigger than quarter of M64 segment size, just round up
> -		 * power of two.
> -		 *
> -		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
> -		 * with other devices, IOV BAR size is expanded to be
> -		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
> -		 * segment size , the expanded size would equal to half of the
> -		 * whole M64 space size, which will exhaust the M64 Space and
> -		 * limit the system flexibility.  This is a design decision to
> -		 * set the boundary to quarter of the M64 segment size.
> -		 */
> -		if (total_vf_bar_sz > gate) {
> -			mul = roundup_pow_of_two(total_vfs);
> -			dev_info(&pdev->dev,
> -				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
> -				total_vf_bar_sz, gate, mul);
> -			pdn->m64_single_mode = true;
> -			break;
> -		}
> -	}
> -
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> -		if (!res->flags || res->parent)
> -			continue;
> -
> -		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
> -		/*
> -		 * On PHB3, the minimum size alignment of M64 BAR in single
> -		 * mode is 32MB.
> -		 */
> -		if (pdn->m64_single_mode && (size < SZ_32M))
> -			goto truncate_iov;
> -		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
> -		res->end = res->start + size * mul - 1;
> -		dev_dbg(&pdev->dev, "                       %pR\n", res);
> -		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
> -			 i, res, mul);
> -	}
> -	pdn->vfs_expanded = mul;
> -
> -	return;
> -
> -truncate_iov:
> -	/* To save MMIO space, IOV BAR is truncated. */
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> -		res->flags = 0;
> -		res->end = res->start - 1;
> -	}
> -}
> -
> -static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
> -{
> -	if (WARN_ON(pci_dev_is_added(pdev)))
> -		return;
> -
> -	if (pdev->is_virtfn) {
> -		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
> -
> -		/*
> -		 * VF PEs are single-device PEs so their pdev pointer needs to
> -		 * be set. The pdev doesn't exist when the PE is allocated (in
> -		 * (pcibios_sriov_enable()) so we fix it up here.
> -		 */
> -		pe->pdev = pdev;
> -		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
> -	} else if (pdev->is_physfn) {
> -		/*
> -		 * For PFs adjust their allocated IOV resources to match what
> -		 * the PHB can support using it's M64 BAR table.
> -		 */
> -		pnv_pci_ioda_fixup_iov_resources(pdev);
> -	}
> -}
> -#endif /* CONFIG_PCI_IOV */
> -
>  static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
>  				  struct resource *res)
>  {
> @@ -3192,41 +2579,6 @@ static resource_size_t pnv_pci_default_alignment(void)
>  	return PAGE_SIZE;
>  }
>  
> -#ifdef CONFIG_PCI_IOV
> -static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
> -						      int resno)
> -{
> -	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> -	struct pci_dn *pdn = pci_get_pdn(pdev);
> -	resource_size_t align;
> -
> -	/*
> -	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
> -	 * SR-IOV. While from hardware perspective, the range mapped by M64
> -	 * BAR should be size aligned.
> -	 *
> -	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
> -	 * powernv-specific hardware restriction is gone. But if just use the
> -	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
> -	 * in one segment of M64 #15, which introduces the PE conflict between
> -	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
> -	 * m64_segsize.
> -	 *
> -	 * This function returns the total IOV BAR size if M64 BAR is in
> -	 * Shared PE mode or just VF BAR size if not.
> -	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
> -	 * M64 segment size if IOV BAR size is less.
> -	 */
> -	align = pci_iov_resource_size(pdev, resno);
> -	if (!pdn->vfs_expanded)
> -		return align;
> -	if (pdn->m64_single_mode)
> -		return max(align, (resource_size_t)phb->ioda.m64_segsize);
> -
> -	return pdn->vfs_expanded * align;
> -}
> -#endif /* CONFIG_PCI_IOV */
> -
>  /* Prevent enabling devices for which we couldn't properly
>   * assign a PE
>   */
> @@ -3323,7 +2675,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
>  	iommu_tce_table_put(tbl);
>  }
>  
> -static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
> +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
>  {
>  	struct iommu_table *tbl = pe->table_group.tables[0];
>  	int64_t rc;
> @@ -3436,12 +2788,23 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
>  	struct pci_dn *pdn = pci_get_pdn(pdev);
>  	struct pnv_ioda_pe *pe;
>  
> +	/* The VF PE state is torn down when sriov_disable() is called */
>  	if (pdev->is_virtfn)
>  		return;
>  
>  	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
>  		return;
>  
> +#ifdef CONFIG_PCI_IOV
> +	/*
> +	 * FIXME: Try move this to sriov_disable(). It's here since we allocate
> +	 * the iov state at probe time since we need to fiddle with the IOV
> +	 * resources.
> +	 */
> +	if (pdev->is_physfn)
> +		kfree(pdev->dev.archdata.iov_data);
> +#endif
> +
>  	/*
>  	 * PCI hotplug can happen as part of EEH error recovery. The @pdn
>  	 * isn't removed and added afterwards in this scenario. We should
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> new file mode 100644
> index 000000000000..080ea39f5a83
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -0,0 +1,642 @@
> +// SPDX-License-Identifier: GPL-2.0

Not SPDX-License-Identifier: GPL-2.0-or-later ?


> +
> +#include <linux/kernel.h>
> +#include <linux/ioport.h>
> +#include <linux/bitmap.h>
> +#include <linux/pci.h>
> +
> +#include <asm/opal.h>
> +
> +#include "pci.h"
> +
> +/* for pci_dev_is_added() */
> +#include "../../../../drivers/pci/pci.h"
> +
> +
> +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> +{
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> +	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
> +	struct resource *res;
> +	int i;
> +	resource_size_t size, total_vf_bar_sz;
> +	struct pnv_iov_data *iov;
> +	int mul, total_vfs;
> +
> +	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
> +	if (!iov)
> +		goto truncate_iov;
> +	pdev->dev.archdata.iov_data = iov;
> +
> +	total_vfs = pci_sriov_get_totalvfs(pdev);
> +	mul = phb->ioda.total_pe_num;
> +	total_vf_bar_sz = 0;
> +
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> +		if (!res->flags || res->parent)
> +			continue;
> +		if (!pnv_pci_is_m64_flags(res->flags)) {
> +			dev_warn(&pdev->dev, "Don't support SR-IOV with"
> +					" non M64 VF BAR%d: %pR. \n",
> +				 i, res);
> +			goto truncate_iov;
> +		}
> +
> +		total_vf_bar_sz += pci_iov_resource_size(pdev,
> +				i + PCI_IOV_RESOURCES);
> +
> +		/*
> +		 * If bigger than quarter of M64 segment size, just round up
> +		 * power of two.
> +		 *
> +		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
> +		 * with other devices, IOV BAR size is expanded to be
> +		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
> +		 * segment size , the expanded size would equal to half of the
> +		 * whole M64 space size, which will exhaust the M64 Space and
> +		 * limit the system flexibility.  This is a design decision to
> +		 * set the boundary to quarter of the M64 segment size.
> +		 */
> +		if (total_vf_bar_sz > gate) {
> +			mul = roundup_pow_of_two(total_vfs);
> +			dev_info(&pdev->dev,
> +				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
> +				total_vf_bar_sz, gate, mul);
> +			iov->m64_single_mode = true;
> +			break;
> +		}
> +	}
> +
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> +		if (!res->flags || res->parent)
> +			continue;
> +
> +		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
> +		/*
> +		 * On PHB3, the minimum size alignment of M64 BAR in single
> +		 * mode is 32MB.
> +		 */
> +		if (iov->m64_single_mode && (size < SZ_32M))
> +			goto truncate_iov;
> +		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
> +		res->end = res->start + size * mul - 1;
> +		dev_dbg(&pdev->dev, "                       %pR\n", res);
> +		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
> +			 i, res, mul);
> +	}
> +	iov->vfs_expanded = mul;
> +
> +	return;
> +
> +truncate_iov:
> +	/* To save MMIO space, IOV BAR is truncated. */
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> +		res->flags = 0;
> +		res->end = res->start - 1;
> +	}
> +
> +	pdev->dev.archdata.iov_data = NULL;
> +	kfree(iov);
> +}
> +
> +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
> +{
> +	if (WARN_ON(pci_dev_is_added(pdev)))
> +		return;
> +
> +	if (pdev->is_virtfn) {
> +		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
> +
> +		/*
> +		 * VF PEs are single-device PEs so their pdev pointer needs to
> +		 * be set. The pdev doesn't exist when the PE is allocated (in
> +		 * (pcibios_sriov_enable()) so we fix it up here.
> +		 */
> +		pe->pdev = pdev;
> +		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
> +	} else if (pdev->is_physfn) {
> +		/*
> +		 * For PFs adjust their allocated IOV resources to match what
> +		 * the PHB can support using it's M64 BAR table.
> +		 */
> +		pnv_pci_ioda_fixup_iov_resources(pdev);
> +	}
> +}
> +
> +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
> +						      int resno)
> +{
> +	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> +	struct pnv_iov_data *iov = pnv_iov_get(pdev);
> +	resource_size_t align;
> +
> +	/*
> +	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
> +	 * SR-IOV. While from hardware perspective, the range mapped by M64
> +	 * BAR should be size aligned.
> +	 *
> +	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
> +	 * powernv-specific hardware restriction is gone. But if just use the
> +	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
> +	 * in one segment of M64 #15, which introduces the PE conflict between
> +	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
> +	 * m64_segsize.
> +	 *
> +	 * This function returns the total IOV BAR size if M64 BAR is in
> +	 * Shared PE mode or just VF BAR size if not.
> +	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
> +	 * M64 segment size if IOV BAR size is less.
> +	 */
> +	align = pci_iov_resource_size(pdev, resno);
> +
> +	/*
> +	 * iov can be null if we have an SR-IOV device with IOV BAR that can't
> +	 * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
> +	 * In that case we don't allow VFs to be enabled so just return the
> +	 * default alignment.
> +	 */
> +	if (!iov)
> +		return align;


This is the new chunk. What would happen before? Would a non-prefetch BAR
still go to the m64 space?

The rest is accurate.




-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread
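
For reference, the per-device SR-IOV state introduced above hangs off
pdev->dev.archdata.iov_data: it is kzalloc'ed in
pnv_pci_ioda_fixup_iov_resources(), freed for PFs in pnv_pci_release_device(),
and looked up via pnv_iov_get(). The accessor isn't shown in the hunks above,
but it is presumably just a thin wrapper along these lines (sketch only):

	static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
	{
		return pdev->dev.archdata.iov_data;
	}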

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-14  7:21       ` Alexey Kardashevskiy
@ 2020-07-15  0:23         ` Alexey Kardashevskiy
  2020-07-15  1:38         ` Oliver O'Halloran
  1 sibling, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  0:23 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 14/07/2020 17:21, Alexey Kardashevskiy wrote:
> 
> 
> On 14/07/2020 15:58, Oliver O'Halloran wrote:
>> On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>>
>>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>>> There's an optimisation in the PE setup which skips performing DMA
>>>> setup for a PE if we only have bridges in a PE. The assumption being
>>>> that only "real" devices will DMA to system memory, which is probably
>>>> fair. However, if we start off with only bridge devices in a PE then
>>>> add a non-bridge device, the new device won't be able to use DMA because
>>>> we never configured it.
>>>>
>>>> Fix this (admittedly pretty weird) edge case by tracking whether we've done
>>>> the DMA setup for the PE or not. If a non-bridge device is added to the PE
>>>> (via rescan or hotplug, or whatever) we can set up DMA on demand.
>>>
>>> So hotplug does not work on powernv then, right? I thought you tested it
>>> a while ago, or this patch is the result of that attempt? If it is, then
>>
>> It mostly works. Just the really niche case of hot plugging a bridge,
>> then later on hot plugging a device into the same bus which wouldn't
>> work.
> 
> Don't you have to have a slot (which is a bridge) for hotplug in the
> first place, to hotplug the bridge?


As discussed elsewhere, I missed that it is a non-bridge device on the
same bus as a previously plugged bridge. Now it all makes sense and


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV
  2020-07-10  5:23 ` [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV Oliver O'Halloran
@ 2020-07-15  0:40   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  0:40 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> SR-IOV support on PowerNV is a byzantine maze of hooks. I have no idea
> how anyone is supposed to know how it works except through a lot of
> suffering. Write up some docs about the overall story to help out
> the next sucker^Wperson who needs to tinker with it.


Sounds about right :)

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>



> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 130 +++++++++++++++++++++
>  1 file changed, 130 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index 080ea39f5a83..f4c74ab1284d 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -12,6 +12,136 @@
>  /* for pci_dev_is_added() */
>  #include "../../../../drivers/pci/pci.h"
>  
> +/*
> + * The majority of the complexity in supporting SR-IOV on PowerNV comes from
> + * the need to put the MMIO space for each VF into a separate PE. Internally
> + * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table".
> + * The MBT historically only applied to the 64bit MMIO window of the PHB
> + * so it's common to see it referred to as the "M64BT".
> + *
> + * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces
> + * the address range that we want to map to be power-of-two sized and aligned.
> + * For conventional PCI devices this isn't really an issue since PCI device BARs
> + * have the same requirement.
> + *
> + * For a SR-IOV BAR things are a little more awkward since size and alignment
> + * are not coupled. The alignment is set based on the per-VF BAR size, but
> + * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs
> + * isn't necessarily a power of two, so neither is the total size. To fix that
> + * we need to finesse (read: hack) the Linux BAR allocator so that it will
> + * allocate the SR-IOV BARs in a way that lets us map them using the MBT.
> + *
> + * The changes to size and alignment that we need to do depend on the "mode"
> + * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above,
> + * so as a baseline we can assume that we have the following BAR modes
> + * available:
> + *
> + *   NB: $PE_COUNT is the number of PEs that the PHB supports.
> + *
> + * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized
> + *    segments. The n'th segment is mapped to the n'th PE.
> + * b) An un-segmented BAR that maps the whole address range to a specific PE.
> + *
> + *
> + * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR
> + * For comparison b) requires one entry per-VF per-BAR, or:
> + * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment
> + * to equal the size of the per-VF BAR area. So:
> + *
> + *	new_size = per-vf-size * number-of-PEs
> + *
> + * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size
> + * to "new_size", calculated above. Implementing this is a convoluted process
> + * which requires several hooks in the PCI core:
> + *
> + * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov().
> + *
> + *    At this point the device has been probed and the device's BARs are sized,
> + *    but no resource allocations have been done. The SR-IOV BARs are sized
> + *    based on the maximum number of VFs supported by the device and we need
> + *    to increase that to new_size.
> + *
> + * 2. Later, when Linux actually assigns resources it tries to make the resource
> + *    allocations for each PCI bus as compact as possible. As a part of that it
> + *    sorts the BARs on a bus by their required alignment, which is calculated
> + *    using pci_resource_alignment().
> + *
> + *    For IOV resources this goes:
> + *    pci_resource_alignment()
> + *        pci_sriov_resource_alignment()
> + *            pcibios_sriov_resource_alignment()
> + *                pnv_pci_iov_resource_alignment()
> + *
> + *    Our hook overrides the default alignment, equal to the per-vf-size, with
> + *    new_size computed above.
> + *
> + * 3. When userspace enables VFs for a device:
> + *
> + *    sriov_enable()
> + *       pcibios_sriov_enable()
> + *           pnv_pcibios_sriov_enable()
> + *
> + *    This is where we actually allocate PE numbers for each VF and setup the
> + *    MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena"
> + *    where each MBT segment is equal in size to the VF BAR so we can shift
> + *    around the actual SR-IOV BAR location within this arena. We need this
> + *    ability because the PE space is shared by all devices on the same PHB.
> + *    When using mode a) described above, segment 0 maps to PE#0, which might
> + *    already be in use by another device on the PHB.
> + *
> + *    As a result we need to allocate a contiguous range of PE numbers, then shift
> + *    the address programmed into the SR-IOV BAR of the PF so that the address
> + *    of VF0 matches up with the segment corresponding to the first allocated
> + *    PE number. This is handled in pnv_pci_vf_resource_shift().
> + *
> + *    Once all that is done we return to the PCI core which then enables VFs,
> + *    scans them and creates pci_devs for each. The init process for a VF is
> + *    largely the same as a normal device, but the VF is inserted into the IODA
> + *    PE that we allocated for it rather than the PE associated with the bus.
> + *
> + * 4. When userspace disables VFs we unwind the above in
> + *    pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since
> + *    we don't need to validate anything, just tear down the mappings and
> + *    move SR-IOV resource back to its "proper" location.
> + *
> + * That's how mode a) works. In theory mode b) (single PE mapping) is less work
> + * since we can map each individual VF with a separate BAR. However, there's a
> + * few limitations:
> + *
> + * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes
> + *    it only usable for devices with very large per-VF BARs. Such devices are
> + *    similar to Big Foot. They definitely exist, but I've never seen one.
> + *
> + * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only
> + *    have 16 in total and some are needed for other uses. Most SR-IOV capable
> + *    network cards can support more than 16 VFs on each port.
> + *
> + * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO
> + * window of the PHB.
> + *
> + *
> + *
> + * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It
> + * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows
> + * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since
> + * the Linux BAR allocation will place any BAR marked as non-prefetchable into
> + * the non-prefetchable bridge window, which is 32bit only. It also added two
> + * new modes:
> + *
> + * c) A segmented BAR similar to a), but each segment can be individually
> + *    mapped to any PE. This matches how the 32bit MMIO window worked on
> + *    IODA1&2.
> + *
> + * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a),
> + *    but with fewer segments and configurable base PE.
> + *
> + *    i.e. The n'th segment maps to the (n + base)'th PE.
> + *
> + *    The base PE is also required to be a multiple of the window size.
> + *
> + * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us
> + * to exploit any of the IODA3 features.
> + */
>  
>  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  {
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread
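
As a back-of-the-envelope illustration of the mode a) arithmetic described in
the comment above (all numbers here are made up for the example; the shift at
the end is what pnv_pci_vf_resource_shift() applies to the PF's IOV BAR):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long per_vf_size = 1ULL << 20;	/* example: 1MB per-VF BAR */
		unsigned int pe_count = 256;			/* example: $PE_COUNT of the PHB */
		unsigned int first_pe = 32;			/* example: first allocated PE number */

		/* the IOV BAR is grown (and aligned) to one segment per PE ... */
		unsigned long long new_size = per_vf_size * pe_count;

		/* ... and shifted so VF0 lands in the segment of the first allocated PE */
		unsigned long long shift = per_vf_size * first_pe;

		printf("IOV BAR becomes %lluMB, VF0 sits %lluMB into the window\n",
		       new_size >> 20, shift >> 20);
		return 0;
	}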

* Re: [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov
  2020-07-10  5:23 ` [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov Oliver O'Halloran
@ 2020-07-15  0:46   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  0:46 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> The truncate_iov path prevents SR-IOV from being used by making the SR-IOV
> BAR resources unallocatable. Rename it to reflect what it actually does.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index f4c74ab1284d..216ceeff69b0 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -155,7 +155,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  
>  	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
>  	if (!iov)
> -		goto truncate_iov;
> +		goto disable_iov;
>  	pdev->dev.archdata.iov_data = iov;
>  
>  	total_vfs = pci_sriov_get_totalvfs(pdev);
> @@ -170,7 +170,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>  					" non M64 VF BAR%d: %pR. \n",
>  				 i, res);
> -			goto truncate_iov;
> +			goto disable_iov;
>  		}
>  
>  		total_vf_bar_sz += pci_iov_resource_size(pdev,
> @@ -209,7 +209,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  		 * mode is 32MB.
>  		 */
>  		if (iov->m64_single_mode && (size < SZ_32M))
> -			goto truncate_iov;
> +			goto disable_iov;
> +
>  		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>  		res->end = res->start + size * mul - 1;
>  		dev_dbg(&pdev->dev, "                       %pR\n", res);
> @@ -220,8 +221,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  
>  	return;
>  
> -truncate_iov:
> -	/* To save MMIO space, IOV BAR is truncated. */
> +disable_iov:
> +	/* Save ourselves some MMIO space by disabling the unusable BARs */
>  	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>  		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>  		res->flags = 0;
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking
  2020-07-10  5:23 ` [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking Oliver O'Halloran
@ 2020-07-15  1:34   ` Alexey Kardashevskiy
  2020-07-15  1:41     ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  1:34 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> No need for the multi-dimensional arrays, just use a bitmap.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 48 +++++++---------------
>  arch/powerpc/platforms/powernv/pci.h       |  7 +++-
>  2 files changed, 20 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index 216ceeff69b0..e4c65cb49757 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -303,28 +303,20 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
>  	struct pnv_iov_data   *iov;
>  	struct pnv_phb        *phb;
> -	int                    i, j;
> -	int                    m64_bars;
> +	int window_id;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
>  	iov = pnv_iov_get(pdev);
>  
> -	if (iov->m64_single_mode)
> -		m64_bars = num_vfs;
> -	else
> -		m64_bars = 1;
> +	for_each_set_bit(window_id, iov->used_m64_bar_mask, 64) {
> +		opal_pci_phb_mmio_enable(phb->opal_id,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 0);
>  
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> -		for (j = 0; j < m64_bars; j++) {
> -			if (iov->m64_map[j][i] == IODA_INVALID_M64)
> -				continue;
> -			opal_pci_phb_mmio_enable(phb->opal_id,
> -				OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 0);
> -			clear_bit(iov->m64_map[j][i], &phb->ioda.m64_bar_alloc);
> -			iov->m64_map[j][i] = IODA_INVALID_M64;
> -		}
> +		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
> +	}
>  
> -	kfree(iov->m64_map);
>  	return 0;
>  }
>  
> @@ -350,23 +342,14 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	else
>  		m64_bars = 1;
>  
> -	iov->m64_map = kmalloc_array(m64_bars,
> -				     sizeof(*iov->m64_map),
> -				     GFP_KERNEL);
> -	if (!iov->m64_map)
> -		return -ENOMEM;
> -	/* Initialize the m64_map to IODA_INVALID_M64 */
> -	for (i = 0; i < m64_bars ; i++)
> -		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
> -			iov->m64_map[i][j] = IODA_INVALID_M64;
> -
> -
>  	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>  		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>  		if (!res->flags || !res->parent)
>  			continue;
>  
>  		for (j = 0; j < m64_bars; j++) {
> +
> +			/* allocate a window ID for this BAR */
>  			do {
>  				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>  						phb->ioda.m64_bar_idx + 1, 0);
> @@ -374,8 +357,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  				if (win >= phb->ioda.m64_bar_idx + 1)
>  					goto m64_failed;
>  			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
> -
> -			iov->m64_map[j][i] = win;
> +			set_bit(win, iov->used_m64_bar_mask);
>  
>  			if (iov->m64_single_mode) {
>  				size = pci_iov_resource_size(pdev,
> @@ -391,12 +373,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  				pe_num = iov->pe_num_map[j];
>  				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>  						pe_num, OPAL_M64_WINDOW_TYPE,
> -						iov->m64_map[j][i], 0);
> +						win, 0);
>  			}
>  
>  			rc = opal_pci_set_phb_mem_window(phb->opal_id,
>  						 OPAL_M64_WINDOW_TYPE,
> -						 iov->m64_map[j][i],
> +						 win,
>  						 start,
>  						 0, /* unused */
>  						 size);
> @@ -410,10 +392,10 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  
>  			if (iov->m64_single_mode)
>  				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 2);
> +				     OPAL_M64_WINDOW_TYPE, win, 2);
>  			else
>  				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 1);
> +				     OPAL_M64_WINDOW_TYPE, win, 1);
>  
>  			if (rc != OPAL_SUCCESS) {
>  				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 0156d7d17f7d..58c97e60c3db 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -243,8 +243,11 @@ struct pnv_iov_data {
>  	/* Did we map the VF BARs with single-PE IODA BARs? */
>  	bool    m64_single_mode;
>  
> -	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
> -#define IODA_INVALID_M64        (-1)
> +	/*
> +	 * Bit mask used to track which m64 windows that we used to map the


Language question: either "which" or "that", but not both?


> +	 * SR-IOV BARs for this device.
> +	 */
> +	DECLARE_BITMAP(used_m64_bar_mask, 64);

64 here is the maximum number of M64's (which is 16 at the moment)? Can
we define this 64 somehow (appears twice in this patch alone)?

Anyway, the change is correct.

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>



>  
>  	/*
>  	 * If we map the SR-IOV BARs with a segmented window then
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread
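
For what it's worth, the suggestion above could look something like the sketch
below. The PNV_MAX_M64_BARS name is invented here and does not exist in the
tree; the function just mirrors pnv_pci_vf_release_m64() with the constant in
place of the bare 64:

	#include <linux/bitmap.h>

	#define PNV_MAX_M64_BARS	64	/* upper bound on MBT windows tracked per device */

	struct pnv_iov_data {
		/* ... */
		DECLARE_BITMAP(used_m64_bar_mask, PNV_MAX_M64_BARS);
	};

	static void example_release_m64(struct pnv_phb *phb, struct pnv_iov_data *iov)
	{
		int window_id;

		for_each_set_bit(window_id, iov->used_m64_bar_mask, PNV_MAX_M64_BARS) {
			opal_pci_phb_mmio_enable(phb->opal_id, OPAL_M64_WINDOW_TYPE,
						 window_id, 0);
			clear_bit(window_id, &phb->ioda.m64_bar_alloc);
		}
	}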

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-14  7:21       ` Alexey Kardashevskiy
  2020-07-15  0:23         ` Alexey Kardashevskiy
@ 2020-07-15  1:38         ` Oliver O'Halloran
  2020-07-15  3:33           ` Alexey Kardashevskiy
  1 sibling, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  1:38 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev, Cédric Le Goater

On Tue, Jul 14, 2020 at 5:21 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 14/07/2020 15:58, Oliver O'Halloran wrote:
> > On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> >>
> >> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> >>> This also means the only remaining user of the old "DMA Weight" code is
> >>> the IODA1 DMA setup code that it was originally added for, which is good.
> >>
> >>
> >> Is ditching IODA1 in the plan? :)
> >
> > That or separating out the pci_controller_ops for IODA1 and IODA2 so
> > we can stop any IODA2 specific changes from breaking it.
>
> Is IODA1 tested at all these days? Or, is anyone running upstream
> kernels anywhere and keeps shouting when it does not work on IODA1? Thanks,

Cedric has a P7 with OPAL. That's probably the one left though.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking
  2020-07-15  1:34   ` Alexey Kardashevskiy
@ 2020-07-15  1:41     ` Oliver O'Halloran
  0 siblings, 0 replies; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  1:41 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 15, 2020 at 11:34 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> > index 0156d7d17f7d..58c97e60c3db 100644
> > --- a/arch/powerpc/platforms/powernv/pci.h
> > +++ b/arch/powerpc/platforms/powernv/pci.h
> > @@ -243,8 +243,11 @@ struct pnv_iov_data {
> >       /* Did we map the VF BARs with single-PE IODA BARs? */
> >       bool    m64_single_mode;
> >
> > -     int     (*m64_map)[PCI_SRIOV_NUM_BARS];
> > -#define IODA_INVALID_M64        (-1)
> > +     /*
> > +      * Bit mask used to track which m64 windows that we used to map the
>
>
> Language question: either "which" or "that" but both?

Uhhhh... I don't speak english

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup
  2020-07-10  5:23 ` [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup Oliver O'Halloran
@ 2020-07-15  2:09   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  2:09 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> The sequence required to use the single PE BAR mode is kinda janky and
> requires a little explanation. The API was designed with P7-IOC style
> windows where the setup process is something like:
> 
> 1. Configure the window start / end address
> 2. Enable the window
> 3. Map the segments of each window to the PE
> 
> For Single PE BARs the process is:
> 
> 1. Set the PE for segment zero on a disabled window
> 2. Set the range
> 3. Enable the window
> 
> Move the OPAL calls into their own helper functions where the quirks can be
> contained.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>


I'd use "segmented" instead of "accordion". Otherwise,

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>




> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 132 ++++++++++++++++-----
>  1 file changed, 103 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index e4c65cb49757..d53a85ccb538 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -320,6 +320,102 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  	return 0;
>  }
>  
> +
> +/*
> + * PHB3 and beyond support "accordion" windows. The window's address range
> + * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1
> + * mapping between PEs and segments.
> + *
> + * They're called that because as the window size changes the segment sizes
> + * change with it. Sort of like an accordion, sort of.
> + */
> +static int64_t pnv_ioda_map_m64_accordion(struct pnv_phb *phb,
> +					  int window_id,
> +					  resource_size_t start,
> +					  resource_size_t size)
> +{
> +	int64_t rc;
> +
> +	rc = opal_pci_set_phb_mem_window(phb->opal_id,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 start,
> +					 0, /* unused */
> +					 size);
> +	if (rc)
> +		goto out;
> +
> +	rc = opal_pci_phb_mmio_enable(phb->opal_id,
> +				      OPAL_M64_WINDOW_TYPE,
> +				      window_id,
> +				      OPAL_ENABLE_M64_SPLIT);
> +out:
> +	if (rc)
> +		pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc);
> +
> +	return rc;
> +}
> +
> +static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
> +				       int pe_num,
> +				       int window_id,
> +				       resource_size_t start,
> +				       resource_size_t size)
> +{
> +	int64_t rc;
> +
> +	/*
> +	 * The API for setting up m64 mmio windows seems to have been designed
> +	 * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed
> +	 * split of 8 equally sized segments each of which could individually
> +	 * assigned to a PE.
> +	 *
> +	 * The problem with this is that the API doesn't have any way to
> +	 * communicate the number of segments we want on a BAR. This wasn't
> +	 * a problem for p7-ioc since you didn't have a choice, but the
> +	 * single PE windows added in PHB3 don't map cleanly to this API.
> +	 *
> +	 * As a result we've got this slightly awkward process where we
> +	 * call opal_pci_map_pe_mmio_window() to put the single in single
> +	 * PE mode, and set the PE for the window before setting the address
> +	 * bounds. We need to do it this way because the single PE windows
> +	 * for PHB3 have different alignment requirements on PHB3.
> +	 */
> +	rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> +					 pe_num,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 0);
> +	if (rc)
> +		goto out;
> +
> +	/*
> +	 * NB: In single PE mode the window needs to be aligned to 32MB
> +	 */
> +	rc = opal_pci_set_phb_mem_window(phb->opal_id,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 start,
> +					 0, /* ignored by FW, m64 is 1-1 */
> +					 size);
> +	if (rc)
> +		goto out;
> +
> +	/*
> +	 * Now actually enable it. We specified the BAR should be in "non-split"
> +	 * mode so FW will validate that the BAR is in single PE mode.
> +	 */
> +	rc = opal_pci_phb_mmio_enable(phb->opal_id,
> +				      OPAL_M64_WINDOW_TYPE,
> +				      window_id,
> +				      OPAL_ENABLE_M64_NON_SPLIT);
> +out:
> +	if (rc)
> +		pr_err("Error mapping single PE BAR\n");
> +
> +	return rc;
> +}
> +
>  static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
>  	struct pnv_iov_data   *iov;
> @@ -330,7 +426,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	int64_t                rc;
>  	int                    total_vfs;
>  	resource_size_t        size, start;
> -	int                    pe_num;
>  	int                    m64_bars;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
> @@ -359,49 +454,28 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
>  			set_bit(win, iov->used_m64_bar_mask);
>  
> +
>  			if (iov->m64_single_mode) {
>  				size = pci_iov_resource_size(pdev,
>  							PCI_IOV_RESOURCES + i);
>  				start = res->start + size * j;
> +				rc = pnv_ioda_map_m64_single(phb, win,
> +							     iov->pe_num_map[j],
> +							     start,
> +							     size);
>  			} else {
>  				size = resource_size(res);
>  				start = res->start;
> -			}
>  
> -			/* Map the M64 here */
> -			if (iov->m64_single_mode) {
> -				pe_num = iov->pe_num_map[j];
> -				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -						pe_num, OPAL_M64_WINDOW_TYPE,
> -						win, 0);
> +				rc = pnv_ioda_map_m64_accordion(phb, win, start,
> +								size);
>  			}
>  
> -			rc = opal_pci_set_phb_mem_window(phb->opal_id,
> -						 OPAL_M64_WINDOW_TYPE,
> -						 win,
> -						 start,
> -						 0, /* unused */
> -						 size);
> -
> -
>  			if (rc != OPAL_SUCCESS) {
>  				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
>  					win, rc);
>  				goto m64_failed;
>  			}
> -
> -			if (iov->m64_single_mode)
> -				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, win, 2);
> -			else
> -				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, win, 1);
> -
> -			if (rc != OPAL_SUCCESS) {
> -				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
> -					win, rc);
> -				goto m64_failed;
> -			}
>  		}
>  	}
>  	return 0;
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
  2020-07-10  5:23 ` [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe() Oliver O'Halloran
@ 2020-07-15  2:29   ` Alexey Kardashevskiy
  2020-07-15  2:53     ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  2:29 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Rework the PE allocation logic to allow allocating blocks of PEs rather
> than individually. We'll use this to allocate contigious blocks of PEs for
> the SR-IOVs.

The patch does not do just this, it also adds missing mutexes (which is
good) but still misses them in pnv_pci_sriov_disable() and
pnv_pci_ioda_pe_dump().




> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 41 ++++++++++++++++++-----
>  arch/powerpc/platforms/powernv/pci.h      |  2 +-
>  2 files changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 2d36a9ebf0e9..c9c25fb0783c 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -145,23 +145,45 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
>  		return;
>  	}
>  
> +	mutex_lock(&phb->ioda.pe_alloc_mutex);
>  	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
>  		pr_debug("%s: PE %x was reserved on PHB#%x\n",
>  			 __func__, pe_no, phb->hose->global_number);
> +	mutex_unlock(&phb->ioda.pe_alloc_mutex);
>  
>  	pnv_ioda_init_pe(phb, pe_no);
>  }
>  
> -struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
> +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
>  {
> -	long pe;
> +	struct pnv_ioda_pe *ret = NULL;
> +	int run = 0, pe, i;
>  
> +	mutex_lock(&phb->ioda.pe_alloc_mutex);
> +
> +	/* scan backwards for a run of @count cleared bits */
>  	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
> -		if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
> -			return pnv_ioda_init_pe(phb, pe);
> +		if (test_bit(pe, phb->ioda.pe_alloc)) {
> +			run = 0;
> +			continue;
> +		}
> +
> +		run++;
> +		if (run == count)
> +			break;
>  	}
> +	if (run != count)
> +		goto out;
>  
> -	return NULL;
> +	for (i = pe; i < pe + count; i++) {
> +		set_bit(i, phb->ioda.pe_alloc);
> +		pnv_ioda_init_pe(phb, i);
> +	}
> +	ret = &phb->ioda.pe_array[pe];
> +
> +out:
> +	mutex_unlock(&phb->ioda.pe_alloc_mutex);
> +	return ret;
>  }
>  
>  void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
> @@ -173,7 +195,10 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
>  	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
>  	kfree(pe->npucomp);
>  	memset(pe, 0, sizeof(struct pnv_ioda_pe));
> +
> +	mutex_lock(&phb->ioda.pe_alloc_mutex);
>  	clear_bit(pe_num, phb->ioda.pe_alloc);
> +	mutex_unlock(&phb->ioda.pe_alloc_mutex);
>  }
>  
>  /* The default M64 BAR is shared by all PEs */
> @@ -976,7 +1001,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>  	if (pdn->pe_number != IODA_INVALID_PE)
>  		return NULL;
>  
> -	pe = pnv_ioda_alloc_pe(phb);
> +	pe = pnv_ioda_alloc_pe(phb, 1);
>  	if (!pe) {
>  		pr_warn("%s: Not enough PE# available, disabling device\n",
>  			pci_name(dev));
> @@ -1047,7 +1072,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
>  
>  	/* The PE number isn't pinned by M64 */
>  	if (!pe)
> -		pe = pnv_ioda_alloc_pe(phb);
> +		pe = pnv_ioda_alloc_pe(phb, 1);
>  
>  	if (!pe) {
>  		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
> @@ -3065,7 +3090,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
>  		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
>  	} else {
>  		/* otherwise just allocate one */
> -		root_pe = pnv_ioda_alloc_pe(phb);
> +		root_pe = pnv_ioda_alloc_pe(phb, 1);
>  		phb->ioda.root_pe_idx = root_pe->pe_number;
>  	}
>  
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 58c97e60c3db..b4c9bdba7217 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -223,7 +223,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
>  void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
>  void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
>  
> -struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb);
> +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
>  void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
>  
>  #ifdef CONFIG_PCI_IOV
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
  2020-07-15  2:29   ` Alexey Kardashevskiy
@ 2020-07-15  2:53     ` Oliver O'Halloran
  2020-07-15  3:15       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  2:53 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 15, 2020 at 12:29 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
>
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> > Rework the PE allocation logic to allow allocating blocks of PEs rather
> > than individually. We'll use this to allocate contigious blocks of PEs for
> > the SR-IOVs.
>
> The patch does not do just this, it also adds missing mutexes (which is
> good) but still misses them in pnv_pci_sriov_disable() and
> pnv_pci_ioda_pe_dump().

The current implementation doesn't need the mutex because alloc,
reserve and free all use atomic bit ops. The mutex has been there
forever with nothing actually using it, but with the change we need to
prevent modifications to the bitmap while alloc() is scanning it. I
probably should have mentioned that in the commit message.
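To illustrate with a stand-alone sketch (plain userspace C rather than
the kernel code), the whole scan-and-claim has to happen under the lock
because per-bit atomics only protect individual bit flips, not the run:

#include <pthread.h>
#include <stdio.h>

#define NR_PE 64

static unsigned long pe_alloc;		/* bit i set => PE i in use */
static pthread_mutex_t pe_mutex = PTHREAD_MUTEX_INITIALIZER;

/* find a contiguous run of @count free PEs, claim it, return its base (or -1) */
static int alloc_pe_run(int count)
{
	int pe, run = 0, base = -1;

	pthread_mutex_lock(&pe_mutex);

	/* scan backwards for a run of @count clear bits */
	for (pe = NR_PE - 1; pe >= 0; pe--) {
		if (pe_alloc & (1UL << pe)) {
			run = 0;
			continue;
		}
		if (++run == count) {
			base = pe;	/* lowest PE of the run */
			break;
		}
	}

	/* claim the whole run before anyone else can see it */
	if (base >= 0)
		for (pe = base; pe < base + count; pe++)
			pe_alloc |= 1UL << pe;

	pthread_mutex_unlock(&pe_mutex);
	return base;
}

int main(void)
{
	printf("run of 4 starts at PE %d\n", alloc_pe_run(4));
	return 0;
}

With only per-bit atomics another caller could grab a bit in the middle
of the run between the scan and the claim, which is exactly what holding
the mutex across both steps prevents.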

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
  2020-07-15  2:53     ` Oliver O'Halloran
@ 2020-07-15  3:15       ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  3:15 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 15/07/2020 12:53, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 12:29 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>>
>>
>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>> Rework the PE allocation logic to allow allocating blocks of PEs rather
>>> than individually. We'll use this to allocate contigious blocks of PEs for
>>> the SR-IOVs.
>>
>> The patch does not do just this, it also adds missing mutexes (which is
>> good) but still misses them in pnv_pci_sriov_disable() and
>> pnv_pci_ioda_pe_dump().
> 
> The current implementation doesn't need the mutex because alloc,
> reserve and free all use atomic bit ops.

Ah, ok.

> The mutex has been there
> forever with nothing actually using it, but with the change we need to
> prevent modifications to the bitmap while alloc() is scanning it. I
> probably should have mentioned that in the commit message.

but bitmap_clear() (from pnv_pci_sriov_disable()) is not atomic. It
probably does not matter as the next patch gets rid of it anyway.


-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[]
  2020-07-10  5:23 ` [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[] Oliver O'Halloran
@ 2020-07-15  3:31   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  3:31 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Currently the iov->pe_num_map[] does one of two things depending on
> whether single PE mode is being used or not. When it is, this contains an
> array which maps a vf_index to the corresponding PE number. When single PE
> mode is not being used this contains a scalar which is the base PE for the
> set of enabled VFs (for for VFn is base + n).
> 
> The array was necessary because when calling pnv_ioda_alloc_pe() there is
> no guarantee that the allocated PEs would be contigious. We can now


s/contigious/contiguous/ here and below.


> allocate contigious blocks of PEs so this is no longer an issue. This
> allows us to drop the if (single_mode) {} .. else {} block scattered
> through the SR-IOV code which is a nice clean up.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 109 +++++----------------
>  arch/powerpc/platforms/powernv/pci.h       |   4 +-
>  2 files changed, 25 insertions(+), 88 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index d53a85ccb538..08f88187d65a 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -456,11 +456,13 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  
>  
>  			if (iov->m64_single_mode) {
> +				int pe_num = iov->vf_pe_arr[j].pe_number;
> +
>  				size = pci_iov_resource_size(pdev,
>  							PCI_IOV_RESOURCES + i);
>  				start = res->start + size * j;
>  				rc = pnv_ioda_map_m64_single(phb, win,
> -							     iov->pe_num_map[j],
> +							     pe_num,
>  							     start,
>  							     size);
>  			} else {
> @@ -599,38 +601,24 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>  
>  static void pnv_pci_sriov_disable(struct pci_dev *pdev)
>  {
> +	u16                    num_vfs, base_pe;
>  	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
>  	struct pnv_iov_data   *iov;
> -	u16                    num_vfs, i;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
>  	iov = pnv_iov_get(pdev);
>  	num_vfs = iov->num_vfs;
> +	base_pe = iov->vf_pe_arr[0].pe_number;
>  
>  	/* Release VF PEs */
>  	pnv_ioda_release_vf_PE(pdev);
>  
>  	if (phb->type == PNV_PHB_IODA2) {
>  		if (!iov->m64_single_mode)
> -			pnv_pci_vf_resource_shift(pdev, -*iov->pe_num_map);
> +			pnv_pci_vf_resource_shift(pdev, -base_pe);
>  
>  		/* Release M64 windows */
>  		pnv_pci_vf_release_m64(pdev, num_vfs);
> -
> -		/* Release PE numbers */
> -		if (iov->m64_single_mode) {
> -			for (i = 0; i < num_vfs; i++) {
> -				if (iov->pe_num_map[i] == IODA_INVALID_PE)
> -					continue;
> -
> -				pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
> -				pnv_ioda_free_pe(pe);
> -			}
> -		} else
> -			bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
> -		/* Releasing pe_num_map */
> -		kfree(iov->pe_num_map);
>  	}
>  }
>  
> @@ -656,13 +644,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
>  		struct pci_dn *vf_pdn;
>  
> -		if (iov->m64_single_mode)
> -			pe_num = iov->pe_num_map[vf_index];
> -		else
> -			pe_num = *iov->pe_num_map + vf_index;
> -
> -		pe = &phb->ioda.pe_array[pe_num];
> -		pe->pe_number = pe_num;
> +		pe = &iov->vf_pe_arr[vf_index];
>  		pe->phb = phb;
>  		pe->flags = PNV_IODA_PE_VF;
>  		pe->pbus = NULL;
> @@ -670,6 +652,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  		pe->mve_number = -1;
>  		pe->rid = (vf_bus << 8) | vf_devfn;
>  
> +		pe_num = pe->pe_number;
>  		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
>  			pci_domain_nr(pdev->bus), pdev->bus->number,
>  			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
> @@ -701,9 +684,9 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  
>  static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  {
> +	struct pnv_ioda_pe    *base_pe;
>  	struct pnv_iov_data   *iov;
>  	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
>  	int                    ret;
>  	u16                    i;
>  
> @@ -717,55 +700,14 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  			return -ENOSPC;
>  		}
>  
> -		/*
> -		 * When M64 BARs functions in Single PE mode, the number of VFs
> -		 * could be enabled must be less than the number of M64 BARs.
> -		 */
> -		if (iov->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
> -			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
> +		/* allocate a contigious block of PEs for our VFs */
> +		base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
> +		if (!base_pe) {
> +			pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
>  			return -EBUSY;
>  		}
>  
> -		/* Allocating pe_num_map */
> -		if (iov->m64_single_mode)
> -			iov->pe_num_map = kmalloc_array(num_vfs,
> -							sizeof(*iov->pe_num_map),
> -							GFP_KERNEL);
> -		else
> -			iov->pe_num_map = kmalloc(sizeof(*iov->pe_num_map), GFP_KERNEL);
> -
> -		if (!iov->pe_num_map)
> -			return -ENOMEM;
> -
> -		if (iov->m64_single_mode)
> -			for (i = 0; i < num_vfs; i++)
> -				iov->pe_num_map[i] = IODA_INVALID_PE;
> -
> -		/* Calculate available PE for required VFs */
> -		if (iov->m64_single_mode) {
> -			for (i = 0; i < num_vfs; i++) {
> -				pe = pnv_ioda_alloc_pe(phb);
> -				if (!pe) {
> -					ret = -EBUSY;
> -					goto m64_failed;
> -				}
> -
> -				iov->pe_num_map[i] = pe->pe_number;
> -			}
> -		} else {
> -			mutex_lock(&phb->ioda.pe_alloc_mutex);
> -			*iov->pe_num_map = bitmap_find_next_zero_area(
> -				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
> -				0, num_vfs, 0);
> -			if (*iov->pe_num_map >= phb->ioda.total_pe_num) {
> -				mutex_unlock(&phb->ioda.pe_alloc_mutex);
> -				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
> -				kfree(iov->pe_num_map);
> -				return -EBUSY;
> -			}
> -			bitmap_set(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
> -			mutex_unlock(&phb->ioda.pe_alloc_mutex);
> -		}
> +		iov->vf_pe_arr = base_pe;
>  		iov->num_vfs = num_vfs;
>  
>  		/* Assign M64 window accordingly */
> @@ -781,9 +723,10 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  		 * Otherwise, the PE# for the VF will conflict with others.
>  		 */
>  		if (!iov->m64_single_mode) {
> -			ret = pnv_pci_vf_resource_shift(pdev, *iov->pe_num_map);
> +			ret = pnv_pci_vf_resource_shift(pdev,
> +							base_pe->pe_number);
>  			if (ret)
> -				goto m64_failed;
> +				goto shift_failed;
>  		}
>  	}
>  
> @@ -792,20 +735,12 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  
>  	return 0;
>  
> -m64_failed:
> -	if (iov->m64_single_mode) {
> -		for (i = 0; i < num_vfs; i++) {
> -			if (iov->pe_num_map[i] == IODA_INVALID_PE)
> -				continue;
> -
> -			pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
> -			pnv_ioda_free_pe(pe);
> -		}
> -	} else
> -		bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
> +shift_failed:
> +	pnv_pci_vf_release_m64(pdev, num_vfs);
>  
> -	/* Releasing pe_num_map */
> -	kfree(iov->pe_num_map);
> +m64_failed:
> +	for (i = 0; i < num_vfs; i++)
> +		pnv_ioda_free_pe(&iov->vf_pe_arr[i]);
>  
>  	return ret;
>  }
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index b4c9bdba7217..13555bc549f4 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -238,7 +238,9 @@ struct pnv_iov_data {
>  
>  	/* number of VFs enabled */
>  	u16     num_vfs;
> -	unsigned int *pe_num_map;	/* PE# for the first VF PE or array */
> +
> +	/* pointer to the array of VF PEs. num_vfs long*/

I read the comment and for a second I thought that you are now storing
pnv_ioda_pe structs in pnv_iov_data, which is not true: vf_pe_arr
actually points inside phb->ioda.pe_array[]. Maybe add this to the
comment, please.

Otherwise good,


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>




> +	struct pnv_ioda_pe *vf_pe_arr;
>  
>  	/* Did we map the VF BARs with single-PE IODA BARs? */
>  	bool    m64_single_mode;
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-15  1:38         ` Oliver O'Halloran
@ 2020-07-15  3:33           ` Alexey Kardashevskiy
  2020-07-15  7:05             ` Cédric Le Goater
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  3:33 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev, Cédric Le Goater



On 15/07/2020 11:38, Oliver O'Halloran wrote:
> On Tue, Jul 14, 2020 at 5:21 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>> On 14/07/2020 15:58, Oliver O'Halloran wrote:
>>> On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>>>
>>>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>>>> This also means the only remaining user of the old "DMA Weight" code is
>>>>> the IODA1 DMA setup code that it was originally added for, which is good.
>>>>
>>>>
>>>> Is ditching IODA1 in the plan? :)
>>>
>>> That or separating out the pci_controller_ops for IODA1 and IODA2 so
>>> we can stop any IODA2 specific changes from breaking it.
>>
>> Is IODA1 tested at all these days? Or, is anyone running upstream
>> kernels anywhere and keeps shouting when it does not work on IODA1? Thanks,
> 
> Cedric has a P7 with OPAL. That's probably the one left though.

Has he tried these patches on that box? Or do we hope for the best here? :)



-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown
  2020-07-10  5:23 ` [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown Oliver O'Halloran
@ 2020-07-15  4:00   ` Alexey Kardashevskiy
  2020-07-15  4:21     ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  4:00 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Remove the IODA2 PHB checks. We already assume IODA2 in several places so
> there's not much point in wrapping most of the setup and teardown process
> in an if block.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 86 ++++++++++++----------
>  1 file changed, 49 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index 08f88187d65a..d5699cd2ab7a 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -610,16 +610,18 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev)
>  	num_vfs = iov->num_vfs;
>  	base_pe = iov->vf_pe_arr[0].pe_number;
>  
> +	if (WARN_ON(!iov))
> +		return;
> +
>  	/* Release VF PEs */
>  	pnv_ioda_release_vf_PE(pdev);
>  
> -	if (phb->type == PNV_PHB_IODA2) {
> -		if (!iov->m64_single_mode)
> -			pnv_pci_vf_resource_shift(pdev, -base_pe);
> +	/* Un-shift the IOV BAR resources */
> +	if (!iov->m64_single_mode)
> +		pnv_pci_vf_resource_shift(pdev, -base_pe);
>  
> -		/* Release M64 windows */
> -		pnv_pci_vf_release_m64(pdev, num_vfs);
> -	}
> +	/* Release M64 windows */
> +	pnv_pci_vf_release_m64(pdev, num_vfs);
>  }
>  
>  static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> @@ -693,41 +695,51 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  	phb = pci_bus_to_pnvhb(pdev->bus);
>  	iov = pnv_iov_get(pdev);
>  
> -	if (phb->type == PNV_PHB_IODA2) {
> -		if (!iov->vfs_expanded) {
> -			dev_info(&pdev->dev, "don't support this SRIOV device"
> -				" with non 64bit-prefetchable IOV BAR\n");
> -			return -ENOSPC;
> -		}
> +	/*
> +	 * There's a calls to IODA2 PE setup code littered throughout. We could
> +	 * probably fix that, but we'd still have problems due to the
> +	 * restriction inherent on IODA1 PHBs.
> +	 *
> +	 * NB: We class IODA3 as IODA2 since they're very similar.
> +	 */
> +	if (phb->type != PNV_PHB_IODA2) {
> +		pci_err(pdev, "SR-IOV is not supported on this PHB\n");
> +		return -ENXIO;
> +	}

or we could just skip setting

ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;

for uninteresting platforms in pnv_pci_init_ioda_phb().


>  
> -		/* allocate a contigious block of PEs for our VFs */
> -		base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
> -		if (!base_pe) {
> -			pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
> -			return -EBUSY;
> -		}
> +	if (!iov->vfs_expanded) {
> +		dev_info(&pdev->dev, "don't support this SRIOV device"
> +			" with non 64bit-prefetchable IOV BAR\n");
> +		return -ENOSPC;
> +	}
>  
> -		iov->vf_pe_arr = base_pe;
> -		iov->num_vfs = num_vfs;
> +	/* allocate a contigious block of PEs for our VFs */
> +	base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
> +	if (!base_pe) {
> +		pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
> +		return -EBUSY;
> +	}
>  
> -		/* Assign M64 window accordingly */
> -		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
> -		if (ret) {
> -			dev_info(&pdev->dev, "Not enough M64 window resources\n");
> -			goto m64_failed;
> -		}
> +	iov->vf_pe_arr = base_pe;
> +	iov->num_vfs = num_vfs;
>  
> -		/*
> -		 * When using one M64 BAR to map one IOV BAR, we need to shift
> -		 * the IOV BAR according to the PE# allocated to the VFs.
> -		 * Otherwise, the PE# for the VF will conflict with others.
> -		 */
> -		if (!iov->m64_single_mode) {
> -			ret = pnv_pci_vf_resource_shift(pdev,
> -							base_pe->pe_number);
> -			if (ret)
> -				goto shift_failed;
> -		}
> +	/* Assign M64 window accordingly */
> +	ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
> +	if (ret) {
> +		dev_info(&pdev->dev, "Not enough M64 window resources\n");
> +		goto m64_failed;
> +	}
> +
> +	/*
> +	 * When using one M64 BAR to map one IOV BAR, we need to shift
> +	 * the IOV BAR according to the PE# allocated to the VFs.
> +	 * Otherwise, the PE# for the VF will conflict with others.
> +	 */
> +	if (!iov->m64_single_mode) {
> +		ret = pnv_pci_vf_resource_shift(pdev,
> +						base_pe->pe_number);

This can be a single line now. Thanks,


> +		if (ret)
> +			goto shift_failed;
>  	}
>  
>  	/* Setup VF PEs */
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper
  2020-07-10  5:23 ` [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper Oliver O'Halloran
@ 2020-07-15  4:02   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  4:02 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> I want to refactor the loop this code is currently inside of. Hoist it on
> out.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>



Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 31 ++++++++++++++--------
>  1 file changed, 20 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index d5699cd2ab7a..2f967aa4fbf5 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -416,6 +416,23 @@ static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
>  	return rc;
>  }
>  
> +static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov)
> +{
> +	int win;
> +
> +	do {
> +		win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
> +				phb->ioda.m64_bar_idx + 1, 0);
> +
> +		if (win >= phb->ioda.m64_bar_idx + 1)
> +			return -1;
> +	} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
> +
> +	set_bit(win, iov->used_m64_bar_mask);
> +
> +	return win;
> +}
> +
>  static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
>  	struct pnv_iov_data   *iov;
> @@ -443,17 +460,9 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  			continue;
>  
>  		for (j = 0; j < m64_bars; j++) {
> -
> -			/* allocate a window ID for this BAR */
> -			do {
> -				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
> -						phb->ioda.m64_bar_idx + 1, 0);
> -
> -				if (win >= phb->ioda.m64_bar_idx + 1)
> -					goto m64_failed;
> -			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
> -			set_bit(win, iov->used_m64_bar_mask);
> -
> +			win = pnv_pci_alloc_m64_bar(phb, iov);
> +			if (win < 0)
> +				goto m64_failed;
>  
>  			if (iov->m64_single_mode) {
>  				int pe_num = iov->vf_pe_arr[j].pe_number;
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown
  2020-07-15  4:00   ` Alexey Kardashevskiy
@ 2020-07-15  4:21     ` Oliver O'Halloran
  2020-07-15  4:41       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  4:21 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 15, 2020 at 2:00 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
>
> or we could just skip setting
>
> ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
>
> for uninteresting platforms in pnv_pci_init_ioda_phb().

I don't think so. ppc_md is per-platform, not per-PHB, and we still
have to deal with a mixture of IODA/NVLink/OpenCAPI PHBs on a single
system. We could make it a callback in pnv_phb, but it seemed like
more indirection than it's worth.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown
  2020-07-15  4:21     ` Oliver O'Halloran
@ 2020-07-15  4:41       ` Alexey Kardashevskiy
  2020-07-15  4:46         ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  4:41 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 15/07/2020 14:21, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 2:00 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>>
>> or we could just skip setting
>>
>> ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
>>
>> for uninteresting platforms in pnv_pci_init_ioda_phb().
> 
> I don't think so. ppc_md is per-platform, not per-PHB andw e still
> have to deal with a mixture of IODA/NVLink/OpenCAPI PHBs on a single
> system.

NVLink/OpenCAPI won't have SRIOV devices. Other types won't appear on
the same platform simultaneously. It is not too clean, yes.


> We could make it a callback in pnv_phb, but  it seemed like
> more indirection than it's worth.

I genuinely dislike how we use ppc_md so removing things from it is
definitely a good thing.


-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown
  2020-07-15  4:41       ` Alexey Kardashevskiy
@ 2020-07-15  4:46         ` Oliver O'Halloran
  2020-07-15  4:58           ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  4:46 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 15, 2020 at 2:41 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
>
>
> On 15/07/2020 14:21, Oliver O'Halloran wrote:
> > On Wed, Jul 15, 2020 at 2:00 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> >>
> >>
> >> or we could just skip setting
> >>
> >> ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
> >>
> >> for uninteresting platforms in pnv_pci_init_ioda_phb().
> >
> > I don't think so. ppc_md is per-platform, not per-PHB, and we still
> > have to deal with a mixture of IODA/NVLink/OpenCAPI PHBs on a single
> > system.
>
> NVLink/OpenCAPI won't have SRIOV devices.

...OR WILL THEY?

> Other types won't appear on
> the same platform simultaneously. It is not too clean, yes.

Sure, my point is that's a per-PHB setting rather than a per-platform
one so we should set it up like that.

> > We could make it a callback in pnv_phb, but  it seemed like
> > more indirection than it's worth.
>
> I genuinely dislike how we use ppc_md so removing things from it is
> definitely a good thing.

you wouldn't be able to get rid of it. We'd have something like what
we have for the existing pcibios calls where there's a "generic" one
that bounces it to a member of pci_controller_ops, which then bounces
it to the pnv_phb method. It's bad and I hate it.
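
For anyone following along, here's a rough stand-alone sketch of that
double bounce (every name below is invented for illustration, none of
them are the real kernel interfaces):

struct example_phb {
	/* per-PHB method, the thing that does the actual work */
	int (*sriov_enable)(struct example_phb *phb, int num_vfs);
};

struct example_controller_ops {
	/* per-controller hook */
	int (*sriov_enable)(struct example_phb *phb, int num_vfs);
};

/* "generic" platform entry point, analogous to a ppc_md style hook */
static int example_pcibios_sriov_enable(struct example_controller_ops *ops,
					struct example_phb *phb, int num_vfs)
{
	/* bounce #1: platform hook -> controller ops */
	return ops->sriov_enable ? ops->sriov_enable(phb, num_vfs) : -1;
}

static int example_controller_sriov_enable(struct example_phb *phb, int num_vfs)
{
	/* bounce #2: controller ops -> per-PHB method */
	return phb->sriov_enable ? phb->sriov_enable(phb, num_vfs) : -1;
}

static int example_phb_sriov_enable(struct example_phb *phb, int num_vfs)
{
	(void)phb;
	return num_vfs > 0 ? 0 : -1;	/* pretend the VFs were enabled */
}

int main(void)
{
	struct example_phb phb = { .sriov_enable = example_phb_sriov_enable };
	struct example_controller_ops ops = {
		.sriov_enable = example_controller_sriov_enable,
	};

	return example_pcibios_sriov_enable(&ops, &phb, 4);
}

Each layer exists only to forward the call, which is the extra
indirection being objected to here.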

>
>
> --
> Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup
  2020-07-10  5:23 ` [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup Oliver O'Halloran
@ 2020-07-15  4:50   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  4:50 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Split up the logic so that we have one branch that handles setting up a
> segmented window and another that handles setting up single PE windows for
> each VF.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>



> ---
> This patch could be folded into the previous one. I've kept it
> seperate mainly because the diff is *horrific* when they're merged.
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 57 ++++++++++------------
>  1 file changed, 27 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index 2f967aa4fbf5..8de03636888a 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -441,52 +441,49 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	struct resource       *res;
>  	int                    i, j;
>  	int64_t                rc;
> -	int                    total_vfs;
>  	resource_size_t        size, start;
> -	int                    m64_bars;
> +	int                    base_pe_num;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
>  	iov = pnv_iov_get(pdev);
> -	total_vfs = pci_sriov_get_totalvfs(pdev);
> -
> -	if (iov->m64_single_mode)
> -		m64_bars = num_vfs;
> -	else
> -		m64_bars = 1;
>  
>  	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>  		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>  		if (!res->flags || !res->parent)
>  			continue;
>  
> -		for (j = 0; j < m64_bars; j++) {
> +		/* don't need single mode? map everything in one go! */
> +		if (!iov->m64_single_mode) {
>  			win = pnv_pci_alloc_m64_bar(phb, iov);
>  			if (win < 0)
>  				goto m64_failed;
>  
> -			if (iov->m64_single_mode) {
> -				int pe_num = iov->vf_pe_arr[j].pe_number;
> -
> -				size = pci_iov_resource_size(pdev,
> -							PCI_IOV_RESOURCES + i);
> -				start = res->start + size * j;
> -				rc = pnv_ioda_map_m64_single(phb, win,
> -							     pe_num,
> -							     start,
> -							     size);
> -			} else {
> -				size = resource_size(res);
> -				start = res->start;
> -
> -				rc = pnv_ioda_map_m64_accordion(phb, win, start,
> -								size);
> -			}
> +			size = resource_size(res);
> +			start = res->start;
>  
> -			if (rc != OPAL_SUCCESS) {
> -				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
> -					win, rc);
> +			rc = pnv_ioda_map_m64_accordion(phb, win, start, size);
> +			if (rc)
> +				goto m64_failed;
> +
> +			continue;
> +		}
> +
> +		/* otherwise map each VF with single PE BARs */
> +		size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);
> +		base_pe_num = iov->vf_pe_arr[0].pe_number;
> +
> +		for (j = 0; j < num_vfs; j++) {
> +			win = pnv_pci_alloc_m64_bar(phb, iov);
> +			if (win < 0)
> +				goto m64_failed;
> +
> +			start = res->start + size * j;
> +			rc = pnv_ioda_map_m64_single(phb, win,
> +						     base_pe_num + j,
> +						     start,
> +						     size);
> +			if (rc)
>  				goto m64_failed;
> -			}
>  		}
>  	}
>  	return 0;
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown
  2020-07-15  4:46         ` Oliver O'Halloran
@ 2020-07-15  4:58           ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  4:58 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 15/07/2020 14:46, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 2:41 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>>
>>
>> On 15/07/2020 14:21, Oliver O'Halloran wrote:
>>> On Wed, Jul 15, 2020 at 2:00 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>>>
>>>>
>>>> or we could just skip setting
>>>>
>>>> ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
>>>>
>>>> for uninteresting platforms in pnv_pci_init_ioda_phb().
>>>
>>> I don't think so. ppc_md is per-platform, not per-PHB, and we still
>>> have to deal with a mixture of IODA/NVLink/OpenCAPI PHBs on a single
>>> system.
>>
>> NVLink/OpenCAPI won't have SRIOV devices.
> 
> ...OR WILL THEY?

NO!


>> Other types won't appear on
>> the same platform simultaneously. It is not too clean, yes.
> 
> Sure, my point is that's a per-PHB setting rather than a per-platform
> one so we should set it up like that.

And my point is that you did too good a job of getting rid of IODA1 vs
IODA2 checks to keep this check. But ok.

> 
>>> We could make it a callback in pnv_phb, but  it seemed like
>>> more indirection than it's worth.
>>
>> I genuinely dislike how we use ppc_md so removing things from it is
>> definitely a good thing.
> 
> you wouldn't be able to get rid of it. We'd have something like what
> we have for the existing pcibios calls where there's a "generic" one
> that bounces it to a member of pci_controller_ops, which then bounces
> it to the pnv_phb method. It's bad and I hate it.

No argument here...


-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-10  5:23 ` [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting Oliver O'Halloran
@ 2020-07-15  5:24   ` Alexey Kardashevskiy
  2020-07-15  6:16     ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  5:24 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Using single PE BARs to map an SR-IOV BAR is really a choice about what
> strategy to use when mapping a BAR. It doesn't make much sense for this to
> be a global setting since a device might have one large BAR which needs to
> be mapped with single PE windows and another smaller BAR that can be mapped
> with a regular segmented window. Make the segmented vs single decision a
> per-BAR setting and clean up the logic that decides which mode to use.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 131 +++++++++++----------
>  arch/powerpc/platforms/powernv/pci.h       |  10 +-
>  2 files changed, 75 insertions(+), 66 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index 8de03636888a..87377d95d648 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -146,10 +146,9 @@
>  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  {
>  	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> -	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
>  	struct resource *res;
>  	int i;
> -	resource_size_t size, total_vf_bar_sz;
> +	resource_size_t vf_bar_sz;
>  	struct pnv_iov_data *iov;
>  	int mul, total_vfs;
>  
> @@ -158,9 +157,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  		goto disable_iov;
>  	pdev->dev.archdata.iov_data = iov;
>  
> +	/* FIXME: totalvfs > phb->ioda.total_pe_num is going to be a problem */


WARN_ON_ONCE() then?


>  	total_vfs = pci_sriov_get_totalvfs(pdev);
>  	mul = phb->ioda.total_pe_num;
> -	total_vf_bar_sz = 0;
>  
>  	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>  		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> @@ -173,50 +172,51 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  			goto disable_iov;
>  		}
>  
> -		total_vf_bar_sz += pci_iov_resource_size(pdev,
> -				i + PCI_IOV_RESOURCES);
> +		vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>  
>  		/*
> -		 * If bigger than quarter of M64 segment size, just round up
> -		 * power of two.
> +		 * Generally, one segmented M64 BAR maps one IOV BAR. However,
> +		 * if a VF BAR is too large we end up wasting a lot of space.
> +		 * If we've got a BAR that's bigger than greater than 1/4 of the


bigger, greater, huger? :)

Also, a nit: s/got a BAR/got a VF BAR/


> +		 * default window's segment size then switch to using single PE
> +		 * windows. This limits the total number of VFs we can support.

Just to get an idea of the absolute numbers here.

On my P9:

./pciex@600c3c0300000/ibm,opal-m64-window
                 00060200 00000000 00060200 00000000 00000040 00000000

so that default window's segment size is 0x40.0000.0000/512 = 512MB?


>  		 *
> -		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
> -		 * with other devices, IOV BAR size is expanded to be
> -		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
> -		 * segment size , the expanded size would equal to half of the
> -		 * whole M64 space size, which will exhaust the M64 Space and
> -		 * limit the system flexibility.  This is a design decision to
> -		 * set the boundary to quarter of the M64 segment size.
> +		 * The 1/4 limit is arbitrary and can be tweaked.
>  		 */
> -		if (total_vf_bar_sz > gate) {
> -			mul = roundup_pow_of_two(total_vfs);
> -			dev_info(&pdev->dev,
> -				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
> -				total_vf_bar_sz, gate, mul);
> -			iov->m64_single_mode = true;
> -			break;
> -		}
> -	}
> +		if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
> +			/*
> +			 * On PHB3, the minimum size alignment of M64 BAR in
> +			 * single mode is 32MB. If this VF BAR is smaller than
> +			 * 32MB, but still too large for a segmented window
> +			 * then we can't map it and need to disable SR-IOV for
> +			 * this device.


Why not use single PE mode for such a BAR? Better than nothing.


> +			 */
> +			if (vf_bar_sz < SZ_32M) {
> +				pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n",
> +					i, res);
> +				goto disable_iov;
> +			}
>  
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> -		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> -		if (!res->flags || res->parent)
> +			iov->m64_single_mode[i] = true;
>  			continue;
> +		}
> +
>  
> -		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>  		/*
> -		 * On PHB3, the minimum size alignment of M64 BAR in single
> -		 * mode is 32MB.
> +		 * This BAR can be mapped with one segmented window, so adjust
> +		 * te resource size to accommodate.
>  		 */
> -		if (iov->m64_single_mode && (size < SZ_32M))
> -			goto disable_iov;
> +		pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res);
> +		res->end = res->start + vf_bar_sz * mul - 1;
> +		pci_dbg(pdev, "                       %pR\n", res);
>  
> -		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
> -		res->end = res->start + size * mul - 1;
> -		dev_dbg(&pdev->dev, "                       %pR\n", res);
> -		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
> +		pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
>  			 i, res, mul);
> +
> +		iov->need_shift = true;
>  	}
> +
> +	// what should this be?
>  	iov->vfs_expanded = mul;
>  
>  	return;
> @@ -260,42 +260,42 @@ void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
>  resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
>  						      int resno)
>  {
> -	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>  	struct pnv_iov_data *iov = pnv_iov_get(pdev);
>  	resource_size_t align;
>  
> -	/*
> -	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
> -	 * SR-IOV. While from hardware perspective, the range mapped by M64
> -	 * BAR should be size aligned.
> -	 *
> -	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
> -	 * powernv-specific hardware restriction is gone. But if just use the
> -	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
> -	 * in one segment of M64 #15, which introduces the PE conflict between
> -	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
> -	 * m64_segsize.
> -	 *
> -	 * This function returns the total IOV BAR size if M64 BAR is in
> -	 * Shared PE mode or just VF BAR size if not.
> -	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
> -	 * M64 segment size if IOV BAR size is less.
> -	 */
> -	align = pci_iov_resource_size(pdev, resno);
> +	int bar_no = resno - PCI_IOV_RESOURCES;
>  
>  	/*
>  	 * iov can be null if we have an SR-IOV device with IOV BAR that can't
>  	 * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
> -	 * In that case we don't allow VFs to be enabled so just return the
> -	 * default alignment.
> +	 * In that case we don't allow VFs to be enabled since one of their
> +	 * BARs would not be placed in the correct PE.
>  	 */
>  	if (!iov)
>  		return align;
>  	if (!iov->vfs_expanded)
>  		return align;
> -	if (iov->m64_single_mode)
> -		return max(align, (resource_size_t)phb->ioda.m64_segsize);
>  
> +	align = pci_iov_resource_size(pdev, resno);
> +
> +	/*
> +	 * If we're using single mode then we can just use the native VF BAR
> +	 * alignment. We validated that it's possible to use a single PE
> +	 * window above when we did the fixup.
> +	 */
> +	if (iov->m64_single_mode[bar_no])
> +		return align;
> +
> +	/*
> +	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
> +	 * SR-IOV. While from hardware perspective, the range mapped by M64
> +	 * BAR should be size aligned.
> +	 *
> +	 * This function returns the total IOV BAR size if M64 BAR is in
> +	 * Shared PE mode or just VF BAR size if not.
> +	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
> +	 * M64 segment size if IOV BAR size is less.
> +	 */
>  	return iov->vfs_expanded * align;
>  }
>  
> @@ -453,7 +453,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  			continue;
>  
>  		/* don't need single mode? map everything in one go! */
> -		if (!iov->m64_single_mode) {
> +		if (!iov->m64_single_mode[i]) {
>  			win = pnv_pci_alloc_m64_bar(phb, iov);
>  			if (win < 0)
>  				goto m64_failed;
> @@ -546,6 +546,8 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>  		res = &dev->resource[i + PCI_IOV_RESOURCES];
>  		if (!res->flags || !res->parent)
>  			continue;
> +		if (iov->m64_single_mode[i])
> +			continue;
>  
>  		/*
>  		 * The actual IOV BAR range is determined by the start address
> @@ -577,6 +579,8 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>  		res = &dev->resource[i + PCI_IOV_RESOURCES];
>  		if (!res->flags || !res->parent)
>  			continue;
> +		if (iov->m64_single_mode[i])
> +			continue;
>  
>  		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>  		res2 = *res;
> @@ -622,8 +626,8 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev)
>  	/* Release VF PEs */
>  	pnv_ioda_release_vf_PE(pdev);
>  
> -	/* Un-shift the IOV BAR resources */
> -	if (!iov->m64_single_mode)
> +	/* Un-shift the IOV BARs if we need to */
> +	if (iov->need_shift)
>  		pnv_pci_vf_resource_shift(pdev, -base_pe);
>  
>  	/* Release M64 windows */
> @@ -741,9 +745,8 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  	 * the IOV BAR according to the PE# allocated to the VFs.
>  	 * Otherwise, the PE# for the VF will conflict with others.
>  	 */
> -	if (!iov->m64_single_mode) {
> -		ret = pnv_pci_vf_resource_shift(pdev,
> -						base_pe->pe_number);
> +	if (iov->need_shift) {
> +		ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number);
>  		if (ret)
>  			goto shift_failed;
>  	}
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 13555bc549f4..a78d1feb8fb8 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -236,14 +236,20 @@ struct pnv_iov_data {
>  	/* number of VFs IOV BAR expanded. FIXME: rename this to something less bad */
>  	u16     vfs_expanded;
>  
> +	/*
> +	 * indicates if we need to move our IOV BAR to account for our
> +	 * allocated PE number when enabling VFs.
> +	 */
> +	bool    need_shift;
> +
>  	/* number of VFs enabled */
>  	u16     num_vfs;
>  
>  	/* pointer to the array of VF PEs. num_vfs long*/
>  	struct pnv_ioda_pe *vf_pe_arr;
>  
> -	/* Did we map the VF BARs with single-PE IODA BARs? */
> -	bool    m64_single_mode;
> +	/* Did we map the VF BAR with single-PE IODA BARs? */
> +	bool    m64_single_mode[PCI_SRIOV_NUM_BARS];
>  
>  	/*
>  	 * Bit mask used to track which m64 windows that we used to map the
> 

-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-15  5:24   ` Alexey Kardashevskiy
@ 2020-07-15  6:16     ` Oliver O'Halloran
  2020-07-15  8:00       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  6:16 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 15, 2020 at 3:24 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
>
> > @@ -158,9 +157,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> >               goto disable_iov;
> >       pdev->dev.archdata.iov_data = iov;
> >
> > +     /* FIXME: totalvfs > phb->ioda.total_pe_num is going to be a problem */
>
>
> WARN_ON_ONCE() then?

can't hurt

> > @@ -173,50 +172,51 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> >                       goto disable_iov;
> >               }
> >
> > -             total_vf_bar_sz += pci_iov_resource_size(pdev,
> > -                             i + PCI_IOV_RESOURCES);
> > +             vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
> >
> >               /*
> > -              * If bigger than quarter of M64 segment size, just round up
> > -              * power of two.
> > +              * Generally, one segmented M64 BAR maps one IOV BAR. However,
> > +              * if a VF BAR is too large we end up wasting a lot of space.
> > +              * If we've got a BAR that's bigger than greater than 1/4 of the
>
>
> bigger, greater, huger? :)
>
> Also, a nit: s/got a BAR/got a VF BAR/

whatever, it's just words

> > +              * default window's segment size then switch to using single PE
> > +              * windows. This limits the total number of VFs we can support.
>
> Just to get an idea of the absolute numbers here.
>
> On my P9:
>
> ./pciex@600c3c0300000/ibm,opal-m64-window
>                  00060200 00000000 00060200 00000000 00000040 00000000
>
> so that default window's segment size is 0x40.0000.0000/512 = 512MB?

Yeah. It'll vary a bit since PHB3 and some PHB4s have 256.
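
To put rough numbers on the gate (taking the figures in this thread at
face value): the threshold is a quarter of the segment size, so 128MB
with 512MB segments and 64MB with 256MB segments, and any VF BAR bigger
than that gets mapped with single PE windows instead.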

> >                *
> > -              * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
> > -              * with other devices, IOV BAR size is expanded to be
> > -              * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
> > -              * segment size , the expanded size would equal to half of the
> > -              * whole M64 space size, which will exhaust the M64 Space and
> > -              * limit the system flexibility.  This is a design decision to
> > -              * set the boundary to quarter of the M64 segment size.
> > +              * The 1/4 limit is arbitrary and can be tweaked.
> >                */
> > -             if (total_vf_bar_sz > gate) {
> > -                     mul = roundup_pow_of_two(total_vfs);
> > -                     dev_info(&pdev->dev,
> > -                             "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
> > -                             total_vf_bar_sz, gate, mul);
> > -                     iov->m64_single_mode = true;
> > -                     break;
> > -             }
> > -     }
> > +             if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
> > +                     /*
> > +                      * On PHB3, the minimum size alignment of M64 BAR in
> > +                      * single mode is 32MB. If this VF BAR is smaller than
> > +                      * 32MB, but still too large for a segmented window
> > +                      * then we can't map it and need to disable SR-IOV for
> > +                      * this device.
>
>
> Why not use single PE mode for such BAR? Better than nothing.

Suppose you could, but I figured VFs were mainly interesting since you
could give each VF to a separate guest. If there's multiple VFs under
the same single PE BAR then they'd have to be assigned to the same
guest in order to retain the freeze/unfreeze behaviour that PAPR
requires. I guess that's how it used to work, but it seems better just
to disable them rather than having VFs which sort of work.
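To make the per-BAR decision being discussed easier to follow, here is a
minimal sketch of it (vf_bar_sz, m64_segsize and the 32MB PHB3 floor are
taken from the quoted hunks; the helper itself is hypothetical and is not
the code in the patch):

#define SZ_32M	(32UL * 1024 * 1024)	/* PHB3 single-PE M64 minimum */

enum vf_bar_mode {
	VF_BAR_SEGMENTED,	/* IOV BAR fits in the default segmented window */
	VF_BAR_SINGLE_PE,	/* map each VF with a single-PE M64 BAR */
	VF_BAR_UNSUPPORTED,	/* can't map it, SR-IOV gets disabled */
};

static enum vf_bar_mode pick_vf_bar_mode(unsigned long vf_bar_sz,
					 unsigned long m64_segsize)
{
	/* Small enough for the (arbitrary) 1/4-of-a-segment gate. */
	if (vf_bar_sz <= (m64_segsize >> 2))
		return VF_BAR_SEGMENTED;

	/* Too big for a segment, but below the PHB3 single-PE minimum. */
	if (vf_bar_sz < SZ_32M)
		return VF_BAR_UNSUPPORTED;

	return VF_BAR_SINGLE_PE;
}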

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-15  3:33           ` Alexey Kardashevskiy
@ 2020-07-15  7:05             ` Cédric Le Goater
  2020-07-15  9:00               ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Cédric Le Goater @ 2020-07-15  7:05 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Oliver O'Halloran; +Cc: linuxppc-dev

On 7/15/20 5:33 AM, Alexey Kardashevskiy wrote:
> 
> 
> On 15/07/2020 11:38, Oliver O'Halloran wrote:
>> On Tue, Jul 14, 2020 at 5:21 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>>
>>> On 14/07/2020 15:58, Oliver O'Halloran wrote:
>>>> On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>>>>
>>>>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>>>>> This also means the only remaining user of the old "DMA Weight" code is
>>>>>> the IODA1 DMA setup code that it was originally added for, which is good.
>>>>>
>>>>>
>>>>> Is ditching IODA1 in the plan? :)
>>>>
>>>> That or separating out the pci_controller_ops for IODA1 and IODA2 so
>>>> we can stop any IODA2 specific changes from breaking it.
>>>
>>> Is IODA1 tested at all these days? Or, is anyone running upstream
>>> kernels anywhere and keeps shouting when it does not work on IODA1? Thanks,
>>
>> Cedric has a P7 with OPAL. That's probably the one left though.
> 
> Has he tried these patches on that box? Or we hope for the best here? :)

I could, but can we fix the issue below before I reboot? I don't have a
console anymore on these boxes.

Firmware is:

root@amure:~# dtc -I fs /proc/device-tree/ibm,opal/firmware/ -f
<stdout>: ERROR (name_properties): /: "name" property is incorrect ("firmware" instead of base node name)
Warning: Input tree has errors, output forced
/dts-v1/;

/ {
	git-id = "34b3400";
	ml-version = [4d 4c 20 46 57 37 37 30 2e 32 30 20 46 57 37 37 30 2e 32 30 20 46 57 37 37 30 2e 32 30];
	compatible = "ibm,opal-firmware";
	phandle = <0x4d>;
	mi-version = <0x4d49205a 0x4c373730 0x5f303735 0x205a4c37 0x37305f30 0x3735205a 0x4c373730 0x5f303735>;
	linux,phandle = <0x4d>;
	name = "firmware";
};

I'd rather not change it if possible.


C.

[    1.979581] ------------[ cut here ]------------
[    1.979582] opal: OPAL_CONSOLE_FLUSH missing.
[    1.979583] WARNING: CPU: 0 PID: 253 at arch/powerpc/platforms/powernv/opal.c:446 .__opal_flush_console+0xfc/0x110
[    1.979584] Modules linked in: ipr(E+) ptp(E) usb_common(E) pps_core(E)
[    1.979587] CPU: 0 PID: 253 Comm: udevadm Tainted: G            E     5.4.0-4-powerpc64 #1 Debian 5.4.19-1
[    1.979588] NIP:  c0000000000d10ec LR: c0000000000d10e8 CTR: c000000000b13510
[    1.979589] REGS: c00000000381f130 TRAP: 0700   Tainted: G            E      (5.4.0-4-powerpc64 Debian 5.4.19-1)
[    1.979590] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28002282  XER: 20000000
[    1.979594] CFAR: c000000000157d2c IRQMASK: 3 
[    1.979595] GPR00: c0000000000d10e8 c00000000381f3c0 c000000001618700 0000000000000022 
[    1.979598] GPR04: c000000000c95df2 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    1.979601] GPR08: 0000000000000003 0000000000000003 0000000000000001 9000000000001032 
[    1.979604] GPR12: c0000000000d0818 c000000001820000 0000000000000000 c0000000014342a8 
[    1.979607] GPR16: c00000000173b850 c00000000148b218 000000011a2d5db8 0000000000000000 
[    1.979609] GPR20: 0000000000000000 c000000004b50e00 0000000000000000 c00000000173e208 
[    1.979612] GPR24: c00000000173bde8 0000000000000000 c00000000148b1d8 c0000000016620e0 
[    1.979615] GPR28: c0000000017f7c40 0000000000000000 0000000000000000 0000000000000000 
[    1.979618] NIP [c0000000000d10ec] .__opal_flush_console+0xfc/0x110
[    1.979618] LR [c0000000000d10e8] .__opal_flush_console+0xf8/0x110
[    1.979619] Call Trace:
[    1.979620] [c00000000381f3c0] [c0000000000d10e8] .__opal_flush_console+0xf8/0x110 (unreliable)
[    1.979621] [c00000000381f450] [c0000000000d1428] .opal_flush_chars+0x38/0xc0
[    1.979623] [c00000000381f4d0] [c0000000007680a8] .hvc_console_print+0x188/0x2d0
[    1.979624] [c00000000381f5b0] [c0000000001eff08] .console_unlock+0x348/0x720
[    1.979625] [c00000000381f6c0] [c0000000001f268c] .vprintk_emit+0x27c/0x3a0
[    1.979626] [c00000000381f780] [c0000000007af2f4] .dev_vprintk_emit+0x208/0x258
[    1.979628] [c00000000381f8e0] [c0000000007af38c] .dev_printk_emit+0x48/0x58
[    1.979629] [c00000000381f950] [c0000000007af748] ._dev_err+0x6c/0x9c
[    1.979630] [c00000000381fa00] [c0000000007aaff8] .uevent_store+0x78/0x80
[    1.979631] [c00000000381fa90] [c0000000007a8ce4] .dev_attr_store+0x64/0x90
[    1.979633] [c00000000381fb20] [c00000000054becc] .sysfs_kf_write+0x7c/0xa0
[    1.979634] [c00000000381fbb0] [c00000000054b294] .kernfs_fop_write+0x114/0x270
[    1.979635] [c00000000381fc50] [c000000000456b58] .__vfs_write+0x68/0xe0
[    1.979636] [c00000000381fce0] [c000000000457e44] .vfs_write+0xc4/0x270
[    1.979638] [c00000000381fd80] [c00000000045adc4] .ksys_write+0x84/0x140
[    1.979639] [c00000000381fe20] [c00000000000c050] system_call+0x5c/0x68
[    1.979640] Instruction dump:
[    1.979641] 3be0fffe 4bffb581 60000000 4bffff90 60000000 3c62ff68 39200001 3d42ffea 
[    1.979644] 3863d6d0 992a9d98 48086be1 60000000 <0fe00000> 4bffff50 480867ad 60000000 
[    1.979648] ---[ end trace 34198c4c2c15e0e2 ]---

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-15  6:16     ` Oliver O'Halloran
@ 2020-07-15  8:00       ` Alexey Kardashevskiy
  2020-07-22  5:39         ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-15  8:00 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 15/07/2020 16:16, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 3:24 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>>
>>> @@ -158,9 +157,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>               goto disable_iov;
>>>       pdev->dev.archdata.iov_data = iov;
>>>
>>> +     /* FIXME: totalvfs > phb->ioda.total_pe_num is going to be a problem */
>>
>>
>> WARN_ON_ONCE() then?
> 
> can't hurt
> 
>>> @@ -173,50 +172,51 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>                       goto disable_iov;
>>>               }
>>>
>>> -             total_vf_bar_sz += pci_iov_resource_size(pdev,
>>> -                             i + PCI_IOV_RESOURCES);
>>> +             vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>
>>>               /*
>>> -              * If bigger than quarter of M64 segment size, just round up
>>> -              * power of two.
>>> +              * Generally, one segmented M64 BAR maps one IOV BAR. However,
>>> +              * if a VF BAR is too large we end up wasting a lot of space.
>>> +              * If we've got a BAR that's bigger than greater than 1/4 of the
>>
>>
>> bigger, greater, huger? :)
>>
>> Also, a nit: s/got a BAR/got a VF BAR/
> 
> whatever, it's just words

You are talking about these BARs and those BARs, and since we want "to
help out the next sucker^Wperson who needs to tinker with it", using a
precise term is kinda essential here.


> 
>>> +              * default window's segment size then switch to using single PE
>>> +              * windows. This limits the total number of VFs we can support.
>>
>> Just to get idea about absolute numbers here.
>>
>> On my P9:
>>
>> ./pciex@600c3c0300000/ibm,opal-m64-window
>>                  00060200 00000000 00060200 00000000 00000040 00000000
>>
>> so that default window's segment size is 0x40.0000.0000/512 = 512MB?
> 
> Yeah. It'll vary a bit since PHB3 and some PHB4s have 256.
> 
>>>                *
>>> -              * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
>>> -              * with other devices, IOV BAR size is expanded to be
>>> -              * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
>>> -              * segment size , the expanded size would equal to half of the
>>> -              * whole M64 space size, which will exhaust the M64 Space and
>>> -              * limit the system flexibility.  This is a design decision to
>>> -              * set the boundary to quarter of the M64 segment size.
>>> +              * The 1/4 limit is arbitrary and can be tweaked.
>>>                */
>>> -             if (total_vf_bar_sz > gate) {
>>> -                     mul = roundup_pow_of_two(total_vfs);
>>> -                     dev_info(&pdev->dev,
>>> -                             "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
>>> -                             total_vf_bar_sz, gate, mul);
>>> -                     iov->m64_single_mode = true;
>>> -                     break;
>>> -             }
>>> -     }
>>> +             if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
>>> +                     /*
>>> +                      * On PHB3, the minimum size alignment of M64 BAR in
>>> +                      * single mode is 32MB. If this VF BAR is smaller than
>>> +                      * 32MB, but still too large for a segmented window
>>> +                      * then we can't map it and need to disable SR-IOV for
>>> +                      * this device.
>>
>>
>> Why not use single PE mode for such BAR? Better than nothing.
> 
> Suppose you could, but I figured VFs were mainly interesting since you
> could give each VF to a separate guest. If there's multiple VFs under
> the same single PE BAR then they'd have to be assigned to the same

True. But with one PE per VF we can still have 15 (or 14?) isolated VFs,
which is not hundreds but better than 0.


> guest in order to retain the freeze/unfreeze behaviour that PAPR
> requires. I guess that's how it used to work, but it seems better just
> to disable them rather than having VFs which sort of work.

Well, realistically the segment size would have to be 8MB (or the whole
window 2GB) for this to matter, which does not seem to happen, so it does
not matter.


-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-15  7:05             ` Cédric Le Goater
@ 2020-07-15  9:00               ` Oliver O'Halloran
  2020-07-15 10:05                 ` Cédric Le Goater
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-15  9:00 UTC (permalink / raw)
  To: Cédric Le Goater; +Cc: Alexey Kardashevskiy, linuxppc-dev

On Wed, Jul 15, 2020 at 5:05 PM Cédric Le Goater <clg@kaod.org> wrote:
>
> I could but can we fix the issue below before I reboot ?  I don't have a
> console anymore on these boxes.
>
> Firmware is :
> *snip*

Do you know when that started happening? I don't think anything
console related has changed in a very long time, but we probably
haven't tested it on p7 in even longer.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
  2020-07-15  9:00               ` Oliver O'Halloran
@ 2020-07-15 10:05                 ` Cédric Le Goater
  0 siblings, 0 replies; 57+ messages in thread
From: Cédric Le Goater @ 2020-07-15 10:05 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: Alexey Kardashevskiy, linuxppc-dev

On 7/15/20 11:00 AM, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 5:05 PM Cédric Le Goater <clg@kaod.org> wrote:
>>
>> I could but can we fix the issue below before I reboot ?  I don't have a
>> console anymore on these boxes.
>>
>> Firmware is :
>> *snip*
> 
> Do you know when that started happening? I don't think anything
> console related has changed in a very long time, but we probably
> haven't tested it on p7 in even longer.
> 

5.0.16-100.fc28.ppc64 and 5.4.0-4-powerpc64 have the issue, 
4.16.0-1-powerpc64 doesn't. 

Anyhow, I gave the patchset a try. Here are the kernel logs.

PCI looks fine. The system could boot, but there are some other
errors.

C.

[    0.000000] hash-mmu: Page sizes from device-tree:
[    0.000000] hash-mmu: base_shift=12: shift=12, sllp=0x0000, avpnm=0x00000000, tlbiel=1, penc=0
[    0.000000] hash-mmu: base_shift=12: shift=16, sllp=0x0000, avpnm=0x00000000, tlbiel=1, penc=7
[    0.000000] hash-mmu: base_shift=12: shift=24, sllp=0x0000, avpnm=0x00000000, tlbiel=1, penc=56
[    0.000000] hash-mmu: base_shift=16: shift=16, sllp=0x0110, avpnm=0x00000000, tlbiel=1, penc=1
[    0.000000] hash-mmu: base_shift=16: shift=24, sllp=0x0110, avpnm=0x00000000, tlbiel=1, penc=8
[    0.000000] hash-mmu: base_shift=24: shift=24, sllp=0x0100, avpnm=0x00000001, tlbiel=0, penc=0
[    0.000000] hash-mmu: base_shift=34: shift=34, sllp=0x0120, avpnm=0x000007ff, tlbiel=0, penc=3
[    0.000000] Page orders: linear mapping = 24, virtual = 16, io = 16, vmemmap = 24
[    0.000000] Using 1TB segments
[    0.000000] hash-mmu: Initializing hash mmu with SLB
[    0.000000] Linux version 5.8.0-rc5+ (legoater@amure) (gcc (Debian 9.3.0-12) 9.3.0, GNU ld (GNU Binutils for Debian) 2.34) #121 SMP Wed Jul 15 08:36:42 CEST 2020
[    0.000000] Found initrd at 0xc000000003780000:0xc0000000057444ba
[    0.000000] Using PowerNV machine description
[    0.000000] random: get_random_bytes called from .print_oops_end_marker+0x38/0x80 with crng_init=0
[    0.000000] printk: bootconsole [udbg0] enabled
[    0.000000] CPU maps initialized for 4 threads per core
[    0.000000]  (thread shift is 2)
[    0.000000] Allocated 4800 bytes for 24 pacas
[    0.000000] -----------------------------------------------------
[    0.000000] phys_mem_size     = 0xa00000000
[    0.000000] dcache_bsize      = 0x80
[    0.000000] icache_bsize      = 0x80
[    0.000000] cpu_features      = 0x0000016fcf5cb1a3
[    0.000000]   possible        = 0x0007fbefffffb1af
[    0.000000]   always          = 0x00000000000001a0
[    0.000000] cpu_user_features = 0xdc0065c2 0x20000000
[    0.000000] mmu_features      = 0x7c006001
[    0.000000] firmware_features = 0x0000000110000000
[    0.000000] vmalloc start     = 0xc008000000000000
[    0.000000] IO start          = 0xc00a000000000000
[    0.000000] vmemmap start     = 0xc00c000000000000
[    0.000000] hash-mmu: ppc64_pft_size    = 0x0
[    0.000000] hash-mmu: htab_hash_mask    = 0x7ffff
[    0.000000] -----------------------------------------------------
[    0.000000] cma: Reserved 2048 MiB at 0x0000000978000000
[    0.000000] numa:   NODE_DATA [mem 0x9ffdcaf80-0x9ffdcffff]
[    0.000000] rfi-flush: fallback displacement flush available
[    0.000000] rfi-flush: patched 7 locations (fallback displacement flush)
[    0.000000] link-stack-flush: software flush disabled.
[    0.000000] count-cache-flush: software flush disabled.
[    0.000000] stf-barrier: fallback barrier available
[    0.000000] stf-barrier: patched 62 entry locations (fallback barrier)
[    0.000000] stf-barrier: patched 7 exit locations (fallback barrier)
[    0.000000] Probing IODA IO-Hub /io-hub@3efe00000000
[    0.000000] Initializing IODA1 PHB (/io-hub@3efe00000000/pciex@3efe00080000)
[    0.000000] PCI host bridge /io-hub@3efe00000000/pciex@3efe00080000 (primary) ranges:
[    0.000000]   IO 0x00003efe01000000..0x00003efe017fffff -> 0x0000000000000000
[    0.000000]  MEM 0x00003da080000000..0x00003da0fffeffff -> 0x0000000080000000 
[    0.000000] ioremap() called early from .pnv_pci_init_ioda_phb+0x380/0xc9c. Use early_ioremap() instead
[    0.000000]  MEM 0x00003da800000000..0x00003dafffffffff -> 0x00003da800000000 (M64 #0..15)
[    0.000000]  Using M64 #15 as default window
[    0.000000]   128 (127) PE's M32: 0x80000000 [segment=0x1000000]
[    0.000000]                  M64: 0x800000000 [segment=0x10000000]
[    0.000000]                   IO: 0x800000 [segment=0x10000]
[    0.000000]   Allocated bitmap for 256 MSIs (base IRQ 0x600)
[    0.000000] Initializing IODA1 PHB (/io-hub@3efe00000000/pciex@3efe00090000)
[    0.000000] PCI host bridge /io-hub@3efe00000000/pciex@3efe00090000  ranges:
[    0.000000]   IO 0x00003efe01800000..0x00003efe01ffffff -> 0x0000000000000000
[    0.000000]  MEM 0x00003da180000000..0x00003da1fffeffff -> 0x0000000080000000 
[    0.000000] ioremap() called early from .pnv_pci_init_ioda_phb+0x380/0xc9c. Use early_ioremap() instead
[    0.000000]  MEM 0x00003db000000000..0x00003db7ffffffff -> 0x00003db000000000 (M64 #0..15)
[    0.000000]  Using M64 #15 as default window
[    0.000000]   128 (127) PE's M32: 0x80000000 [segment=0x1000000]
[    0.000000]                  M64: 0x800000000 [segment=0x10000000]
[    0.000000]                   IO: 0x800000 [segment=0x10000]
[    0.000000]   Allocated bitmap for 256 MSIs (base IRQ 0xa00)
[    0.000000] Initializing IODA1 PHB (/io-hub@3efe00000000/pciex@3efe000a0000)
[    0.000000] PCI host bridge /io-hub@3efe00000000/pciex@3efe000a0000  ranges:
[    0.000000]   IO 0x00003efe02000000..0x00003efe027fffff -> 0x0000000000000000
[    0.000000]  MEM 0x00003da280000000..0x00003da2fffeffff -> 0x0000000080000000 
[    0.000000] ioremap() called early from .pnv_pci_init_ioda_phb+0x380/0xc9c. Use early_ioremap() instead
[    0.000000]  MEM 0x00003db800000000..0x00003dbfffffffff -> 0x00003db800000000 (M64 #0..15)
[    0.000000]  Using M64 #15 as default window
[    0.000000]   128 (127) PE's M32: 0x80000000 [segment=0x1000000]
[    0.000000]                  M64: 0x800000000 [segment=0x10000000]
[    0.000000]                   IO: 0x800000 [segment=0x10000]
[    0.000000]   Allocated bitmap for 256 MSIs (base IRQ 0xe00)
[    0.000000] Initializing IODA1 PHB (/io-hub@3efe00000000/pciex@3efe000b0000)
[    0.000000] PCI host bridge /io-hub@3efe00000000/pciex@3efe000b0000  ranges:
[    0.000000]   IO 0x00003efe02800000..0x00003efe02ffffff -> 0x0000000000000000
[    0.000000]  MEM 0x00003da380000000..0x00003da3fffeffff -> 0x0000000080000000 
[    0.000000] ioremap() called early from .pnv_pci_init_ioda_phb+0x380/0xc9c. Use early_ioremap() instead
[    0.000000]  MEM 0x00003dc000000000..0x00003dc7ffffffff -> 0x00003dc000000000 (M64 #0..15)
[    0.000000]  Using M64 #15 as default window
[    0.000000]   128 (127) PE's M32: 0x80000000 [segment=0x1000000]
[    0.000000]                  M64: 0x800000000 [segment=0x10000000]
[    0.000000]                   IO: 0x800000 [segment=0x10000]
[    0.000000]   Allocated bitmap for 256 MSIs (base IRQ 0x1200)
[    0.000000] Initializing IODA1 PHB (/io-hub@3efe00000000/pciex@3efe000c0000)
[    0.000000] PCI host bridge /io-hub@3efe00000000/pciex@3efe000c0000  ranges:
[    0.000000]   IO 0x00003efe03000000..0x00003efe037fffff -> 0x0000000000000000
[    0.000000]  MEM 0x00003da480000000..0x00003da4fffeffff -> 0x0000000080000000 
[    0.000000] ioremap() called early from .pnv_pci_init_ioda_phb+0x380/0xc9c. Use early_ioremap() instead
[    0.000000]  MEM 0x00003dc800000000..0x00003dcfffffffff -> 0x00003dc800000000 (M64 #0..15)
[    0.000000]  Using M64 #15 as default window
[    0.000000]   128 (127) PE's M32: 0x80000000 [segment=0x1000000]
[    0.000000]                  M64: 0x800000000 [segment=0x10000000]
[    0.000000]                   IO: 0x800000 [segment=0x10000]
[    0.000000]   Allocated bitmap for 256 MSIs (base IRQ 0x1600)
[    0.000000] Initializing IODA1 PHB (/io-hub@3efe00000000/pciex@3efe000d0000)
[    0.000000] PCI host bridge /io-hub@3efe00000000/pciex@3efe000d0000  ranges:
[    0.000000]   IO 0x00003efe03800000..0x00003efe03ffffff -> 0x0000000000000000
[    0.000000]  MEM 0x00003da580000000..0x00003da5fffeffff -> 0x0000000080000000 
[    0.000000] ioremap() called early from .pnv_pci_init_ioda_phb+0x380/0xc9c. Use early_ioremap() instead
[    0.000000]  MEM 0x00003dd000000000..0x00003dd7ffffffff -> 0x00003dd000000000 (M64 #0..15)
[    0.000000]  Using M64 #15 as default window
[    0.000000]   128 (127) PE's M32: 0x80000000 [segment=0x1000000]
[    0.000000]                  M64: 0x800000000 [segment=0x10000000]
[    0.000000]                   IO: 0x800000 [segment=0x10000]
[    0.000000]   Allocated bitmap for 256 MSIs (base IRQ 0x1a00)
[    0.000000] OPAL nvram setup, 1048576 bytes
[    0.000000] barrier-nospec: using ORI speculation barrier
[    0.000000] barrier-nospec: patched 376 locations
[    0.000000] Top of RAM: 0xa00000000, Total RAM: 0xa00000000
[    0.000000] Memory hole size: 0MB
[    0.000000] Zone ranges:
[    0.000000]   Normal   [mem 0x0000000000000000-0x00000009ffffffff]
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000000000000-0x00000009ffffffff]
[    0.000000] Initmem setup node 0 [mem 0x0000000000000000-0x00000009ffffffff]
[    0.000000] On node 0 totalpages: 655360
[    0.000000]   Normal zone: 640 pages used for memmap
[    0.000000]   Normal zone: 0 pages reserved
[    0.000000]   Normal zone: 655360 pages, LIFO batch:3
[    0.000000] percpu: Embedded 11 pages/cpu s632984 r0 d87912 u1048576
[    0.000000] pcpu-alloc: s632984 r0 d87912 u1048576 alloc=1*1048576
[    0.000000] pcpu-alloc: [0] 00 [0] 01 [0] 02 [0] 03 [0] 04 [0] 05 [0] 06 [0] 07 
[    0.000000] pcpu-alloc: [0] 08 [0] 09 [0] 10 [0] 11 [0] 12 [0] 13 [0] 14 [0] 15 
[    0.000000] pcpu-alloc: [0] 16 [0] 17 [0] 18 [0] 19 [0] 20 [0] 21 [0] 22 [0] 23 
[    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 654720
[    0.000000] Policy zone: Normal
[    0.000000] Kernel command line: root=/dev/mapper/vg--amure-root ro
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] ------------[ cut here ]------------
[    0.000000] opal: OPAL_CONSOLE_FLUSH missing.
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/powerpc/platforms/powernv/opal.c:528 .__opal_flush_console+0xfc/0x110
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc5+ #121
[    0.000000] NIP:  c0000000000d204c LR: c0000000000d2048 CTR: c0000000001f43f0
[    0.000000] REGS: c000000001683770 TRAP: 0700   Not tainted  (5.8.0-rc5+)
[    0.000000] MSR:  9000000000021032 <SF,HV,ME,IR,DR,RI>  CR: 28004242  XER: 20000000
[    0.000000] CFAR: c0000000001570b8 IRQMASK: 1 
[    0.000000] GPR00: c0000000000d2048 c000000001683a00 c000000001685e00 0000000000000022 
[    0.000000] GPR04: c000000000cc88ea 0000000000000002 414c5f434f4e534f 4c455f464c555348 
[    0.000000] GPR08: 206d697373696e67 c0000000016d5e00 0000000000000027 9000000000001032 
[    0.000000] GPR12: c0000000001f43f0 c000000001890000 0000000000000000 c000000001495c78 
[    0.000000] GPR16: c0000000017a33f0 c000000001482e08 0000000000000001 0000000000000000 
[    0.000000] GPR20: 0000000000000001 c000000001546700 0000000000000000 c0000000017a5da8 
[    0.000000] GPR24: 0000000000000001 c0000000017a3da8 c0000000017a3418 0000000000000001 
[    0.000000] GPR28: 0000000000000036 c000000001861718 0000000000000000 0000000000000000 
[    0.000000] NIP [c0000000000d204c] .__opal_flush_console+0xfc/0x110
[    0.000000] LR [c0000000000d2048] .__opal_flush_console+0xf8/0x110
[    0.000000] Call Trace:
[    0.000000] [c000000001683a00] [c0000000000d2048] .__opal_flush_console+0xf8/0x110 (unreliable)
[    0.000000] [c000000001683a90] [c0000000000d2574] .opal_flush_console+0x24/0x60
[    0.000000] [c000000001683b10] [c0000000007c3b60] .udbg_opal_putc+0xe0/0x140
[    0.000000] [c000000001683ba0] [c000000000026f10] .udbg_write+0x90/0x150
[    0.000000] [c000000001683c30] [c0000000001f35e8] .console_unlock+0x348/0x730
[    0.000000] [c000000001683d40] [c0000000001f3cb8] .register_console+0x208/0x420
[    0.000000] [c000000001683dd0] [c000000000e2a404] .register_early_udbg_console+0x8c/0xa4
[    0.000000] [c000000001683e50] [c000000000e29fd0] .setup_arch+0x78/0x3c4
[    0.000000] [c000000001683ed0] [c000000000e24270] .start_kernel+0x98/0x750
[    0.000000] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[    0.000000] Instruction dump:
[    0.000000] 3be0fffe 4bffb141 60000000 4bffff90 60000000 3c62ff64 39200001 3d42ffea 
[    0.000000] 38632ac8 992af5b1 4808500d 60000000 <0fe00000> 4bffff50 48a7d07d 60000000 
[    0.000000] ---[ end trace 0000000000000000 ]---
[... the OPAL_CONSOLE_FLUSH warning above repeats 12 more times ...]
[    0.000000] printk: log_buf_len individual max cpu contribution: 4096 bytes
[    0.000000] printk: log_buf_len total cpu_extra contributions: 94208 bytes
[    0.000000] printk: log_buf_len min size: 131072 bytes
[    0.000000] printk: log_buf_len: 262144 bytes
[    0.000000] printk: early log buf free: 57728(44%)
[    0.000000] Dentry cache hash table entries: 8388608 (order: 10, 67108864 bytes, linear)
[    0.000000] Inode-cache hash table entries: 4194304 (order: 9, 33554432 bytes, linear)
[    0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off
[    0.000000] Memory: 39432768K/41943040K available (11648K kernel code, 2688K rwdata, 2752K rodata, 6528K init, 1425K bss, 413120K reserved, 2097152K cma-reserved)
[    0.000000] SLUB: HWalign=128, Order=0-3, MinObjects=0, CPUs=24, Nodes=1
[    0.000000] ftrace: allocating 29720 entries in 11 pages
[    0.000000] ftrace: allocated 11 pages with 3 groups
[    0.000000] rcu: Hierarchical RCU implementation.
[    0.000000] rcu: 	RCU restricting CPUs from NR_CPUS=2048 to nr_cpu_ids=24.
[    0.000000] 	Rude variant of Tasks RCU enabled.
[    0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
[    0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=24
[    0.000000] NR_IRQS: 512, nr_irqs: 512, preallocated irqs: 16
[    0.000000] ICS OPAL backend registered
[    0.000000] time_init: decrementer frequency = 512.000000 MHz
[    0.000000] time_init: processor frequency   = 3720.000000 MHz
[    0.000003] clocksource: timebase: mask: 0xffffffffffffffff max_cycles: 0x761537d007, max_idle_ns: 440795202126 ns
[    0.001375] clocksource: timebase mult[1f40000] shift[24] registered
[    0.002225] clockevent: decrementer mult[83126e98] shift[32] cpu[0]
[    0.002337] Console: colour dummy device 80x25
[    0.002931] printk: console [hvc0] enabled
[    0.003487] printk: bootconsole [udbg0] disabled
[    0.004257] pid_max: default: 32768 minimum: 301
[    0.004504] LSM: Security Framework initializing
[    0.310236] Yama: becoming mindful.
[    0.310301] AppArmor: AppArmor initialized
[    0.310318] TOMOYO Linux initialized
[    0.310551] Mount-cache hash table entries: 131072 (order: 4, 1048576 bytes, linear)
[    0.310723] Mountpoint-cache hash table entries: 131072 (order: 4, 1048576 bytes, linear)
[    0.312139] opal: Old firmware detected, OPAL handles HMIs.
[    0.312158] EEH: PowerNV platform initialized
[    0.312176] POWER7 performance monitor hardware support registered
[    0.312252] rcu: Hierarchical SRCU implementation.
[    0.313281] smp: Bringing up secondary CPUs ...
[    0.324899] smp: Brought up 1 node, 24 CPUs
[    0.324923] numa: Node 0 CPUs: 0-23
[    0.324943] Using standard scheduler topology
[    0.325205] Enabling Asymmetric SMT scheduling
[    0.328461] devtmpfs: initialized
[    0.342385] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[    0.342423] futex hash table entries: 8192 (order: 4, 1048576 bytes, linear)
[    0.649087] thermal_sys: Registered thermal governor 'fair_share'
[    0.649089] thermal_sys: Registered thermal governor 'step_wise'
[    0.649803] NET: Registered protocol family 16
[    0.650723] audit: initializing netlink subsys (disabled)
[    0.650997] audit: type=2000 audit(1594807024.344:1): state=initialized audit_enabled=0 res=1
[    0.652004] cpuidle: using governor ladder
[    0.652061] cpuidle: using governor menu
[    0.671390] PCI: Probing PCI hardware
[    0.672017] PCI host bridge to bus 0000:00
[    0.672165] pci_bus 0000:00: root bus resource [io  0x10000-0x80ffff] (bus address [0x0000-0x7fffff])
[    0.672193] pci_bus 0000:00: root bus resource [mem 0x3da080000000-0x3da0fffeffff] (bus address [0x80000000-0xfffeffff])
[    0.672220] pci_bus 0000:00: root bus resource [mem 0x3da800000000-0x3dafdfffffff 64bit pref]
[    0.672246] pci_bus 0000:00: root bus resource [bus 00-ff]
[    0.672269] pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to ff
[    0.983212] pci 0000:00:00.0: [1014:03b9] type 01 class 0x060000
[    0.983386] pci 0000:00:00.0: enabling Extended Tags
[    0.983553] pci 0000:00:00.0: PME# supported from D0 D3hot D3cold
[    0.985880] pci 0000:01:00.0: [10b5:8624] type 01 class 0x060400
[    0.986000] pci 0000:01:00.0: reg 0x10: [mem 0x3da084000000-0x3da08401ffff]
[    0.986404] pci 0000:01:00.0: PME# supported from D0 D3hot D3cold
[    0.988796] pci 0000:00:00.0: PCI bridge to [bus 01-bf]
[    0.988833] pci 0000:00:00.0:   bridge window [io  0x20000-0x3ffff]
[    0.989226] pci 0000:02:04.0: [10b5:8624] type 01 class 0x060400
[    0.989698] pci 0000:02:04.0: PME# supported from D0 D3hot D3cold
[    0.990405] pci 0000:02:05.0: [10b5:8624] type 01 class 0x060400
[    0.990884] pci 0000:02:05.0: PME# supported from D0 D3hot D3cold
[    0.991606] pci 0000:02:06.0: [10b5:8624] type 01 class 0x060400
[    0.992074] pci 0000:02:06.0: PME# supported from D0 D3hot D3cold
[    1.320758] pci 0000:02:08.0: [10b5:8624] type 01 class 0x060400
[    1.321229] pci 0000:02:08.0: PME# supported from D0 D3hot D3cold
[    1.322017] pci 0000:02:09.0: [10b5:8624] type 01 class 0x060400
[    1.322491] pci 0000:02:09.0: PME# supported from D0 D3hot D3cold
[    1.324138] pci 0000:01:00.0: PCI bridge to [bus 02-bf]
[    1.324176] pci 0000:01:00.0:   bridge window [io  0x20000-0x3ffff]
[    1.325754] pci 0000:02:04.0: PCI bridge to [bus 20-3f]
[    1.327290] pci 0000:02:05.0: PCI bridge to [bus 40-5f]
[    1.327329] pci 0000:02:05.0:   bridge window [io  0x20000-0x2ffff]
[    1.327563] pci 0000:60:00.0: [1014:0339] type 00 class 0x010400
[    1.327666] pci 0000:60:00.0: reg 0x10: [mem 0x3da081000000-0x3da08103ffff 64bit]
[    1.327730] pci 0000:60:00.0: reg 0x18: [mem 0x3da086000000-0x3da086ffffff 64bit pref]
[    1.327828] pci 0000:60:00.0: reg 0x30: [mem 0x00000000-0x0001ffff pref]
[    1.328035] pci 0000:60:00.0: PME# supported from D0 D3hot D3cold
[    1.655368] pci 0000:60:00.0: 8.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x4 link at 0000:02:06.0 (capable of 16.000 Gb/s with 2.5 GT/s PCIe x8 link)
[    1.657312] pci 0000:02:06.0: PCI bridge to [bus 60-7f]
[    1.657571] pci 0000:80:00.0: [10b5:8112] type 01 class 0x060400
[    1.657998] pci 0000:80:00.0: supports D1
[    1.658018] pci 0000:80:00.0: PME# supported from D0 D1 D3hot
[    1.658618] pci 0000:80:00.0: disabling ASPM on pre-1.1 PCIe device.  You can enable it with 'pcie_aspm=force'
[    1.660008] pci 0000:02:08.0: PCI bridge to [bus 80-9f]
[    1.660199] pci_bus 0000:90: extended config space not accessible
[    1.660352] pci 0000:90:01.0: [1033:0035] type 00 class 0x0c0310
[    1.660438] pci 0000:90:01.0: reg 0x10: [mem 0x3da082000000-0x3da082000fff]
[    1.660673] pci 0000:90:01.0: BAR0 [mem size 0x00001000]: requesting alignment to 0x10000
[    1.660796] pci 0000:90:01.0: supports D1 D2
[    1.993001] pci 0000:90:01.0: PME# supported from D0 D1 D2 D3hot
[    1.993524] pci 0000:90:01.1: [1033:0035] type 00 class 0x0c0310
[    1.993609] pci 0000:90:01.1: reg 0x10: [mem 0x3da082001000-0x3da082001fff]
[    1.993842] pci 0000:90:01.1: BAR0 [mem size 0x00001000]: requesting alignment to 0x10000
[    1.993966] pci 0000:90:01.1: supports D1 D2
[    1.993987] pci 0000:90:01.1: PME# supported from D0 D1 D2 D3hot
[    1.994506] pci 0000:90:01.2: [1033:00e0] type 00 class 0x0c0320
[    1.994591] pci 0000:90:01.2: reg 0x10: [mem 0x3da082002000-0x3da0820020ff]
[    1.994823] pci 0000:90:01.2: BAR0 [mem size 0x00000100]: requesting alignment to 0x10000
[    1.994946] pci 0000:90:01.2: supports D1 D2
[    1.994968] pci 0000:90:01.2: PME# supported from D0 D1 D2 D3hot
[    1.996752] pci 0000:80:00.0: PCI bridge to [bus 90-9f]
[    1.997025] pci 0000:a0:00.0: [8086:105e] type 00 class 0x020000
[    1.997121] pci 0000:a0:00.0: reg 0x10: [mem 0x3da083000000-0x3da08301ffff]
[    2.329069] pci 0000:a0:00.0: reg 0x14: [mem 0x3da083020000-0x3da08303ffff]
[    2.329117] pci 0000:a0:00.0: reg 0x18: [io  0x30000-0x3001f]
[    2.329239] pci 0000:a0:00.0: reg 0x30: [mem 0x00000000-0x0001ffff pref]
[    2.329461] pci 0000:a0:00.0: PME# supported from D0 D3hot D3cold
[    2.330114] pci 0000:a0:00.1: [8086:105e] type 00 class 0x020000
[    2.330211] pci 0000:a0:00.1: reg 0x10: [mem 0x3da083040000-0x3da08305ffff]
[    2.330258] pci 0000:a0:00.1: reg 0x14: [mem 0x3da083060000-0x3da08307ffff]
[    2.330307] pci 0000:a0:00.1: reg 0x18: [io  0x30020-0x3003f]
[    2.330429] pci 0000:a0:00.1: reg 0x30: [mem 0x00000000-0x0001ffff pref]
[    2.330651] pci 0000:a0:00.1: PME# supported from D0 D3hot D3cold
[    2.331209] pci 0000:a0:00.0: disabling ASPM on pre-1.1 PCIe device.  You can enable it with 'pcie_aspm=force'
[    2.332601] pci 0000:02:09.0: PCI bridge to [bus a0-bf]
[    2.332639] pci 0000:02:09.0:   bridge window [io  0x30000-0x3ffff]
[    2.665245] pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to bf
[    2.665704] PCI host bridge to bus 0001:00
[    2.665726] pci_bus 0001:00: root bus resource [io  0x820000-0x101ffff] (bus address [0x0000-0x7fffff])
[    2.665752] pci_bus 0001:00: root bus resource [mem 0x3da180000000-0x3da1fffeffff] (bus address [0x80000000-0xfffeffff])
[    2.665780] pci_bus 0001:00: root bus resource [mem 0x3db000000000-0x3db7dfffffff 64bit pref]
[    2.665806] pci_bus 0001:00: root bus resource [bus 00-ff]
[    2.665828] pci_bus 0001:00: busn_res: [bus 00-ff] end is updated to ff
[    2.665888] pci 0001:00:00.0: [1014:03b9] type 01 class 0x060000
[    2.666053] pci 0001:00:00.0: enabling Extended Tags
[    2.666210] pci 0001:00:00.0: PME# supported from D0 D3hot D3cold
[    2.669662] pci 0001:00:00.0: PCI bridge to [bus 01]
[    2.669698] pci 0001:00:00.0:   bridge window [io  0x830000-0x83ffff]
[    2.669770] pci_bus 0001:00: busn_res: [bus 00-ff] end is updated to 01
[    3.001559] PCI host bridge to bus 0002:00
[    3.001581] pci_bus 0002:00: root bus resource [io  0x1030000-0x182ffff] (bus address [0x0000-0x7fffff])
[    3.001608] pci_bus 0002:00: root bus resource [mem 0x3da280000000-0x3da2fffeffff] (bus address [0x80000000-0xfffeffff])
[    3.001635] pci_bus 0002:00: root bus resource [mem 0x3db800000000-0x3dbfdfffffff 64bit pref]
[    3.001660] pci_bus 0002:00: root bus resource [bus 00-ff]
[    3.001682] pci_bus 0002:00: busn_res: [bus 00-ff] end is updated to ff
[    3.001742] pci 0002:00:00.0: [1014:03b9] type 01 class 0x060000
[    3.001907] pci 0002:00:00.0: enabling Extended Tags
[    3.002066] pci 0002:00:00.0: PME# supported from D0 D3hot D3cold
[    3.005509] pci 0002:00:00.0: PCI bridge to [bus 01]
[    3.005545] pci 0002:00:00.0:   bridge window [io  0x1040000-0x104ffff]
[    3.005617] pci_bus 0002:00: busn_res: [bus 00-ff] end is updated to 01
[    3.006174] PCI host bridge to bus 0003:00
[    3.340046] pci_bus 0003:00: root bus resource [io  0x1840000-0x203ffff] (bus address [0x0000-0x7fffff])
[    3.340073] pci_bus 0003:00: root bus resource [mem 0x3da380000000-0x3da3fffeffff] (bus address [0x80000000-0xfffeffff])
[    3.340099] pci_bus 0003:00: root bus resource [mem 0x3dc000000000-0x3dc7dfffffff 64bit pref]
[    3.340124] pci_bus 0003:00: root bus resource [bus 00-ff]
[    3.340145] pci_bus 0003:00: busn_res: [bus 00-ff] end is updated to ff
[    3.340205] pci 0003:00:00.0: [1014:03b9] type 01 class 0x060000
[    3.340370] pci 0003:00:00.0: enabling Extended Tags
[    3.340528] pci 0003:00:00.0: PME# supported from D0 D3hot D3cold
[    3.344021] pci 0003:00:00.0: PCI bridge to [bus 01]
[    3.344063] pci 0003:00:00.0:   bridge window [io  0x1850000-0x185ffff]
[    3.344136] pci_bus 0003:00: busn_res: [bus 00-ff] end is updated to 01
[    3.344743] PCI host bridge to bus 0004:00
[    3.344765] pci_bus 0004:00: root bus resource [io  0x2050000-0x284ffff] (bus address [0x0000-0x7fffff])
[    3.676788] pci_bus 0004:00: root bus resource [mem 0x3da480000000-0x3da4fffeffff] (bus address [0x80000000-0xfffeffff])
[    3.676815] pci_bus 0004:00: root bus resource [mem 0x3dc800000000-0x3dcfdfffffff 64bit pref]
[    3.676840] pci_bus 0004:00: root bus resource [bus 00-ff]
[    3.676862] pci_bus 0004:00: busn_res: [bus 00-ff] end is updated to ff
[    3.676922] pci 0004:00:00.0: [1014:03b9] type 01 class 0x060000
[    3.677087] pci 0004:00:00.0: enabling Extended Tags
[    3.677246] pci 0004:00:00.0: PME# supported from D0 D3hot D3cold
[    3.680790] pci 0004:00:00.0: PCI bridge to [bus 01]
[    3.680826] pci 0004:00:00.0:   bridge window [io  0x2060000-0x206ffff]
[    3.680897] pci_bus 0004:00: busn_res: [bus 00-ff] end is updated to 01
[    3.681582] PCI host bridge to bus 0005:00
[    3.681604] pci_bus 0005:00: root bus resource [io  0x2860000-0x305ffff] (bus address [0x0000-0x7fffff])
[    3.681630] pci_bus 0005:00: root bus resource [mem 0x3da580000000-0x3da5fffeffff] (bus address [0x80000000-0xfffeffff])
[    3.945376] pci_bus 0005:00: root bus resource [mem 0x3dd000000000-0x3dd7dfffffff 64bit pref]
[    3.945401] pci_bus 0005:00: root bus resource [bus 00-ff]
[    3.945423] pci_bus 0005:00: busn_res: [bus 00-ff] end is updated to ff
[    3.945483] pci 0005:00:00.0: [1014:03b9] type 01 class 0x060000
[    3.945648] pci 0005:00:00.0: enabling Extended Tags
[    3.945807] pci 0005:00:00.0: PME# supported from D0 D3hot D3cold
[    3.948218] pci 0005:01:00.0: [10df:f100] type 00 class 0x0c0400
[    3.948323] pci 0005:01:00.0: reg 0x10: [mem 0x3da580008000-0x3da580008fff 64bit]
[    3.948384] pci 0005:01:00.0: reg 0x18: [mem 0x3da580000000-0x3da580003fff 64bit]
[    3.948432] pci 0005:01:00.0: reg 0x20: [io  0x2870000-0x28700ff]
[    3.948502] pci 0005:01:00.0: reg 0x30: [mem 0x00000000-0x0003ffff pref]
[    3.948547] pci 0005:01:00.0: enabling Extended Tags
[    4.281583] pci 0005:01:00.0: BAR0 [mem size 0x00001000 64bit]: requesting alignment to 0x10000
[    4.281610] pci 0005:01:00.0: BAR2 [mem size 0x00004000 64bit]: requesting alignment to 0x10000
[    4.281923] pci 0005:01:00.0: 16.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x8 link at 0005:00:00.0 (capable of 32.000 Gb/s with 5.0 GT/s PCIe x8 link)
[    4.282721] pci 0005:01:00.1: [10df:f100] type 00 class 0x0c0400
[    4.282825] pci 0005:01:00.1: reg 0x10: [mem 0x3da580009000-0x3da580009fff 64bit]
[    4.282887] pci 0005:01:00.1: reg 0x18: [mem 0x3da580004000-0x3da580007fff 64bit]
[    4.282936] pci 0005:01:00.1: reg 0x20: [io  0x2870100-0x28701ff]
[    4.283006] pci 0005:01:00.1: reg 0x30: [mem 0x00000000-0x0003ffff pref]
[    4.283052] pci 0005:01:00.1: enabling Extended Tags
[    4.283099] pci 0005:01:00.1: BAR0 [mem size 0x00001000 64bit]: requesting alignment to 0x10000
[    4.283125] pci 0005:01:00.1: BAR2 [mem size 0x00004000 64bit]: requesting alignment to 0x10000
[    4.622376] pci 0005:00:00.0: PCI bridge to [bus 01]
[    4.622414] pci 0005:00:00.0:   bridge window [io  0x2870000-0x287ffff]
[    4.622485] pci_bus 0005:00: busn_res: [bus 00-ff] end is updated to 01
[    4.622529] pci 0000:02:05.0: bridge window [io  0x10000-0xffff] to [bus 40-5f] add_size 10000
[    4.622556] pci 0000:02:05.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus 40-5f] add_size 10000000 add_align 10000000
[    4.622585] pci 0000:02:05.0: bridge window [mem 0x01000000-0x00ffffff] to [bus 40-5f] add_size 1000000 add_align 1000000
[    4.622621] pci 0000:02:09.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus a0-bf] add_size 10000000 add_align 10000000
[    4.622652] pci 0000:01:00.0: bridge window [io  0x10000-0x1ffff] to [bus 02-bf] add_size 10000
[    4.622679] pci 0000:01:00.0: bridge window [mem 0x10000000-0x3fffffff 64bit pref] to [bus 02-bf] add_size 20000000 add_align 10000000
[    4.959330] pci 0000:01:00.0: bridge window [mem 0x01000000-0x04ffffff] to [bus 02-bf] add_size 1000000 add_align 1000000
[    4.959358] pci 0000:00:00.0: bridge window [io  0x10000-0x1ffff] to [bus 01-bf] add_size 10000
[    4.959385] pci 0000:00:00.0: bridge window [mem 0x10000000-0x3fffffff 64bit pref] to [bus 01-bf] add_size 20000000 add_align 10000000
[    4.959414] pci 0000:00:00.0: bridge window [mem 0x01000000-0x05ffffff] to [bus 01-bf] add_size 1000000 add_align 1000000
[    4.959447] pci 0000:00:00.0: BAR 15: assigned [mem 0x3da800000000-0x3da84fffffff 64bit pref]
[    4.959472] pci 0000:00:00.0: BAR 14: assigned [mem 0x3da080000000-0x3da085ffffff]
[    4.959497] pci 0000:00:00.0: BAR 13: assigned [io  0x20000-0x3ffff]
[    4.959524] pci 0000:01:00.0: BAR 15: assigned [mem 0x3da800000000-0x3da84fffffff 64bit pref]
[    4.959550] pci 0000:01:00.0: BAR 14: assigned [mem 0x3da080000000-0x3da084ffffff]
[    4.959574] pci 0000:01:00.0: BAR 0: assigned [mem 0x3da085000000-0x3da08501ffff]
[    5.294386] pci 0000:01:00.0: BAR 13: assigned [io  0x20000-0x3ffff]
[    5.294417] pci 0000:02:05.0: BAR 15: assigned [mem 0x3da800000000-0x3da80fffffff 64bit pref]
[    5.294442] pci 0000:02:06.0: BAR 15: assigned [mem 0x3da810000000-0x3da81fffffff 64bit pref]
[    5.294468] pci 0000:02:09.0: BAR 15: assigned [mem 0x3da820000000-0x3da82fffffff 64bit pref]
[    5.294493] pci 0000:02:05.0: BAR 14: assigned [mem 0x3da080000000-0x3da080ffffff]
[    5.294518] pci 0000:02:06.0: BAR 14: assigned [mem 0x3da081000000-0x3da081ffffff]
[    5.294542] pci 0000:02:08.0: BAR 14: assigned [mem 0x3da082000000-0x3da082ffffff]
[    5.294566] pci 0000:02:09.0: BAR 14: assigned [mem 0x3da083000000-0x3da083ffffff]
[    5.294590] pci 0000:02:05.0: BAR 13: assigned [io  0x20000-0x2ffff]
[    5.294612] pci 0000:02:09.0: BAR 13: assigned [io  0x30000-0x3ffff]
[    5.294637] pci 0000:02:04.0: PCI bridge to [bus 20-3f]
[    5.294686] pci 0000:02:05.0: PCI bridge to [bus 40-5f]
[    5.630937] pci 0000:02:05.0:   bridge window [io  0x20000-0x2ffff]
[    5.630969] pci 0000:02:05.0:   bridge window [mem 0x3da080000000-0x3da080ffffff]
[    5.631000] pci 0000:02:05.0:   bridge window [mem 0x3da800000000-0x3da80fffffff 64bit pref]
[    5.631039] pci 0000:60:00.0: BAR 2: assigned [mem 0x3da810000000-0x3da810ffffff 64bit pref]
[    5.631091] pci 0000:60:00.0: BAR 0: assigned [mem 0x3da081000000-0x3da08103ffff 64bit]
[    5.631140] pci 0000:60:00.0: BAR 6: assigned [mem 0x3da081040000-0x3da08105ffff pref]
[    5.631166] pci 0000:02:06.0: PCI bridge to [bus 60-7f]
[    5.631196] pci 0000:02:06.0:   bridge window [mem 0x3da081000000-0x3da081ffffff]
[    5.631226] pci 0000:02:06.0:   bridge window [mem 0x3da810000000-0x3da81fffffff 64bit pref]
[    5.631264] pci 0000:80:00.0: BAR 14: assigned [mem 0x3da082000000-0x3da082ffffff]
[    5.631291] pci 0000:90:01.0: BAR 0: assigned [mem 0x3da082000000-0x3da082000fff]
[    5.631325] pci 0000:90:01.1: BAR 0: assigned [mem 0x3da082010000-0x3da082010fff]
[    5.967728] pci 0000:90:01.2: BAR 0: assigned [mem 0x3da082020000-0x3da0820200ff]
[    5.967763] pci 0000:80:00.0: PCI bridge to [bus 90-9f]
[    5.967795] pci 0000:80:00.0:   bridge window [mem 0x3da082000000-0x3da082ffffff]
[    5.967840] pci 0000:02:08.0: PCI bridge to [bus 80-9f]
[    5.967871] pci 0000:02:08.0:   bridge window [mem 0x3da082000000-0x3da082ffffff]
[    5.967916] pci 0000:a0:00.0: BAR 0: assigned [mem 0x3da083000000-0x3da08301ffff]
[    5.967949] pci 0000:a0:00.0: BAR 1: assigned [mem 0x3da083020000-0x3da08303ffff]
[    5.967981] pci 0000:a0:00.0: BAR 6: assigned [mem 0x3da083040000-0x3da08305ffff pref]
[    5.968006] pci 0000:a0:00.1: BAR 0: assigned [mem 0x3da083060000-0x3da08307ffff]
[    5.968037] pci 0000:a0:00.1: BAR 1: assigned [mem 0x3da083080000-0x3da08309ffff]
[    5.968070] pci 0000:a0:00.1: BAR 6: assigned [mem 0x3da0830a0000-0x3da0830bffff pref]
[    6.305053] pci 0000:a0:00.0: BAR 2: assigned [io  0x30000-0x3001f]
[    6.305084] pci 0000:a0:00.1: BAR 2: assigned [io  0x30020-0x3003f]
[    6.305115] pci 0000:02:09.0: PCI bridge to [bus a0-bf]
[    6.305141] pci 0000:02:09.0:   bridge window [io  0x30000-0x3ffff]
[    6.305172] pci 0000:02:09.0:   bridge window [mem 0x3da083000000-0x3da083ffffff]
[    6.305202] pci 0000:02:09.0:   bridge window [mem 0x3da820000000-0x3da82fffffff 64bit pref]
[    6.305239] pci 0000:01:00.0: PCI bridge to [bus 02-bf]
[    6.305264] pci 0000:01:00.0:   bridge window [io  0x10000-0x80ffff]
[    6.305296] pci 0000:01:00.0:   bridge window [mem 0x3da080000000-0x3da0ffefffff]
[    6.305326] pci 0000:01:00.0:   bridge window [mem 0x3da800000000-0x3dafdff0ffff 64bit pref]
[    6.305363] pci 0000:00:00.0: PCI bridge to [bus 01-bf]
[    6.305387] pci 0000:00:00.0:   bridge window [io  0x10000-0x80ffff]
[    6.305419] pci 0000:00:00.0:   bridge window [mem 0x3da080000000-0x3da0ffefffff]
[    6.643340] pci 0000:00:00.0:   bridge window [mem 0x3da800000000-0x3dafdff0ffff 64bit pref]
[    6.643378] pci_bus 0000:00: resource 4 [io  0x10000-0x80ffff]
[    6.643400] pci_bus 0000:00: resource 5 [mem 0x3da080000000-0x3da0fffeffff]
[    6.643422] pci_bus 0000:00: resource 6 [mem 0x3da800000000-0x3dafdfffffff 64bit pref]
[    6.643447] pci_bus 0000:01: resource 0 [io  0x10000-0x80ffff]
[    6.643470] pci_bus 0000:01: resource 1 [mem 0x3da080000000-0x3da0ffefffff]
[    6.643494] pci_bus 0000:01: resource 2 [mem 0x3da800000000-0x3dafdff0ffff 64bit pref]
[    6.643517] pci_bus 0000:02: resource 0 [io  0x10000-0x80ffff]
[    6.643540] pci_bus 0000:02: resource 1 [mem 0x3da080000000-0x3da0ffefffff]
[    6.643562] pci_bus 0000:02: resource 2 [mem 0x3da800000000-0x3dafdff0ffff 64bit pref]
[    6.643586] pci_bus 0000:40: resource 0 [io  0x20000-0x2ffff]
[    6.643608] pci_bus 0000:40: resource 1 [mem 0x3da080000000-0x3da080ffffff]
[    6.643631] pci_bus 0000:40: resource 2 [mem 0x3da800000000-0x3da80fffffff 64bit pref]
[    6.984394] pci_bus 0000:60: resource 1 [mem 0x3da081000000-0x3da081ffffff]
[    6.984416] pci_bus 0000:60: resource 2 [mem 0x3da810000000-0x3da81fffffff 64bit pref]
[    6.984440] pci_bus 0000:80: resource 1 [mem 0x3da082000000-0x3da082ffffff]
[    6.984462] pci_bus 0000:90: resource 1 [mem 0x3da082000000-0x3da082ffffff]
[    6.984484] pci_bus 0000:a0: resource 0 [io  0x30000-0x3ffff]
[    6.984506] pci_bus 0000:a0: resource 1 [mem 0x3da083000000-0x3da083ffffff]
[    6.984528] pci_bus 0000:a0: resource 2 [mem 0x3da820000000-0x3da82fffffff 64bit pref]
[    6.984554] pci 0001:00:00.0: bridge window [io  0x10000-0xffff] to [bus 01] add_size 10000
[    6.984579] pci 0001:00:00.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus 01] add_size 10000000 add_align 10000000
[    6.984608] pci 0001:00:00.0: bridge window [mem 0x01000000-0x00ffffff] to [bus 01] add_size 1000000 add_align 1000000
[    7.352620] pci 0001:00:00.0: BAR 15: assigned [mem 0x3db000000000-0x3db00fffffff 64bit pref]
[    7.352646] pci 0001:00:00.0: BAR 14: assigned [mem 0x3da180000000-0x3da180ffffff]
[    7.352670] pci 0001:00:00.0: BAR 13: assigned [io  0x830000-0x83ffff]
[    7.352693] pci 0001:00:00.0: PCI bridge to [bus 01]
[    7.352719] pci 0001:00:00.0:   bridge window [io  0x820000-0x101ffff]
[    7.352750] pci 0001:00:00.0:   bridge window [mem 0x3da180000000-0x3da1ffefffff]
[    7.352781] pci 0001:00:00.0:   bridge window [mem 0x3db000000000-0x3db7dff0ffff 64bit pref]
[    7.352818] pci_bus 0001:00: resource 4 [io  0x820000-0x101ffff]
[    7.352840] pci_bus 0001:00: resource 5 [mem 0x3da180000000-0x3da1fffeffff]
[    7.352863] pci_bus 0001:00: resource 6 [mem 0x3db000000000-0x3db7dfffffff 64bit pref]
[    7.352887] pci_bus 0001:01: resource 0 [io  0x820000-0x101ffff]
[    7.352909] pci_bus 0001:01: resource 1 [mem 0x3da180000000-0x3da1ffefffff]
[    7.352933] pci_bus 0001:01: resource 2 [mem 0x3db000000000-0x3db7dff0ffff 64bit pref]
[    7.689979] pci 0002:00:00.0: bridge window [io  0x10000-0xffff] to [bus 01] add_size 10000
[    7.690005] pci 0002:00:00.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus 01] add_size 10000000 add_align 10000000
[    7.690033] pci 0002:00:00.0: bridge window [mem 0x01000000-0x00ffffff] to [bus 01] add_size 1000000 add_align 1000000
[    7.690062] pci 0002:00:00.0: BAR 15: assigned [mem 0x3db800000000-0x3db80fffffff 64bit pref]
[    7.690087] pci 0002:00:00.0: BAR 14: assigned [mem 0x3da280000000-0x3da280ffffff]
[    7.690112] pci 0002:00:00.0: BAR 13: assigned [io  0x1040000-0x104ffff]
[    7.690136] pci 0002:00:00.0: PCI bridge to [bus 01]
[    7.690160] pci 0002:00:00.0:   bridge window [io  0x1030000-0x182ffff]
[    7.690191] pci 0002:00:00.0:   bridge window [mem 0x3da280000000-0x3da2ffefffff]
[    7.690222] pci 0002:00:00.0:   bridge window [mem 0x3db800000000-0x3dbfdff0ffff 64bit pref]
[    8.034686] pci_bus 0002:00: resource 4 [io  0x1030000-0x182ffff]
[    8.034708] pci_bus 0002:00: resource 5 [mem 0x3da280000000-0x3da2fffeffff]
[    8.034731] pci_bus 0002:00: resource 6 [mem 0x3db800000000-0x3dbfdfffffff 64bit pref]
[    8.034755] pci_bus 0002:01: resource 0 [io  0x1030000-0x182ffff]
[    8.034777] pci_bus 0002:01: resource 1 [mem 0x3da280000000-0x3da2ffefffff]
[    8.034800] pci_bus 0002:01: resource 2 [mem 0x3db800000000-0x3dbfdff0ffff 64bit pref]
[    8.034824] pci 0003:00:00.0: bridge window [io  0x10000-0xffff] to [bus 01] add_size 10000
[    8.034849] pci 0003:00:00.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus 01] add_size 10000000 add_align 10000000
[    8.034879] pci 0003:00:00.0: bridge window [mem 0x01000000-0x00ffffff] to [bus 01] add_size 1000000 add_align 1000000
[    8.034907] pci 0003:00:00.0: BAR 15: assigned [mem 0x3dc000000000-0x3dc00fffffff 64bit pref]
[    8.034933] pci 0003:00:00.0: BAR 14: assigned [mem 0x3da380000000-0x3da380ffffff]
[    8.377305] pci 0003:00:00.0: BAR 13: assigned [io  0x1850000-0x185ffff]
[    8.377329] pci 0003:00:00.0: PCI bridge to [bus 01]
[    8.377354] pci 0003:00:00.0:   bridge window [io  0x1840000-0x203ffff]
[    8.377386] pci 0003:00:00.0:   bridge window [mem 0x3da380000000-0x3da3ffefffff]
[    8.377415] pci 0003:00:00.0:   bridge window [mem 0x3dc000000000-0x3dc7dff0ffff 64bit pref]
[    8.377454] pci_bus 0003:00: resource 4 [io  0x1840000-0x203ffff]
[    8.377476] pci_bus 0003:00: resource 5 [mem 0x3da380000000-0x3da3fffeffff]
[    8.377498] pci_bus 0003:00: resource 6 [mem 0x3dc000000000-0x3dc7dfffffff 64bit pref]
[    8.377521] pci_bus 0003:01: resource 0 [io  0x1840000-0x203ffff]
[    8.377543] pci_bus 0003:01: resource 1 [mem 0x3da380000000-0x3da3ffefffff]
[    8.377565] pci_bus 0003:01: resource 2 [mem 0x3dc000000000-0x3dc7dff0ffff 64bit pref]
[    8.377591] pci 0004:00:00.0: bridge window [io  0x10000-0xffff] to [bus 01] add_size 10000
[    8.647236] pci 0004:00:00.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus 01] add_size 10000000 add_align 10000000
[    8.647264] pci 0004:00:00.0: bridge window [mem 0x01000000-0x00ffffff] to [bus 01] add_size 1000000 add_align 1000000
[    8.647293] pci 0004:00:00.0: BAR 15: assigned [mem 0x3dc800000000-0x3dc80fffffff 64bit pref]
[    8.647318] pci 0004:00:00.0: BAR 14: assigned [mem 0x3da480000000-0x3da480ffffff]
[    8.647342] pci 0004:00:00.0: BAR 13: assigned [io  0x2060000-0x206ffff]
[    8.647366] pci 0004:00:00.0: PCI bridge to [bus 01]
[    8.647392] pci 0004:00:00.0:   bridge window [io  0x2050000-0x284ffff]
[    8.647423] pci 0004:00:00.0:   bridge window [mem 0x3da480000000-0x3da4ffefffff]
[    8.647453] pci 0004:00:00.0:   bridge window [mem 0x3dc800000000-0x3dcfdff0ffff 64bit pref]
[    8.647490] pci_bus 0004:00: resource 4 [io  0x2050000-0x284ffff]
[    8.647512] pci_bus 0004:00: resource 5 [mem 0x3da480000000-0x3da4fffeffff]
[    8.983834] pci_bus 0004:00: resource 6 [mem 0x3dc800000000-0x3dcfdfffffff 64bit pref]
[    8.983857] pci_bus 0004:01: resource 0 [io  0x2050000-0x284ffff]
[    8.983879] pci_bus 0004:01: resource 1 [mem 0x3da480000000-0x3da4ffefffff]
[    8.983903] pci_bus 0004:01: resource 2 [mem 0x3dc800000000-0x3dcfdff0ffff 64bit pref]
[    8.983929] pci 0005:00:00.0: bridge window [mem 0x10000000-0x0fffffff 64bit pref] to [bus 01] add_size 10000000 add_align 10000000
[    8.983960] pci 0005:00:00.0: BAR 15: assigned [mem 0x3dd000000000-0x3dd00fffffff 64bit pref]
[    8.983986] pci 0005:00:00.0: BAR 14: assigned [mem 0x3da580000000-0x3da580ffffff]
[    8.984010] pci 0005:00:00.0: BAR 13: assigned [io  0x2870000-0x287ffff]
[    8.984038] pci 0005:01:00.0: BAR 6: assigned [mem 0x3da580000000-0x3da58003ffff pref]
[    8.984062] pci 0005:01:00.1: BAR 6: assigned [mem 0x3da580040000-0x3da58007ffff pref]
[    8.984088] pci 0005:01:00.0: BAR 0: assigned [mem 0x3da580080000-0x3da580080fff 64bit]
[    9.319082] pci 0005:01:00.0: BAR 2: assigned [mem 0x3da580090000-0x3da580093fff 64bit]
[    9.319132] pci 0005:01:00.1: BAR 0: assigned [mem 0x3da5800a0000-0x3da5800a0fff 64bit]
[    9.319181] pci 0005:01:00.1: BAR 2: assigned [mem 0x3da5800b0000-0x3da5800b3fff 64bit]
[    9.319229] pci 0005:01:00.0: BAR 4: assigned [io  0x2870000-0x28700ff]
[    9.319259] pci 0005:01:00.1: BAR 4: assigned [io  0x2870100-0x28701ff]
[    9.319289] pci 0005:00:00.0: PCI bridge to [bus 01]
[    9.319315] pci 0005:00:00.0:   bridge window [io  0x2860000-0x305ffff]
[    9.319347] pci 0005:00:00.0:   bridge window [mem 0x3da580000000-0x3da5ffefffff]
[    9.319377] pci 0005:00:00.0:   bridge window [mem 0x3dd000000000-0x3dd7dff0ffff 64bit pref]
[    9.319414] pci_bus 0005:00: resource 4 [io  0x2860000-0x305ffff]
[    9.319436] pci_bus 0005:00: resource 5 [mem 0x3da580000000-0x3da5fffeffff]
[    9.319460] pci_bus 0005:00: resource 6 [mem 0x3dd000000000-0x3dd7dfffffff 64bit pref]
[    9.655176] pci_bus 0005:01: resource 0 [io  0x2860000-0x305ffff]
[    9.655199] pci_bus 0005:01: resource 1 [mem 0x3da580000000-0x3da5ffefffff]
[    9.655221] pci_bus 0005:01: resource 2 [mem 0x3dd000000000-0x3dd7dff0ffff 64bit pref]
[    9.655245] pci_bus 0000:00: Configuring PE for bus
[    9.655268] pci 0000:00     : [PE# 7e] Secondary bus 0x0000000000000000 associated with PE#7e
[    9.655304] pci 0000:00:00.0: Configured PE#7e
[    9.655410] pci_bus 0000:01: Configuring PE for bus
[    9.655435] pci 0000:01     : [PE# 7d] Secondary bus 0x0000000000000001 associated with PE#7d
[    9.655469] pci 0000:01:00.0: Configured PE#7d
[    9.655631] pci_bus 0000:02: Configuring PE for bus
[    9.655655] pci 0000:02     : [PE# 7c] Secondary bus 0x0000000000000002 associated with PE#7c
[    9.655689] pci 0000:02:04.0: Configured PE#7c
[    9.655833] pci 0000:02:05.0: Added to existing PE#7c
[    9.655999] pci 0000:02:06.0: Added to existing PE#7c
[    9.993364] pci 0000:02:08.0: Added to existing PE#7c
[    9.993517] pci 0000:02:09.0: Added to existing PE#7c
[    9.993674] pci_bus 0000:60: Configuring PE for bus
[    9.993700] pci 0000:60     : [PE# 01] Secondary bus 0x0000000000000060 associated with PE#1
[    9.993736] pci 0000:60:00.0: Configured PE#1
[    9.993775] pci 0000:60     : [PE# 01] DMA weight 15 (44), assigned (0) 2 DMA32 segments
[    9.993798] pci 0000:60     : [PE# 01]  Setting up 32-bit TCE table at 00000000..1fffffff
[   10.002693] IOMMU table initialized, virtual merging enabled
[   10.002790] pci 0000:60:00.0: Adding to iommu group 0
[   10.002936] pci_bus 0000:80: Configuring PE for bus
[   10.002961] pci 0000:80     : [PE# 7b] Secondary bus 0x0000000000000080 associated with PE#7b
[   10.002996] pci 0000:80:00.0: Configured PE#7b
[   10.003169] pci_bus 0000:90: Configuring PE for bus
[   10.003195] pci 0000:90     : [PE# 7a] Secondary bus 0x0000000000000090..0x000000000000009f associated with PE#7a
[   10.336537] pci 0000:90:01.0: Configured PE#7a
[   10.336573] pci 0000:90     : [PE# 7a] DMA weight 9 (44), assigned (2) 1 DMA32 segments
[   10.336597] pci 0000:90     : [PE# 7a]  Setting up 32-bit TCE table at 20000000..2fffffff
[   10.341072] pci 0000:90:01.0: Adding to iommu group 1
[   10.341211] pci 0000:90:01.1: Added to existing PE#7a
[   10.341237] pci 0000:90:01.1: Adding to iommu group 1
[   10.341368] pci 0000:90:01.2: Added to existing PE#7a
[   10.341393] pci 0000:90:01.2: Adding to iommu group 1
[   10.341550] pci_bus 0000:a0: Configuring PE for bus
[   10.341575] pci 0000:a0     : [PE# 79] Secondary bus 0x00000000000000a0 associated with PE#79
[   10.341615] pci 0000:a0:00.0: Configured PE#79
[   10.341649] pci 0000:a0     : [PE# 79] DMA weight 20 (44), assigned (3) 3 DMA32 segments
[   10.341673] pci 0000:a0     : [PE# 79]  Setting up 32-bit TCE table at 30000000..5fffffff
[   10.467396] watchdog: CPU 1 self-detected hard LOCKUP @ .do_idle+0x160/0x430
[   10.467397] watchdog: CPU 1 TB:43795760607, last heartbeat TB:38597076570 (10153ms ago)
[   10.467398] Modules linked in:
[   10.467400] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W         5.8.0-rc5+ #121
[   10.467401] NIP:  c0000000001a9ca0 LR: c0000000001a9ca0 CTR: c00000000022ede0
[   10.467402] REGS: c0000009ffee3d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.467403] MSR:  9000000002009032 <SF,HV,VEC,EE,ME,IR,DR,RI>  CR: 24000228  XER: 00000000
[   10.467409] CFAR: c0000000008ec0b0 IRQMASK: 1 
[   10.467410] GPR00: c0000000001a9ca0 c00000096f73fd50 c000000001685e00 0000000000000001 
[   10.467414] GPR04: 0000000000000000 c000000976913e00 c0000009ffffee00 0000000000000002 
[   10.467417] GPR08: 0000000000000000 0000000000000000 0000000975a10000 c0000000016d25e0 
[   10.467420] GPR12: 0000000028000222 c0000009ffffee00 c0000009f33abf90 0000000000000000 
[   10.467423] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.467426] GPR20: 0000000000000000 c00000096f4e6c80 0000000000000001 0000000000080000 
[   10.467430] GPR24: c00000096f4e6c00 c00000096f4e6c80 c00000096f4e6c00 c0000000016d2640 
[   10.467433] GPR28: 0000000000000001 c00000096f4e6c00 0000000000000001 c0000000016d21e0 
[   10.467436] NIP [c0000000001a9ca0] .do_idle+0x160/0x430
[   10.467437] LR [c0000000001a9ca0] .do_idle+0x160/0x430
[   10.467437] Call Trace:
[   10.467438] [c00000096f73fd50] [c0000000001a9ca0] .do_idle+0x160/0x430 (unreliable)
[   10.467441] [c00000096f73fe40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.467442] [c00000096f73feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.467444] [c00000096f73ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.467445] Instruction dump:
[   10.467446] 3d22ff90 39291e88 7e8a482a 7e83a378 48743541 60000000 e93d0080 71290004 
[   10.467450] 7c731b78 4082023c 7e84a378 487423b5 <60000000> 2c230000 408201e8 3d220005 
[   10.467654] watchdog: CPU 14 self-detected hard LOCKUP @ .default_idle_call+0x34/0x7c
[   10.467656] watchdog: CPU 14 TB:43795840431, last heartbeat TB:38599902767 (10148ms ago)
[   10.467657] Modules linked in:
[   10.467659] CPU: 14 PID: 0 Comm: swapper/14 Tainted: G        W         5.8.0-rc5+ #121
[   10.467661] NIP:  c000000000b57024 LR: c0000000001a9ea4 CTR: c00000000022ede0
[   10.467662] REGS: c0000009ffe47d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.467663] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.467671] CFAR: c0000000001a9ea0 IRQMASK: 1 
[   10.467673] GPR00: c0000000001a9ea4 c00000096f75fce0 c000000001685e00 0000000000000000 
[   10.467678] GPR04: 0000000000000000 c000000977613e00 c0000009fffbe200 0000000000000808 
[   10.467682] GPR08: c00000096f4f6880 c00000096f4f6800 0000000000080000 0000000300000004 
[   10.467687] GPR12: 0000000024000222 c0000009fffbe200 c0000009f33dff90 0000000000000000 
[   10.467691] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.467695] GPR20: 0000000000000000 c00000096f4f6880 0000000000000001 0000000000080000 
[   10.467699] GPR24: c00000096f4f6800 c00000096f4f6880 c00000096f4f6800 c0000000016d2640 
[   10.467704] GPR28: 0000000000000001 c00000096f4f6800 000000000000000e c0000000016d21e0 
[   10.467709] NIP [c000000000b57024] .default_idle_call+0x34/0x7c
[   10.467710] LR [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.467711] Call Trace:
[   10.467712] [c00000096f75fce0] [c000000000b57034] .default_idle_call+0x44/0x7c (unreliable)
[   10.467715] [c00000096f75fd50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.467717] [c00000096f75fe40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.467719] [c00000096f75feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.467721] [c00000096f75ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.467723] Instruction dump:
[   10.467724] f8010010 f821ff91 60000000 60000000 e92d0968 3d400008 39090080 7ce040a8 
[   10.467730] 7ce75078 7ce041ad 40c2fff4 7c0004ac <e9290080> 71290004 40820024 4b4c3f61 
[   10.467739] watchdog: CPU 21 self-detected hard LOCKUP @ .rcu_dynticks_eqs_enter+0x20/0x40
[   10.467741] watchdog: CPU 21 TB:43795873135, last heartbeat TB:38602037023 (10144ms ago)
[   10.467742] Modules linked in:
[   10.467744] CPU: 21 PID: 0 Comm: swapper/21 Tainted: G        W         5.8.0-rc5+ #121
[   10.467745] NIP:  c000000000b4f1b0 LR: c0000000001a9e9c CTR: c00000000022ede0
[   10.467747] REGS: c0000009ffdf3d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.467748] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.467756] CFAR: c000000000b4f290 IRQMASK: 1 
[   10.467758] GPR00: c0000000001a9e9c c00000096f8d7d50 c000000001685e00 0000000000000000 
[   10.467762] GPR04: 0000000000000000 c000000977d13e00 c0000009fffb6300 0000000005c93f7e 
[   10.467767] GPR08: c000000977d9a0d8 c000000977d9a000 0000000000000002 0000000300000004 
[   10.467771] GPR12: 0000000024000222 c0000009fffb6300 c0000009f33fbf90 0000000000000000 
[   10.467775] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.467780] GPR20: 0000000000000000 c00000096f7f5680 0000000000000001 0000000000080000 
[   10.467784] GPR24: c00000096f7f5600 c00000096f7f5680 c00000096f7f5600 c0000000016d2640 
[   10.467788] GPR28: 0000000000000001 c00000096f7f5600 0000000000000015 c0000000016d21e0 
[   10.467793] NIP [c000000000b4f1b0] .rcu_dynticks_eqs_enter+0x20/0x40
[   10.467794] LR [c0000000001a9e9c] .do_idle+0x35c/0x430
[   10.467795] Call Trace:
[   10.467797] [c00000096f8d7d50] [c0000000001a9e9c] .do_idle+0x35c/0x430 (unreliable)
[   10.467799] [c00000096f8d7e40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.467802] [c00000096f8d7eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.467804] [c00000096f8d7f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.467805] Instruction dump:
[   10.467807] 7c0803a6 4e800020 60000000 60000000 e94d0030 3d22ff90 39294200 7d295214 
[   10.467813] 390900d8 7c0004ac 39400002 7ce04028 <7cea3a14> 7ce0412d 40c2fff4 7c0004ac 
[   10.467821] watchdog: CPU 5 self-detected hard LOCKUP @ .arch_cpu_idle+0xbc/0x180
[   10.467823] watchdog: CPU 5 TB:43795948815, last heartbeat TB:38597881014 (10152ms ago)
[   10.467824] Modules linked in:
[   10.467826] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G        W         5.8.0-rc5+ #121
[   10.467828] NIP:  c00000000001b04c LR: c00000000001b034 CTR: c00000000022ede0
[   10.467829] REGS: c0000009ffeb3d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.467830] MSR:  9000000002009032 <SF,HV,VEC,EE,ME,IR,DR,RI>  CR: 44000222  XER: 00000000
[   10.467838] CFAR: c00000000001aa10 IRQMASK: 1 
[   10.467840] GPR00: c000000000b57034 c00000096f77bc60 c000000001685e00 0000000000000000 
[   10.467845] GPR04: 0000000000000000 c000000976d13e00 c0000009fffc8400 0000000000000808 
[   10.467849] GPR08: c00000096f4e1280 0000000000008002 c00000096f4e1200 0000000000000400 
[   10.467854] GPR12: 0000000024000222 c0000009fffc8400 c0000009f33bbf90 0000000000000000 
[   10.467858] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.467862] GPR20: 0000000000000000 c00000096f4e1280 0000000000000001 0000000000080000 
[   10.467866] GPR24: c00000096f4e1200 c00000096f4e1280 c00000096f4e1200 c0000000016d2640 
[   10.467871] GPR28: 0000000000000001 c00000096f4e1200 0000000000000005 9000000002009032 
[   10.467876] NIP [c00000000001b04c] .arch_cpu_idle+0xbc/0x180
[   10.467877] LR [c00000000001b034] .arch_cpu_idle+0xa4/0x180
[   10.467878] Call Trace:
[   10.467879] [c00000096f77bc60] [c000000000015d50] .arch_local_irq_restore+0x40/0x90 (unreliable)
[   10.467882] [c00000096f77bce0] [c000000000b57034] .default_idle_call+0x44/0x7c
[   10.467884] [c00000096f77bd50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.467886] [c00000096f77be40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.467888] [c00000096f77beb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.467890] [c00000096f77bf90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.467892] Instruction dump:
[   10.467893] 60000000 fbe10078 7fe000a6 39200002 7d210164 4bfff9c1 60000000 73e98000 
[   10.467899] 418200c4 39200000 61298002 7d210164 <3d22000d> ebe10078 3929a380 e9290160 
[   10.468246] watchdog: CPU 22 self-detected hard LOCKUP @ .rcu_dynticks_eqs_enter+0x18/0x40
[   10.468248] watchdog: CPU 22 TB:43796195097, last heartbeat TB:38602406031 (10144ms ago)
[   10.468249] Modules linked in:
[   10.468251] CPU: 22 PID: 0 Comm: swapper/22 Tainted: G        W         5.8.0-rc5+ #121
[   10.468252] NIP:  c000000000b4f1a8 LR: c0000000001a9e9c CTR: c00000000022ede0
[   10.468254] REGS: c0000009ffde7d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.468255] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.468262] CFAR: c000000000b4f290 IRQMASK: 1 
[   10.468264] GPR00: c0000000001a9e9c c00000096f893d50 c000000001685e00 0000000000000000 
[   10.468268] GPR04: 0000000000000000 c000000977e13e00 c0000009fffb5080 0000000000000000 
[   10.468272] GPR08: c000000977e9a0d8 c000000977e9a000 0000000976f10000 c0000000016d25e0 
[   10.468277] GPR12: 0000000024000228 c0000009fffb5080 c0000009f33fff90 0000000000000000 
[   10.468281] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.468285] GPR20: 0000000000000000 c00000096f7f3280 0000000000000001 0000000000080000 
[   10.468290] GPR24: c00000096f7f3200 c00000096f7f3280 c00000096f7f3200 c0000000016d2640 
[   10.468294] GPR28: 0000000000000001 c00000096f7f3200 0000000000000016 c0000000016d21e0 
[   10.468299] NIP [c000000000b4f1a8] .rcu_dynticks_eqs_enter+0x18/0x40
[   10.468300] LR [c0000000001a9e9c] .do_idle+0x35c/0x430
[   10.468301] Call Trace:
[   10.468302] [c00000096f893d50] [c0000000001a9e9c] .do_idle+0x35c/0x430 (unreliable)
[   10.468304] [c00000096f893e40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.468306] [c00000096f893eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.468308] [c00000096f893f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.468310] Instruction dump:
[   10.468311] 38210070 e8010010 7c0803a6 4e800020 60000000 60000000 e94d0030 3d22ff90 
[   10.468317] 39294200 7d295214 390900d8 7c0004ac <39400002> 7ce04028 7cea3a14 7ce0412d 
[   10.468570] watchdog: CPU 7 self-detected hard LOCKUP @ .tick_nohz_idle_stop_tick+0x7c/0x380
[   10.468571] watchdog: CPU 7 TB:43796359220, last heartbeat TB:38598288559 (10152ms ago)
[   10.468572] Modules linked in:
[   10.468575] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G        W         5.8.0-rc5+ #121
[   10.468576] NIP:  c00000000023ff6c LR: c0000000002400b0 CTR: c00000000022ede0
[   10.468578] REGS: c0000009ffe9bd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.468578] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.468586] CFAR: c0000000002400b4 IRQMASK: 1 
[   10.468588] GPR00: c0000000001a9e94 c00000096f76fd50 c000000001685e00 0000000000000000 
[   10.468592] GPR04: 0000000000000000 c000000976f13e00 c0000009fffc6000 0000000000000002 
[   10.468596] GPR08: 0000000000000007 0000000000000001 0000000000000000 c0000000016d25e0 
[   10.468601] GPR12: 0000000024000228 c0000009fffc6000 c0000009f33c3f90 0000000000000000 
[   10.468605] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.468609] GPR20: 0000000000000000 c00000096f4efc80 0000000000000001 0000000000080000 
[   10.468613] GPR24: c00000096f4efc00 c00000096f4efc80 c00000096f4efc00 c0000000016d2640 
[   10.468618] GPR28: 0000000000000001 c00000096f4efc00 0000000000000007 c000000976f13da8 
[   10.468623] NIP [c00000000023ff6c] .tick_nohz_idle_stop_tick+0x7c/0x380
[   10.468624] LR [c0000000002400b0] .tick_nohz_idle_stop_tick+0x1c0/0x380
[   10.468625] Call Trace:
[   10.468626] [c00000096f76fd50] [c0000000001a9e94] .do_idle+0x354/0x430 (unreliable)
[   10.468629] [c00000096f76fe40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.468631] [c00000096f76feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.468633] [c00000096f76ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.468635] Instruction dump:
[   10.468636] e93f0068 2c3d0000 39290001 f93f0068 41810034 39200000 f93f00b0 382100b0 
[   10.468642] e8010010 eb61ffd8 eba1ffe8 ebc1fff0 <7c0803a6> ebe1fff8 4e800020 60000000 
[   10.468713] watchdog: CPU 16 self-detected hard LOCKUP @ .arch_cpu_idle+0xbc/0x180
[   10.468715] watchdog: CPU 16 TB:43796424321, last heartbeat TB:38600458207 (10148ms ago)
[   10.468716] Modules linked in:
[   10.468718] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G        W         5.8.0-rc5+ #121
[   10.468720] NIP:  c00000000001b04c LR: c00000000001b034 CTR: c00000000022ede0
[   10.468721] REGS: c0000009ffe2fd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.468722] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 44000222  XER: 00000000
[   10.468730] CFAR: c00000000001aa10 IRQMASK: 1 
[   10.468732] GPR00: c000000000b57034 c00000096f713c60 c000000001685e00 0000000000000000 
[   10.468737] GPR04: 0000000000000000 c000000977813e00 c0000009fffbbe00 0000000000000808 
[   10.468741] GPR08: c00000096f4f4480 0000000000008002 c00000096f4f4400 0000000300000004 
[   10.468745] GPR12: 0000000024000222 c0000009fffbbe00 c0000009f33e7f90 0000000000000000 
[   10.468750] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.468754] GPR20: 0000000000000000 c00000096f4f4480 0000000000000001 0000000000080000 
[   10.468758] GPR24: c00000096f4f4400 c00000096f4f4480 c00000096f4f4400 c0000000016d2640 
[   10.468762] GPR28: 0000000000000001 c00000096f4f4400 0000000000000010 9000000000009032 
[   10.468767] NIP [c00000000001b04c] .arch_cpu_idle+0xbc/0x180
[   10.468768] LR [c00000000001b034] .arch_cpu_idle+0xa4/0x180
[   10.468769] Call Trace:
[   10.468770] [c00000096f713c60] [c00000000001b078] .arch_cpu_idle+0xe8/0x180 (unreliable)
[   10.468773] [c00000096f713ce0] [c000000000b57034] .default_idle_call+0x44/0x7c
[   10.468775] [c00000096f713d50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.468777] [c00000096f713e40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.468779] [c00000096f713eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.468781] [c00000096f713f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.468783] Instruction dump:
[   10.468785] 60000000 fbe10078 7fe000a6 39200002 7d210164 4bfff9c1 60000000 73e98000 
[   10.468791] 418200c4 39200000 61298002 7d210164 <3d22000d> ebe10078 3929a380 e9290160 
[   10.468969] watchdog: CPU 23 self-detected hard LOCKUP @ .rcu_dynticks_eqs_enter+0x28/0x40
[   10.468971] watchdog: CPU 23 TB:43796565478, last heartbeat TB:38602834490 (10144ms ago)
[   10.468972] Modules linked in:
[   10.468974] CPU: 23 PID: 0 Comm: swapper/23 Tainted: G        W         5.8.0-rc5+ #121
[   10.468975] NIP:  c000000000b4f1b8 LR: c0000000001a9e9c CTR: c00000000022ede0
[   10.468977] REGS: c0000009ffddbd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.468977] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.468984] CFAR: c000000000b4f290 IRQMASK: 1 
[   10.468987] GPR00: c0000000001a9e9c c00000096f8e7d50 c000000001685e00 0000000000000000 
[   10.468991] GPR04: 0000000000000000 c000000977f13e00 c0000009fffb3e00 0000000005cc52c4 
[   10.468995] GPR08: c000000977f9a0d8 c000000977f9a000 0000000000000002 c0000000016d25e0 
[   10.468999] GPR12: 0000000024000228 c0000009fffb3e00 c0000009f3403f90 0000000000000000 
[   10.469004] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.469008] GPR20: 0000000000000000 c00000096f7e4880 0000000000000001 0000000000080000 
[   10.469012] GPR24: c00000096f7e4800 c00000096f7e4880 c00000096f7e4800 c0000000016d2640 
[   10.469017] GPR28: 0000000000000001 c00000096f7e4800 0000000000000017 c0000000016d21e0 
[   10.469022] NIP [c000000000b4f1b8] .rcu_dynticks_eqs_enter+0x28/0x40
[   10.469023] LR [c0000000001a9e9c] .do_idle+0x35c/0x430
[   10.469023] Call Trace:
[   10.469025] [c00000096f8e7d50] [c0000000001a9e9c] .do_idle+0x35c/0x430 (unreliable)
[   10.469027] [c00000096f8e7e40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.469029] [c00000096f8e7eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.469031] [c00000096f8e7f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.469033] Instruction dump:
[   10.469034] 60000000 60000000 e94d0030 3d22ff90 39294200 7d295214 390900d8 7c0004ac 
[   10.469040] 39400002 7ce04028 7cea3a14 7ce0412d <40c2fff4> 7c0004ac 4e800020 60000000 
[   10.469361] watchdog: CPU 9 self-detected hard LOCKUP @ .cpuidle_get_cpu_driver+0x0/0x50
[   10.469363] watchdog: CPU 9 TB:43796759103, last heartbeat TB:38598696828 (10152ms ago)
[   10.469364] Modules linked in:
[   10.469366] CPU: 9 PID: 0 Comm: swapper/9 Tainted: G        W         5.8.0-rc5+ #121
[   10.469368] NIP:  c0000000008ed1c0 LR: c0000000001a9c84 CTR: c00000000022ede0
[   10.469369] REGS: c0000009ffe83d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.469370] MSR:  9000000002009032 <SF,HV,VEC,EE,ME,IR,DR,RI>  CR: 24000228  XER: 00000000
[   10.469379] CFAR: c0000000001a9c80 IRQMASK: 1 
[   10.469381] GPR00: c0000000001a9c60 c00000096f707d50 c000000001685e00 0000000000000000 
[   10.469385] GPR04: 0000000000000000 c000000977113e00 c0000009fffc3c00 0000000000000002 
[   10.469390] GPR08: 0000000000000000 c000000000f87c88 0000000976210000 c0000000016d25e0 
[   10.469394] GPR12: 0000000024000222 c0000009fffc3c00 c0000009f33cbf90 0000000000000000 
[   10.469399] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.469403] GPR20: 0000000000000000 c00000096f4f8c80 0000000000000001 0000000000080000 
[   10.469407] GPR24: c00000096f4f8c00 c00000096f4f8c80 c00000096f4f8c00 c0000000016d2640 
[   10.469412] GPR28: 0000000000000001 c00000096f4f8c00 0000000000000009 c0000000016d21e0 
[   10.469416] NIP [c0000000008ed1c0] .cpuidle_get_cpu_driver+0x0/0x50
[   10.469418] LR [c0000000001a9c84] .do_idle+0x144/0x430
[   10.469418] Call Trace:
[   10.469420] [c00000096f707d50] [c0000000001a9c60] .do_idle+0x120/0x430 (unreliable)
[   10.469422] [c00000096f707e40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.469425] [c00000096f707eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.469427] [c00000096f707f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.469428] Instruction dump:
[   10.469430] 7c0802a6 f8010010 f821ff91 60000000 60000000 3d22001f e8695430 38210070 
[   10.469436] e8010010 7c0803a6 4e800020 60000000 <7c0802a6> fbe1fff8 7c7f1b78 f8010010 
[   10.469737] watchdog: CPU 18 self-detected hard LOCKUP @ .can_stop_idle_tick.isra.0+0xb0/0x180
[   10.469739] watchdog: CPU 18 TB:43796953071, last heartbeat TB:38601049744 (10148ms ago)
[   10.469740] Modules linked in:
[   10.469742] CPU: 18 PID: 0 Comm: swapper/18 Tainted: G        W         5.8.0-rc5+ #121
[   10.469743] NIP:  c00000000023f720 LR: c0000000002400b0 CTR: c00000000022ede0
[   10.469745] REGS: c0000009ffe17d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.469746] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.469753] CFAR: c00000000023f6c4 IRQMASK: 1 
[   10.469755] GPR00: c0000000002400b0 c00000096f763c10 c000000001685e00 0000000000000012 
[   10.469759] GPR04: 0000000000000000 c000000977a13e00 c0000009fffb9a00 0000000000000002 
[   10.469764] GPR08: 0000000000000012 0000000000000001 0000000000000000 c0000000016d25e0 
[   10.469768] GPR12: 0000000024000228 c0000009fffb9a00 c0000009f33eff90 0000000000000000 
[   10.469772] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.469777] GPR20: 0000000000000000 c00000096f7fd480 0000000000000001 0000000000080000 
[   10.469781] GPR24: c00000096f7fd400 c00000096f7fd480 c00000096f7fd400 0000000000000012 
[   10.469785] GPR28: 0000000000000001 c000000977a13e00 0000000000000012 0000000000000000 
[   10.469790] NIP [c00000000023f720] .can_stop_idle_tick.isra.0+0xb0/0x180
[   10.469792] LR [c0000000002400b0] .tick_nohz_idle_stop_tick+0x1c0/0x380
[   10.469792] Call Trace:
[   10.469794] [c00000096f763c10] [c00000096f7fd400] 0xc00000096f7fd400 (unreliable)
[   10.469796] [c00000096f763ca0] [c0000000002400b0] .tick_nohz_idle_stop_tick+0x1c0/0x380
[   10.469798] [c00000096f763d50] [c0000000001a9e94] .do_idle+0x354/0x430
[   10.469800] [c00000096f763e40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.469802] [c00000096f763eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.469805] [c00000096f763f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.469806] Instruction dump:
[   10.469807] 408200b0 38210090 7fe3fb78 e8010010 eba1ffe8 ebc1fff0 ebe1fff8 7c0803a6 
[   10.469813] 4e800020 60000000 60000000 60000000 <38210090> 3be00000 e8010010 7fe3fb78 
[   10.470348] watchdog: CPU 15 self-detected hard LOCKUP @ .default_idle_call+0x34/0x7c
[   10.470350] watchdog: CPU 15 TB:43797226921, last heartbeat TB:38600195247 (10150ms ago)
[   10.470351] Modules linked in:
[   10.470353] CPU: 15 PID: 0 Comm: swapper/15 Tainted: G        W         5.8.0-rc5+ #121
[   10.470355] NIP:  c000000000b57024 LR: c0000000001a9ea4 CTR: c00000000022ede0
[   10.470356] REGS: c0000009ffe3bd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.470357] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.470364] CFAR: c0000000001a9ea0 IRQMASK: 1 
[   10.470366] GPR00: c0000000001a9ea4 c00000096f757ce0 c000000001685e00 0000000000000000 
[   10.470371] GPR04: 0000000000000000 c000000977713e00 c0000009fffbd000 0000000000000808 
[   10.470375] GPR08: c00000096f4fe680 c00000096f4fe600 0000000000080000 c0000000016d25e0 
[   10.470379] GPR12: 0000000024000228 c0000009fffbd000 c0000009f33e3f90 0000000000000000 
[   10.470383] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.470388] GPR20: 0000000000000000 c00000096f4fe680 0000000000000001 0000000000080000 
[   10.470392] GPR24: c00000096f4fe600 c00000096f4fe680 c00000096f4fe600 c0000000016d2640 
[   10.470396] GPR28: 0000000000000001 c00000096f4fe600 000000000000000f c0000000016d21e0 
[   10.470401] NIP [c000000000b57024] .default_idle_call+0x34/0x7c
[   10.470402] LR [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.470403] Call Trace:
[   10.470404] [c00000096f757ce0] [c000000000b57034] .default_idle_call+0x44/0x7c (unreliable)
[   10.470407] [c00000096f757d50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.470409] [c00000096f757e40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.470411] [c00000096f757eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.470413] [c00000096f757f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.470415] Instruction dump:
[   10.470416] f8010010 f821ff91 60000000 60000000 e92d0968 3d400008 39090080 7ce040a8 
[   10.470422] 7ce75078 7ce041ad 40c2fff4 7c0004ac <e9290080> 71290004 40820024 4b4c3f61 
[   10.470430] watchdog: CPU 19 self-detected hard LOCKUP @ .arch_cpu_idle+0xbc/0x180
[   10.470431] watchdog: CPU 19 TB:43797259903, last heartbeat TB:38601405818 (10148ms ago)
[   10.470432] Modules linked in:
[   10.470434] CPU: 19 PID: 0 Comm: swapper/19 Tainted: G        W         5.8.0-rc5+ #121
[   10.470436] NIP:  c00000000001b04c LR: c00000000001b034 CTR: c00000000022ede0
[   10.470437] REGS: c0000009ffe0bd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.470438] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 44000222  XER: 00000000
[   10.470445] CFAR: c00000000001aa10 IRQMASK: 1 
[   10.470447] GPR00: c000000000b57034 c00000096f727c60 c000000001685e00 0000000000000000 
[   10.470451] GPR04: 0000000000000000 c000000977b13e00 c0000009fffb8800 0000000000000808 
[   10.470456] GPR08: c00000096f7f0e80 0000000000008002 c00000096f7f0e00 c0000000016d25e0 
[   10.470460] GPR12: 0000000024000228 c0000009fffb8800 c0000009f33f3f90 0000000000000000 
[   10.470465] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.470469] GPR20: 0000000000000000 c00000096f7f0e80 0000000000000001 0000000000080000 
[   10.470473] GPR24: c00000096f7f0e00 c00000096f7f0e80 c00000096f7f0e00 c0000000016d2640 
[   10.470477] GPR28: 0000000000000001 c00000096f7f0e00 0000000000000013 9000000000009032 
[   10.470482] NIP [c00000000001b04c] .arch_cpu_idle+0xbc/0x180
[   10.470483] LR [c00000000001b034] .arch_cpu_idle+0xa4/0x180
[   10.470484] Call Trace:
[   10.470485] [c00000096f727c60] [c00000000001b078] .arch_cpu_idle+0xe8/0x180 (unreliable)
[   10.470488] [c00000096f727ce0] [c000000000b57034] .default_idle_call+0x44/0x7c
[   10.470490] [c00000096f727d50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.470492] [c00000096f727e40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.470494] [c00000096f727eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.470496] [c00000096f727f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.470498] Instruction dump:
[   10.470499] 60000000 fbe10078 7fe000a6 39200002 7d210164 4bfff9c1 60000000 73e98000 
[   10.470505] 418200c4 39200000 61298002 7d210164 <3d22000d> ebe10078 3929a380 e9290160 
[   10.470512] watchdog: CPU 2 self-detected hard LOCKUP @ .rcu_dynticks_eqs_exit+0x30/0x60
[   10.470514] watchdog: CPU 2 TB:43797357106, last heartbeat TB:38597257076 (10156ms ago)
[   10.470514] Modules linked in:
[   10.470516] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G        W         5.8.0-rc5+ #121
[   10.470517] NIP:  c000000000b4f200 LR: c000000000b4f2fc CTR: c00000000022ede0
[   10.470519] REGS: c0000009ffed7d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.470519] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.470525] CFAR: c000000000b4f2f8 IRQMASK: 1 
[   10.470527] GPR00: c000000000214d98 c00000096f717c50 c000000001685e00 0000000000000000 
[   10.470531] GPR04: 0000000000000000 c000000976a13e00 c0000009ffffdb80 0000000000000002 
[   10.470534] GPR08: 000000000660a136 c000000976a9a000 c000000976a9a0d8 0000000000000400 
[   10.470538] GPR12: 0000000024000228 c0000009ffffdb80 c0000009f33aff90 0000000000000000 
[   10.470542] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.470545] GPR20: 0000000000000000 c00000096f4eea80 0000000000000001 0000000000080000 
[   10.470549] GPR24: c00000096f4eea00 c00000096f4eea80 c00000096f4eea00 c0000000016d2640 
[   10.470552] GPR28: 0000000000000001 c00000096f4eea00 0000000000000002 c000000976a9a000 
[   10.470557] NIP [c000000000b4f200] .rcu_dynticks_eqs_exit+0x30/0x60
[   10.470558] LR [c000000000b4f2fc] .rcu_eqs_exit.isra.0+0x4c/0xa0
[   10.470558] Call Trace:
[   10.470559] [c00000096f717c50] [c00000096f717ce0] 0xc00000096f717ce0 (unreliable)
[   10.470561] [c00000096f717cd0] [c000000000214d98] .rcu_idle_exit+0x28/0x50
[   10.470563] [c00000096f717d50] [c0000000001a9d20] .do_idle+0x1e0/0x430
[   10.470565] [c00000096f717e40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.470567] [c00000096f717eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.470569] [c00000096f717f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.470570] Instruction dump:
[   10.470571] e94d0030 3d22ff90 39294200 7d295214 394900d8 7c0004ac 38e00002 7d005028 
[   10.470576] 7d074214 7d00512d 40c2fff4 7c0004ac <71080001> 4d820020 3900fffe 7ce05028 
[   10.470661] watchdog: CPU 12 self-detected hard LOCKUP @ .tick_check_broadcast_expired+0x24/0x50
[   10.470662] watchdog: CPU 12 TB:43797430562, last heartbeat TB:38599394684 (10152ms ago)
[   10.470663] Modules linked in:
[   10.470665] CPU: 12 PID: 0 Comm: swapper/12 Tainted: G        W         5.8.0-rc5+ #121
[   10.470667] NIP:  c00000000023e564 LR: c0000000001a9c60 CTR: c00000000022ede0
[   10.470668] REGS: c0000009ffe5fd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.470669] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.470676] CFAR: c0000000001a9c5c IRQMASK: 1 
[   10.470678] GPR00: c0000000001a9c60 c00000096f77fd50 c000000001685e00 000000000000000c 
[   10.470682] GPR04: 0000000000000000 c000000977413e00 c0000009fffc0600 0000000000000002 
[   10.470686] GPR08: 0000000000000000 0000000000000000 c0000000017c8a80 0000000000000400 
[   10.470691] GPR12: 0000000024000222 c0000009fffc0600 c0000009f33d7f90 0000000000000000 
[   10.470695] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.470699] GPR20: 0000000000000000 c00000096f4eb480 0000000000000001 0000000000080000 
[   10.470703] GPR24: c00000096f4eb400 c00000096f4eb480 c00000096f4eb400 c0000000016d2640 
[   10.470708] GPR28: 0000000000000001 c00000096f4eb400 000000000000000c c0000000016d21e0 
[   10.470713] NIP [c00000000023e564] .tick_check_broadcast_expired+0x24/0x50
[   10.470714] LR [c0000000001a9c60] .do_idle+0x120/0x430
[   10.470715] Call Trace:
[   10.470716] [c00000096f77fd50] [c0000000001a9c60] .do_idle+0x120/0x430 (unreliable)
[   10.470719] [c00000096f77fe40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.470721] [c00000096f77feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.470723] [c00000096f77ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.470724] Instruction dump:
[   10.470726] 7c0803a6 4e800020 60000000 7c0802a6 f8010010 f821ff91 60000000 60000000 
[   10.470731] 3d420014 394a2c80 a06d000a 38210070 <e8010010> 5469ecf8 546306be 7c0803a6 
[   10.470901] watchdog: CPU 3 self-detected hard LOCKUP @ .rcu_dynticks_eqs_exit+0x18/0x60
[   10.470902] watchdog: CPU 3 TB:43797557615, last heartbeat TB:38597462127 (10156ms ago)
[   10.470903] Modules linked in:
[   10.470905] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G        W         5.8.0-rc5+ #121
[   10.470906] NIP:  c000000000b4f1e8 LR: c000000000b4f2fc CTR: c00000000022ede0
[   10.470907] REGS: c0000009ffecbd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.470908] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.470914] CFAR: c000000000b4f2f8 IRQMASK: 1 
[   10.470916] GPR00: c000000000214d98 c00000096f723c50 c000000001685e00 0000000000000000 
[   10.470920] GPR04: 0000000000000000 c000000976b13e00 c0000009ffffc900 0000000000000808 
[   10.470923] GPR08: c00000096f4e2480 c000000976b9a000 c000000976b9a0d8 c0000000016d25e0 
[   10.470927] GPR12: 0000000024000228 c0000009ffffc900 c0000009f33b3f90 0000000000000000 
[   10.470931] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.470934] GPR20: 0000000000000000 c00000096f4e2480 0000000000000001 0000000000080000 
[   10.470938] GPR24: c00000096f4e2400 c00000096f4e2480 c00000096f4e2400 c0000000016d2640 
[   10.470942] GPR28: 0000000000000001 c00000096f4e2400 0000000000000003 c000000976b9a000 
[   10.470946] NIP [c000000000b4f1e8] .rcu_dynticks_eqs_exit+0x18/0x60
[   10.470947] LR [c000000000b4f2fc] .rcu_eqs_exit.isra.0+0x4c/0xa0
[   10.470948] Call Trace:
[   10.470949] [c00000096f723c50] [c00000096f723ce0] 0xc00000096f723ce0 (unreliable)
[   10.470951] [c00000096f723cd0] [c000000000214d98] .rcu_idle_exit+0x28/0x50
[   10.470953] [c00000096f723d50] [c0000000001a9d20] .do_idle+0x1e0/0x430
[   10.470954] [c00000096f723e40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.470956] [c00000096f723eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.470958] [c00000096f723f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.470959] Instruction dump:
[   10.470960] 40c2fff4 7c0004ac 4e800020 60000000 60000000 60000000 e94d0030 3d22ff90 
[   10.470965] 39294200 7d295214 394900d8 7c0004ac <38e00002> 7d005028 7d074214 7d00512d 
[   10.471123] watchdog: CPU 13 self-detected hard LOCKUP @ .arch_cpu_idle+0xbc/0x180
[   10.471125] watchdog: CPU 13 TB:43797663816, last heartbeat TB:38599633583 (10152ms ago)
[   10.471125] Modules linked in:
[   10.471128] CPU: 13 PID: 0 Comm: swapper/13 Tainted: G        W         5.8.0-rc5+ #121
[   10.471129] NIP:  c00000000001b04c LR: c00000000001b034 CTR: c00000000022ede0
[   10.471130] REGS: c0000009ffe53d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.471131] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 44000228  XER: 00000000
[   10.471138] CFAR: c00000000001aa10 IRQMASK: 1 
[   10.471141] GPR00: c000000000b57034 c00000096f72bc60 c000000001685e00 0000000000000000 
[   10.471145] GPR04: 0000000000000000 c000000977513e00 c0000009fffbf400 0000000000000808 
[   10.471149] GPR08: c00000096f4f2080 0000000000008002 c00000096f4f2000 c0000000016d25e0 
[   10.471154] GPR12: 0000000024000222 c0000009fffbf400 c0000009f33dbf90 0000000000000000 
[   10.471158] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.471162] GPR20: 0000000000000000 c00000096f4f2080 0000000000000001 0000000000080000 
[   10.471167] GPR24: c00000096f4f2000 c00000096f4f2080 c00000096f4f2000 c0000000016d2640 
[   10.471171] GPR28: 0000000000000001 c00000096f4f2000 000000000000000d 9000000000009032 
[   10.471176] NIP [c00000000001b04c] .arch_cpu_idle+0xbc/0x180
[   10.471177] LR [c00000000001b034] .arch_cpu_idle+0xa4/0x180
[   10.471178] Call Trace:
[   10.471179] [c00000096f72bc60] [c00000000001b078] .arch_cpu_idle+0xe8/0x180 (unreliable)
[   10.471182] [c00000096f72bce0] [c000000000b57034] .default_idle_call+0x44/0x7c
[   10.471184] [c00000096f72bd50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.471186] [c00000096f72be40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.471188] [c00000096f72beb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.471190] [c00000096f72bf90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.471191] Instruction dump:
[   10.471193] 60000000 fbe10078 7fe000a6 39200002 7d210164 4bfff9c1 60000000 73e98000 
[   10.471199] 418200c4 39200000 61298002 7d210164 <3d22000d> ebe10078 3929a380 e9290160 
[   10.655385] watchdog: CPU 4 self-detected hard LOCKUP @ .cpuidle_get_cpu_driver+0x10/0x50
[   10.655386] watchdog: CPU 4 TB:43892009279, last heartbeat TB:38597670672 (10340ms ago)
[   10.655387] Modules linked in:
[   10.655389] CPU: 4 PID: 0 Comm: swapper/4 Tainted: G        W         5.8.0-rc5+ #121
[   10.655391] NIP:  c0000000008ed1d0 LR: c0000000001a9c84 CTR: c00000000022ede0
[   10.655392] REGS: c0000009ffebfd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.655393] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 28000222  XER: 00000000
[   10.655400] CFAR: c0000000001a9c80 IRQMASK: 1 
[   10.655402] GPR00: c0000000001a9c84 c00000096f76bd50 c000000001685e00 0000000000000000 
[   10.655407] GPR04: 0000000000000000 c000000976c13e00 c0000009fffc9600 0000000000000002 
[   10.655411] GPR08: 0000000000000000 c000000000f87c88 0000000975d10000 0000000300000004 
[   10.655415] GPR12: 0000000024000222 c0000009fffc9600 c0000009f33b7f90 0000000000000000 
[   10.655420] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.655424] GPR20: 0000000000000000 c00000096f4ed880 0000000000000001 0000000000080000 
[   10.655428] GPR24: c00000096f4ed800 c00000096f4ed880 c00000096f4ed800 c0000000016d2640 
[   10.655432] GPR28: 0000000000000001 c00000096f4ed800 0000000000000004 0000000000000000 
[   10.655438] NIP [c0000000008ed1d0] .cpuidle_get_cpu_driver+0x10/0x50
[   10.655439] LR [c0000000001a9c84] .do_idle+0x144/0x430
[   10.655440] Call Trace:
[   10.655441] [c00000096f76bd50] [c0000000001a9c84] .do_idle+0x144/0x430 (unreliable)
[   10.655443] [c00000096f76be40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.655445] [c00000096f76beb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.655447] [c00000096f76bf90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.655449] Instruction dump:
[   10.655450] 60000000 3d22001f e8695430 38210070 e8010010 7c0803a6 4e800020 60000000 
[   10.655456] 7c0802a6 fbe1fff8 7c7f1b78 f8010010 <f821ff81> 60000000 60000000 2c3f0000 
[   10.656189] watchdog: CPU 6 self-detected hard LOCKUP @ .default_idle_call+0x2c/0x7c
[   10.656190] watchdog: CPU 6 TB:43892420592, last heartbeat TB:38598082863 (10340ms ago)
[   10.656191] Modules linked in:
[   10.656193] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G        W         5.8.0-rc5+ #121
[   10.656195] NIP:  c000000000b5701c LR: c0000000001a9ea4 CTR: c00000000022ede0
[   10.656196] REGS: c0000009ffea7d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.656197] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.656204] CFAR: c0000000001a9ea0 IRQMASK: 1 
[   10.656206] GPR00: c0000000001a9ea4 c00000096f777ce0 c000000001685e00 0000000000000000 
[   10.656210] GPR04: 0000000000000000 c000000976e13e00 c0000009fffc7200 0000000000000808 
[   10.656215] GPR08: c00000096f4f9e80 c00000096f4f9e00 0000000000080000 c0000000016d25e0 
[   10.656219] GPR12: 0000000024000228 c0000009fffc7200 c0000009f33bff90 0000000000000000 
[   10.656223] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.656227] GPR20: 0000000000000000 c00000096f4f9e80 0000000000000001 0000000000080000 
[   10.656232] GPR24: c00000096f4f9e00 c00000096f4f9e80 c00000096f4f9e00 c0000000016d2640 
[   10.656236] GPR28: 0000000000000001 c00000096f4f9e00 0000000000000006 c0000000016d21e0 
[   10.656241] NIP [c000000000b5701c] .default_idle_call+0x2c/0x7c
[   10.656242] LR [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.656243] Call Trace:
[   10.656244] [c00000096f777ce0] [c000000000b57034] .default_idle_call+0x44/0x7c (unreliable)
[   10.656247] [c00000096f777d50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.656249] [c00000096f777e40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.656251] [c00000096f777eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.656253] [c00000096f777f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.656255] Instruction dump:
[   10.656256] 60000000 7c0802a6 f8010010 f821ff91 60000000 60000000 e92d0968 3d400008 
[   10.656262] 39090080 7ce040a8 7ce75078 7ce041ad <40c2fff4> 7c0004ac e9290080 71290004 
[   10.657746] watchdog: CPU 10 self-detected hard LOCKUP @ .rcu_eqs_enter.isra.0+0x58/0x80
[   10.657748] watchdog: CPU 10 TB:43893218388, last heartbeat TB:38598918623 (10340ms ago)
[   10.657749] Modules linked in:
[   10.657751] CPU: 10 PID: 0 Comm: swapper/10 Tainted: G        W         5.8.0-rc5+ #121
[   10.657753] NIP:  c000000000b4f288 LR: c0000000001a9e9c CTR: c00000000022ede0
[   10.657754] REGS: c0000009ffe77d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.657755] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.657762] CFAR: c000000000b4f26c IRQMASK: 1 
[   10.657764] GPR00: c0000000001a9e9c c00000096f72fd50 c000000001685e00 0000000000000000 
[   10.657769] GPR04: 0000000000000000 c000000977213e00 c0000009fffc2a00 0000000000000000 
[   10.657773] GPR08: 0000000000000001 0000000976310000 c000000000f8a000 0000000300000004 
[   10.657777] GPR12: 0000000024000222 c0000009fffc2a00 c0000009f33cff90 0000000000000000 
[   10.657782] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.657786] GPR20: 0000000000000000 c00000096f4ea280 0000000000000001 0000000000080000 
[   10.657790] GPR24: c00000096f4ea200 c00000096f4ea280 c00000096f4ea200 c0000000016d2640 
[   10.657795] GPR28: 0000000000000001 c00000096f4ea200 000000000000000a c0000000016d21e0 
[   10.657800] NIP [c000000000b4f288] .rcu_eqs_enter.isra.0+0x58/0x80
[   10.657801] LR [c0000000001a9e9c] .do_idle+0x35c/0x430
[   10.657802] Call Trace:
[   10.657803] [c00000096f72fd50] [c0000000001a9e9c] .do_idle+0x35c/0x430 (unreliable)
[   10.657806] [c00000096f72fe40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.657808] [c00000096f72feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.657810] [c00000096f72ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.657811] Instruction dump:
[   10.657813] 40820048 e90900c8 38e00000 f8e900d0 2c280001 41820014 3908ffff f90900c8 
[   10.657819] 4e800020 60000000 812900d8 e9260030 <7d4a4a14> f8ea00c8 4bffff00 60000000 
[   10.658186] watchdog: CPU 11 self-detected hard LOCKUP @ .default_idle_call+0x2c/0x7c
[   10.658187] watchdog: CPU 11 TB:43893442523, last heartbeat TB:38599164832 (10340ms ago)
[   10.658188] Modules linked in:
[   10.658190] CPU: 11 PID: 0 Comm: swapper/11 Tainted: G        W         5.8.0-rc5+ #121
[   10.658192] NIP:  c000000000b5701c LR: c0000000001a9ea4 CTR: c00000000022ede0
[   10.658193] REGS: c0000009ffe6bd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.658194] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.658201] CFAR: c0000000001a9ea0 IRQMASK: 1 
[   10.658204] GPR00: c0000000001a9ea4 c00000096f71fce0 c000000001685e00 0000000000000000 
[   10.658208] GPR04: 0000000000000000 c000000977313e00 c0000009fffc1800 0000000000000808 
[   10.658212] GPR08: c00000096f4f7a80 c00000096f4f7a00 0000000000080000 c0000000016d25e0 
[   10.658217] GPR12: 0000000024000228 c0000009fffc1800 c0000009f33d3f90 0000000000000000 
[   10.658221] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.658225] GPR20: 0000000000000000 c00000096f4f7a80 0000000000000001 0000000000080000 
[   10.658230] GPR24: c00000096f4f7a00 c00000096f4f7a80 c00000096f4f7a00 c0000000016d2640 
[   10.658234] GPR28: 0000000000000001 c00000096f4f7a00 000000000000000b c0000000016d21e0 
[   10.658239] NIP [c000000000b5701c] .default_idle_call+0x2c/0x7c
[   10.658240] LR [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.658241] Call Trace:
[   10.658242] [c00000096f71fce0] [c000000000b57034] .default_idle_call+0x44/0x7c (unreliable)
[   10.658244] [c00000096f71fd50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.658247] [c00000096f71fe40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.658249] [c00000096f71feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.658251] [c00000096f71ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.658252] Instruction dump:
[   10.658254] 60000000 7c0802a6 f8010010 f821ff91 60000000 60000000 e92d0968 3d400008 
[   10.658260] 39090080 7ce040a8 7ce75078 7ce041ad <40c2fff4> 7c0004ac e9290080 71290004 
[   10.659070] watchdog: CPU 20 self-detected hard LOCKUP @ .can_stop_idle_tick.isra.0+0xcc/0x180
[   10.659072] watchdog: CPU 20 TB:43893894591, last heartbeat TB:38601710719 (10336ms ago)
[   10.659073] Modules linked in:
[   10.659075] CPU: 20 PID: 0 Comm: swapper/20 Tainted: G        W         5.8.0-rc5+ #121
[   10.659076] NIP:  c00000000023f73c LR: c0000000002400b0 CTR: c00000000022ede0
[   10.659078] REGS: c0000009ffdffd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.659079] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.659086] CFAR: c00000000023f6c4 IRQMASK: 1 
[   10.659088] GPR00: c0000000002400b0 c00000096f75bca0 c000000001685e00 0000000000000000 
[   10.659092] GPR04: 0000000000000000 c000000977c13e00 c0000009fffb7580 0000000000000002 
[   10.659097] GPR08: 0000000000000014 0000000000000001 0000000000000000 0000000300000004 
[   10.659101] GPR12: 0000000024000222 c0000009fffb7580 c0000009f33f7f90 0000000000000000 
[   10.659105] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.659110] GPR20: 0000000000000000 c00000096f7e5a80 0000000000000001 0000000000080000 
[   10.659114] GPR24: c00000096f7e5a00 c00000096f7e5a80 c00000096f7e5a00 0000000000000014 
[   10.659118] GPR28: 0000000000000001 c00000096f7e5a00 0000000000000014 c000000977c13da8 
[   10.659123] NIP [c00000000023f73c] .can_stop_idle_tick.isra.0+0xcc/0x180
[   10.659124] LR [c0000000002400b0] .tick_nohz_idle_stop_tick+0x1c0/0x380
[   10.659125] Call Trace:
[   10.659126] [c00000096f75bca0] [c0000000002400b0] .tick_nohz_idle_stop_tick+0x1c0/0x380 (unreliable)
[   10.659129] [c00000096f75bd50] [c0000000001a9e94] .do_idle+0x354/0x430
[   10.659131] [c00000096f75be40] [c0000000001aa1c0] .cpu_startup_entry+0x30/0x50
[   10.659133] [c00000096f75beb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.659135] [c00000096f75bf90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.659137] Instruction dump:
[   10.659138] 7c0803a6 4e800020 60000000 60000000 60000000 38210090 3be00000 e8010010 
[   10.659144] 7fe3fb78 eba1ffe8 ebc1fff0 ebe1fff8 <7c0803a6> 4e800020 60000000 60000000 
[   10.848983] watchdog: CPU 8 self-detected hard LOCKUP @ .default_idle_call+0x2c/0x7c
[   10.848985] watchdog: CPU 8 TB:43991130719, last heartbeat TB:38598494447 (10532ms ago)
[   10.848985] Modules linked in:
[   10.848988] CPU: 8 PID: 0 Comm: swapper/8 Tainted: G        W         5.8.0-rc5+ #121
[   10.848989] NIP:  c000000000b5701c LR: c0000000001a9ea4 CTR: c00000000022ede0
[   10.848990] REGS: c0000009ffe8fd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.848991] MSR:  9000000002009032 <SF,HV,VEC,EE,ME,IR,DR,RI>  CR: 24000222  XER: 00000000
[   10.848999] CFAR: c0000000001a9ea0 IRQMASK: 1 
[   10.849001] GPR00: c0000000001a9ea4 c00000096f74fce0 c000000001685e00 0000000000000000 
[   10.849005] GPR04: 0000000000000000 c000000977013e00 c0000009fffc4e00 0000000000000808 
[   10.849010] GPR08: c00000096f4ec680 c00000096f4ec600 0000000000080000 c0000000016d25e0 
[   10.849014] GPR12: 0000000024000228 c0000009fffc4e00 c0000009f33c7f90 0000000000000000 
[   10.849018] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.849023] GPR20: 0000000000000000 c00000096f4ec680 0000000000000001 0000000000080000 
[   10.849027] GPR24: c00000096f4ec600 c00000096f4ec680 c00000096f4ec600 c0000000016d2640 
[   10.849031] GPR28: 0000000000000001 c00000096f4ec600 0000000000000008 c0000000016d21e0 
[   10.849036] NIP [c000000000b5701c] .default_idle_call+0x2c/0x7c
[   10.849037] LR [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.849038] Call Trace:
[   10.849039] [c00000096f74fce0] [c000000000b57034] .default_idle_call+0x44/0x7c (unreliable)
[   10.849042] [c00000096f74fd50] [c0000000001a9ea4] .do_idle+0x364/0x430
[   10.849044] [c00000096f74fe40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.849046] [c00000096f74feb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.849048] [c00000096f74ff90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.849049] Instruction dump:
[   10.849051] 60000000 7c0802a6 f8010010 f821ff91 60000000 60000000 e92d0968 3d400008 
[   10.849057] 39090080 7ce040a8 7ce75078 7ce041ad <40c2fff4> 7c0004ac e9290080 71290004 
[   10.849204] watchdog: CPU 17 self-detected hard LOCKUP @ .tick_nohz_idle_stop_tick+0x0/0x380
[   10.849206] watchdog: CPU 17 TB:43991245411, last heartbeat TB:38600736139 (10528ms ago)
[   10.849207] Modules linked in:
[   10.849209] CPU: 17 PID: 0 Comm: swapper/17 Tainted: G        W         5.8.0-rc5+ #121
[   10.849210] NIP:  c00000000023fef0 LR: c0000000001a9e94 CTR: c00000000022ede0
[   10.849212] REGS: c0000009ffe23d70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   10.849212] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 44000222  XER: 00000000
[   10.849220] CFAR: c0000000001a9e90 IRQMASK: 1 
[   10.849222] GPR00: c0000000001a9ca0 c00000096f767d50 c000000001685e00 0000000000000001 
[   10.849226] GPR04: 0000000000000000 c000000977913e00 c0000009fffbac00 0000000000000002 
[   10.849230] GPR08: 0000000000000000 0000000000000000 0000000976a10000 c0000000016d25e0 
[   10.849235] GPR12: 0000000024000228 c0000009fffbac00 c0000009f33ebf90 0000000000000000 
[   10.849239] GPR16: 0000000000000000 0000000000000000 c0000000015b9bd8 0000000000000000 
[   10.849243] GPR20: 0000000000000000 c00000096f7f4480 0000000000000001 0000000000080000 
[   10.849247] GPR24: c00000096f7f4400 c00000096f7f4480 c00000096f7f4400 c0000000016d2640 
[   10.849252] GPR28: 0000000000000001 c00000096f7f4400 0000000000000011 c0000000016d21e0 
[   10.849257] NIP [c00000000023fef0] .tick_nohz_idle_stop_tick+0x0/0x380
[   10.849258] LR [c0000000001a9e94] .do_idle+0x354/0x430
[   10.849259] Call Trace:
[   10.849260] [c00000096f767d50] [c0000000001a9ca0] .do_idle+0x160/0x430 (unreliable)
[   10.849263] [c00000096f767e40] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   10.849265] [c00000096f767eb0] [c0000000000512a8] .start_secondary+0x628/0x650
[   10.849267] [c00000096f767f90] [c00000000000d2e4] start_secondary_prolog+0x10/0x14
[   10.849268] Instruction dump:
[   10.849270] 3929dfa8 7d4a182a e8010010 7c0803a6 7d2a4a14 ebe1fff8 e8690048 786317e2 
[   10.849275] 4e800020 60000000 60000000 60000000 <7c0802a6> fb61ffd8 fbc1fff0 fbe1fff8 
[   28.346664] pci 0000:a0:00.0: Adding to iommu group 2
[   28.346805] pci 0000:a0:00.1: Added to existing PE#79
[   28.346831] pci 0000:a0:00.1: Adding to iommu group 2
[   28.346963] pci_bus 0001:00: Configuring PE for bus
[   28.346985] pci 0001:00     : [PE# 7e] Secondary bus 0x0000000000000000 associated with PE#7e
[   28.347017] pci 0001:00:00.0: Configured PE#7e
[   28.347117] pci_bus 0002:00: Configuring PE for bus
[   28.347138] pci 0002:00     : [PE# 7e] Secondary bus 0x0000000000000000 associated with PE#7e
[   28.347170] pci 0002:00:00.0: Configured PE#7e
[   28.347270] pci_bus 0003:00: Configuring PE for bus
[   28.347291] pci 0003:00     : [PE# 7e] Secondary bus 0x0000000000000000 associated with PE#7e
[   28.347322] pci 0003:00:00.0: Configured PE#7e
[   28.819355] pci_bus 0004:00: Configuring PE for bus
[   28.819377] pci 0004:00     : [PE# 7e] Secondary bus 0x0000000000000000 associated with PE#7e
[   28.819409] pci 0004:00:00.0: Configured PE#7e
[   28.819508] pci_bus 0005:00: Configuring PE for bus
[   28.819530] pci 0005:00     : [PE# 7e] Secondary bus 0x0000000000000000 associated with PE#7e
[   28.819561] pci 0005:00:00.0: Configured PE#7e
[   28.819660] pci_bus 0005:01: Configuring PE for bus
[   28.819685] pci 0005:01     : [PE# 7d] Secondary bus 0x0000000000000001 associated with PE#7d
[   28.819723] pci 0005:01:00.0: Configured PE#7d
[   28.819757] pci 0005:01     : [PE# 7d] DMA weight 20 (20), assigned (0) 8 DMA32 segments
[   28.819780] pci 0005:01     : [PE# 7d]  Setting up 32-bit TCE table at 00000000..7fffffff
[   28.856013] pci 0005:01:00.0: Adding to iommu group 3
[   28.856227] pci 0005:01:00.1: Added to existing PE#7d
[   28.856252] pci 0005:01:00.1: Adding to iommu group 3
[   29.295219] pci 0000:00:00.0: enabling device (0141 -> 0143)
[   29.295267] pci 0000:01:00.0: enabling device (0541 -> 0543)
[   29.295319] pci 0000:02:06.0: enabling device (0541 -> 0543)
[   29.295370] pci 0000:02:08.0: enabling device (0541 -> 0543)
[   29.295424] pci 0000:80:00.0: enabling device (0141 -> 0143)
[   29.295477] pci 0000:02:09.0: enabling device (0541 -> 0543)
[   29.295527] pci 0005:00:00.0: enabling device (0141 -> 0143)
[   29.295561] EEH: Capable adapter found: recovery enabled.
[   29.295692] PCI: Probing PCI hardware done
[   29.299110] watchdog: CPU 0 self-detected hard LOCKUP @ .__do_irq+0x6c/0x1d0
[   29.299111] watchdog: CPU 0 TB:53437576815, last heartbeat TB:38596666946 (28986ms ago)
[   29.299112] Modules linked in:
[   29.299115] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W         5.8.0-rc5+ #121
[   29.299116] NIP:  c00000000001573c LR: c000000000015718 CTR: c0000000000b7c90
[   29.299118] REGS: c0000009ffeefd70 TRAP: 0900   Tainted: G        W          (5.8.0-rc5+)
[   29.299119] MSR:  9000000000009032 <SF,HV,EE,ME,IR,DR,RI>  CR: 24000224  XER: 20000000
[   29.299126] CFAR: c0000000000b7ddc IRQMASK: 1 
[   29.299128] GPR00: c000000000015718 c0000009fffabf10 c000000001685e00 0000000000000010 
[   29.299132] GPR04: 0000000000000002 0000000000000000 0000000000000004 0000000000000001 
[   29.299137] GPR08: 0000000000008002 0000000000000000 0000000000000001 fffffffffffffffd 
[   29.299141] GPR12: c00000096f3fcde0 c000000001890000 0000000000000000 0000000000000000 
[   29.299145] GPR16: 0000000000000000 0000000000000000 0000000000000001 0000000000000000 
[   29.299149] GPR20: 0000000000000000 c000000001546780 0000000000000001 0000000000080000 
[   29.299154] GPR24: c000000001546700 c000000001546780 c000000001546700 c0000000016d2640 
[   29.299158] GPR28: 0000000000000000 c000000000f87460 c000000001683900 c000000001683900 
[   29.299163] NIP [c00000000001573c] .__do_irq+0x6c/0x1d0
[   29.299164] LR [c000000000015718] .__do_irq+0x48/0x1d0
[   29.299165] Call Trace:
[   29.299166] [c0000009fffabf10] [c000000000015718] .__do_irq+0x48/0x1d0 (unreliable)
[   29.299168] [c0000009fffabf90] [c000000000027824] .call_do_irq+0x14/0x24
[   29.299170] [c000000001683800] [c000000000015918] .do_IRQ+0x78/0xe0
[   29.299173] [c000000001683890] [c00000000000931c] hardware_interrupt_common_virt+0x1dc/0x1e0
[   29.299175] --- interrupt: 500 at .arch_local_irq_restore+0x5c/0x90
[   29.299176]     LR = .arch_local_irq_restore+0x40/0x90
[   29.299177] [c000000001683c00] [c00000000001b078] .arch_cpu_idle+0xe8/0x180
[   29.299179] [c000000001683c80] [c000000000b57034] .default_idle_call+0x44/0x7c
[   29.299181] [c000000001683cf0] [c0000000001a9ea4] .do_idle+0x364/0x430
[   29.299183] [c000000001683de0] [c0000000001aa1c4] .cpu_startup_entry+0x34/0x50
[   29.299185] [c000000001683e50] [c000000000012698] .rest_init+0xd8/0xf0
[   29.299187] [c000000001683ed0] [c000000000e248e0] .start_kernel+0x708/0x750
[   29.299189] [c000000001683f90] [c00000000000d42c] start_here_common+0x1c/0x3f0
[   29.299191] Instruction dump:
[   29.299192] 7d4903a6 e8490008 4e800421 e8410028 892d0989 71280044 40820018 5529003c 
[   29.299199] 39000000 61088002 992d0989 7d010164 <2c230000> 41820140 481e1bed 60000000 
[   29.322560] opal-stat: OPAL: no stats root node
[   29.322589] cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT
[   29.326628] HugeTLB registered 16.0 MiB page size, pre-allocated 0 pages
[   29.326664] HugeTLB registered 16.0 GiB page size, pre-allocated 0 pages
[   30.672004] iommu: Default domain type: Translated 
[   30.672211] vgaarb: loaded
[   30.672662] SCSI subsystem initialized
[   30.672875] libata version 3.00 loaded.
[   30.673616] watchdog: CPU 11 became unstuck TB:54141383030
[   30.673638] CPU: 11 PID: 65 Comm: migration/11 Tainted: G        W         5.8.0-rc5+ #121
[   30.673655] Call Trace:
[   30.673671] [c00000096fc2b9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   30.673691] [c00000096fc2ba50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   30.673710] [c00000096fc2bb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   30.673728] [c00000096fc2bbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   30.673747] [c00000096fc2bcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   30.673765] [c00000096fc2bd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   30.673783] [c00000096fc2be20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   30.673803] watchdog: CPU 5 became unstuck TB:54141383583
[   30.673823] CPU: 5 PID: 35 Comm: migration/5 Tainted: G        W         5.8.0-rc5+ #121
[   30.673840] Call Trace:
[   30.673856] [c00000096f8cf9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   30.673875] [c00000096f8cfa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   30.673893] [c00000096f8cfb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   30.673912] [c00000096f8cfbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   30.673930] [c00000096f8cfcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   30.673948] [c00000096f8cfd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   30.673966] [c00000096f8cfe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   30.673986] watchdog: CPU 19 became unstuck TB:54141383409
[   30.674009] CPU: 19 PID: 105 Comm: migration/19 Tainted: G        W         5.8.0-rc5+ #121
[   30.674026] Call Trace:
[   30.674042] [c00000096d0bb9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   30.674061] [c00000096d0bba50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   30.674079] [c00000096d0bbb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   30.674098] [c00000096d0bbbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   30.674116] [c00000096d0bbcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   30.674134] [c00000096d0bbd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   30.674152] [c00000096d0bbe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   30.674171] watchdog: CPU 8 became unstuck TB:54141383583
[   30.674191] CPU: 8 PID: 50 Comm: migration/8 Tainted: G        W         5.8.0-rc5+ #121
[   30.674208] Call Trace:
[   30.674225] [c00000096fc0f9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   30.674245] [c00000096fc0fa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   30.674265] [c00000096fc0fb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   30.674284] [c00000096fc0fbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   30.674304] [c00000096fc0fcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   30.674323] [c00000096fc0fd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   30.674342] [c00000096fc0fe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   30.674363] watchdog: CPU 15 became unstuck TB:54141382959
[   30.674382] CPU: 15 PID: 85 Comm: migration/15 Tainted: G        W         5.8.0-rc5+ #121
[   30.972228] Call Trace:
[   30.972244] [c00000096d0ef9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   30.972263] [c00000096d0efa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   30.972282] [c00000096d0efb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   30.972300] [c00000096d0efbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   30.972318] [c00000096d0efcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   30.972337] [c00000096d0efd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   30.972354] [c00000096d0efe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   30.972374] watchdog: CPU 16 became unstuck TB:54141383023
[   30.972393] CPU: 16 PID: 90 Comm: migration/16 Tainted: G        W         5.8.0-rc5+ #121
[   30.972410] Call Trace:
[   30.972427] [c00000096d0ff9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   30.972447] [c00000096d0ffa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   31.419577] [c00000096d0ffb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   31.419597] [c00000096d0ffbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   31.419616] [c00000096d0ffcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   31.419636] [c00000096d0ffd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   31.419655] [c00000096d0ffe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   31.419675] watchdog: CPU 18 became unstuck TB:54141383391
[   31.419695] CPU: 18 PID: 100 Comm: migration/18 Tainted: G        W         5.8.0-rc5+ #121
[   31.419714] Call Trace:
[   31.419731] [c00000096d0c79b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   31.419754] [c00000096d0c7a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   31.419776] [c00000096d0c7b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   31.419798] [c00000096d0c7be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   32.088223] [c00000096d0c7cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   32.088244] [c00000096d0c7d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   32.088265] [c00000096d0c7e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   32.088289] watchdog: CPU 6 became unstuck TB:54141383823
[   32.088310] CPU: 6 PID: 40 Comm: migration/6 Tainted: G        W         5.8.0-rc5+ #121
[   32.088328] Call Trace:
[   32.088344] [c00000096f8ab9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   32.088364] [c00000096f8aba50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   32.088384] [c00000096f8abb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   32.088403] [c00000096f8abbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   32.088423] [c00000096f8abcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   32.088443] [c00000096f8abd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   32.635635] [c00000096f8abe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   32.635656] watchdog: CPU 12 became unstuck TB:54141383295
[   32.635676] CPU: 12 PID: 70 Comm: migration/12 Tainted: G        W         5.8.0-rc5+ #121
[   32.635694] Call Trace:
[   32.635710] [c00000096fc639b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   32.635730] [c00000096fc63a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   32.635750] [c00000096fc63b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   32.635770] [c00000096fc63be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   32.635789] [c00000096fc63cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   32.635809] [c00000096fc63d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   32.635828] [c00000096fc63e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   32.635848] watchdog: CPU 10 became unstuck TB:54141383136
[   32.635867] CPU: 10 PID: 60 Comm: migration/10 Tainted: G        W         5.8.0-rc5+ #121
[   33.173485] Call Trace:
[   33.173502] [c00000096fc4f9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   33.173525] [c00000096fc4fa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   33.173547] [c00000096fc4fb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   33.173569] [c00000096fc4fbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   33.173591] [c00000096fc4fcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   33.173612] [c00000096fc4fd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   33.173633] [c00000096fc4fe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   33.173655] watchdog: CPU 20 became unstuck TB:54141382879
[   33.173676] CPU: 20 PID: 110 Comm: migration/20 Tainted: G        W         5.8.0-rc5+ #121
[   33.173693] Call Trace:
[   33.173709] [c00000096d5239b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   33.173728] [c00000096d523a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   33.770869] [c00000096d523b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   33.770888] [c00000096d523be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   33.770906] [c00000096d523cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   33.770924] [c00000096d523d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   33.770942] [c00000096d523e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   33.770960] watchdog: CPU 17 became unstuck TB:54141383398
[   33.770981] CPU: 17 PID: 95 Comm: migration/17 Tainted: G        W         5.8.0-rc5+ #121
[   33.771001] Call Trace:
[   33.771020] [c00000096d0e79b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   33.771043] [c00000096d0e7a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   33.771067] [c00000096d0e7b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   33.771090] [c00000096d0e7be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   34.328966] [c00000096d0e7cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   34.328989] [c00000096d0e7d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   34.329011] [c00000096d0e7e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   34.329033] watchdog: CPU 14 became unstuck TB:54141383684
[   34.329053] CPU: 14 PID: 80 Comm: migration/14 Tainted: G        W         5.8.0-rc5+ #121
[   34.329072] Call Trace:
[   34.329090] [c00000096d0979b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   34.329113] [c00000096d097a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   34.329135] [c00000096d097b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   34.329157] [c00000096d097be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   34.329178] [c00000096d097cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   34.329200] [c00000096d097d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   34.329221] [c00000096d097e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   34.893441] watchdog: CPU 7 became unstuck TB:54141383786
[   34.893461] CPU: 7 PID: 45 Comm: migration/7 Tainted: G        W         5.8.0-rc5+ #121
[   34.893481] Call Trace:
[   34.893499] [c00000096fc039b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   34.893521] [c00000096fc03a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   34.893543] [c00000096fc03b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   34.893565] [c00000096fc03be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   34.893587] [c00000096fc03cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   34.893610] [c00000096fc03d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   34.893631] [c00000096fc03e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   34.893652] watchdog: CPU 21 became unstuck TB:54141382959
[   34.893671] CPU: 21 PID: 115 Comm: migration/21 Tainted: G        W         5.8.0-rc5+ #121
[   35.463290] Call Trace:
[   35.463308] [c00000096d54f9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   35.463329] [c00000096d54fa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   35.463349] [c00000096d54fb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   35.463369] [c00000096d54fbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   35.463389] [c00000096d54fcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   35.463409] [c00000096d54fd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   35.463429] [c00000096d54fe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   35.463448] watchdog: CPU 9 became unstuck TB:54141382735
[   35.463469] CPU: 9 PID: 55 Comm: migration/9 Tainted: G        W         5.8.0-rc5+ #121
[   35.463490] Call Trace:
[   35.463509] [c00000096fc239b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   35.463532] [c00000096fc23a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   35.799148] [c00000096fc23b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   35.799172] [c00000096fc23be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   35.799194] [c00000096fc23cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   35.799217] [c00000096fc23d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   35.799239] [c00000096fc23e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   35.799262] watchdog: CPU 0 became unstuck TB:54141383317
[   35.799280] CPU: 0 PID: 11 Comm: migration/0 Tainted: G        W         5.8.0-rc5+ #121
[   35.799297] Call Trace:
[   35.799313] [c00000096f7439b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   35.799332] [c00000096f743a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   35.799351] [c00000096f743b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   35.799369] [c00000096f743be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   35.799388] [c00000096f743cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   36.136459] [c00000096f743d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   36.136476] [c00000096f743e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   36.136495] watchdog: CPU 13 became unstuck TB:54141383839
[   36.136516] CPU: 13 PID: 75 Comm: migration/13 Tainted: G        W         5.8.0-rc5+ #121
[   36.136536] Call Trace:
[   36.136554] [c00000096d0cb9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   36.136578] [c00000096d0cba50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   36.136601] [c00000096d0cbb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   36.136624] [c00000096d0cbbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   36.136647] [c00000096d0cbcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   36.136670] [c00000096d0cbd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   36.136692] [c00000096d0cbe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   36.471512] watchdog: CPU 23 became unstuck TB:54141382978
[   36.471531] CPU: 23 PID: 125 Comm: migration/23 Tainted: G        W         5.8.0-rc5+ #121
[   36.471550] Call Trace:
[   36.471568] [c00000096d5639b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   36.471590] [c00000096d563a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   36.471612] [c00000096d563b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   36.471633] [c00000096d563be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   36.471654] [c00000096d563cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   36.471675] [c00000096d563d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   36.471696] [c00000096d563e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   36.471717] watchdog: CPU 2 became unstuck TB:54141383439
[   36.471735] CPU: 2 PID: 20 Comm: migration/2 Tainted: G        W         5.8.0-rc5+ #121
[   36.471753] Call Trace:
[   36.838982] [c00000096f8839b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   36.839003] [c00000096f883a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   36.839023] [c00000096f883b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   36.839042] [c00000096f883be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   36.839062] [c00000096f883cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   36.839082] [c00000096f883d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   36.839100] [c00000096f883e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   36.839120] watchdog: CPU 22 became unstuck TB:54141382963
[   36.839141] CPU: 22 PID: 120 Comm: migration/22 Tainted: G        W         5.8.0-rc5+ #121
[   36.839161] Call Trace:
[   36.839180] [c00000096d52b9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   36.839204] [c00000096d52ba50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   36.839227] [c00000096d52bb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   37.180102] [c00000096d52bbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   37.180124] [c00000096d52bcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   37.180148] [c00000096d52bd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   37.180170] [c00000096d52be20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   37.180192] watchdog: CPU 4 became unstuck TB:54141383398
[   37.180212] CPU: 4 PID: 30 Comm: migration/4 Tainted: G        W         5.8.0-rc5+ #121
[   37.180233] Call Trace:
[   37.180251] [c00000096f8a39b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   37.180276] [c00000096f8a3a50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   37.180300] [c00000096f8a3b20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   37.180323] [c00000096f8a3be0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   37.180346] [c00000096f8a3cb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   37.520174] [c00000096f8a3d60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   37.520197] [c00000096f8a3e20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   37.520220] watchdog: CPU 1 became unstuck TB:54141382975
[   37.520240] CPU: 1 PID: 15 Comm: migration/1 Tainted: G        W         5.8.0-rc5+ #121
[   37.520260] Call Trace:
[   37.520277] [c00000096f8db9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   37.520300] [c00000096f8dba50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   37.520322] [c00000096f8dbb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   37.520344] [c00000096f8dbbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   37.520366] [c00000096f8dbcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   37.520388] [c00000096f8dbd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   37.520409] [c00000096f8dbe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   37.520430] watchdog: CPU 3 became unstuck TB:54141382976
[   37.857400] CPU: 3 PID: 25 Comm: migration/3 Tainted: G        W         5.8.0-rc5+ #121
[   37.857421] Call Trace:
[   37.857440] [c00000096f88f9b0] [c0000000006fc370] .dump_stack+0xbc/0x10c (unreliable)
[   37.857464] [c00000096f88fa50] [c00000000003269c] .wd_smp_clear_cpu_pending+0x40c/0x420
[   37.857488] [c00000096f88fb20] [c00000000027a794] .multi_cpu_stop+0x164/0x230
[   37.857512] [c00000096f88fbe0] [c00000000027a3dc] .cpu_stopper_thread+0xbc/0x210
[   37.857534] [c00000096f88fcb0] [c000000000195bdc] .smpboot_thread_fn+0x1fc/0x270
[   37.857559] [c00000096f88fd60] [c00000000018f14c] .kthread+0x18c/0x1a0
[   37.857581] [c00000096f88fe20] [c00000000000dac0] .ret_from_kernel_thread+0x58/0x78
[   37.857643] clocksource: Switched to clocksource timebase
[   37.878888] VFS: Disk quotas dquot_6.6.0
[   37.879029] VFS: Dquot-cache hash table entries: 8192 (order 0, 65536 bytes)
[   37.879550] AppArmor: AppArmor Filesystem Enabled
[   38.132936] NET: Registered protocol family 2
[   38.133377] tcp_listen_portaddr_hash hash table entries: 32768 (order: 3, 524288 bytes, linear)
[   38.133963] TCP established hash table entries: 524288 (order: 6, 4194304 bytes, linear)
[   38.140465] TCP bind hash table entries: 65536 (order: 4, 1048576 bytes, linear)
[   38.141579] TCP: Hash tables configured (established 524288 bind 65536)
[   38.141778] UDP hash table entries: 32768 (order: 4, 1048576 bytes, linear)
[   38.142901] UDP-Lite hash table entries: 32768 (order: 4, 1048576 bytes, linear)
[   38.144240] NET: Registered protocol family 1
[   38.144269] NET: Registered protocol family 44
[   38.144364] pci 0000:90:01.0: enabling device (0140 -> 0142)
[   38.144442] pci 0000:90:01.1: enabling device (0140 -> 0142)
[   38.144522] pci 0000:90:01.2: enabling device (0140 -> 0142)
[   38.144632] PCI: CLS 128 bytes, default 128
[   38.144752] Unpacking initramfs...
[   39.192569] Freeing initrd memory: 32512K
[   39.194773] Initialise system trusted keyrings
[   39.194814] Key type blacklist registered
[   39.194904] workingset: timestamp_bits=38 max_order=20 bucket_order=0
[   39.198211] zbud: loaded
[   39.198722] integrity: Platform Keyring initialized
[   39.198742] Key type asymmetric registered
[   39.198760] Asymmetric key parser 'x509' registered
[   39.198790] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252)
[   39.198928] io scheduler mq-deadline registered
[   39.199432] Driver 'hvc_console' was unable to register with bus_type 'vio' because the bus was not initialized.
[   39.199481] hvc0: raw protocol on /ibm,opal/consoles/serial@0 (boot console)
[   39.199504] hvc0: No interrupts property, using OPAL event
[   39.199977] hvc1: hvsi protocol on /ibm,opal/consoles/serial@1
[   39.199999] hvc1: No interrupts property, using OPAL event
[   39.200042] hvc2: hvsi protocol on /ibm,opal/consoles/serial@2
[   39.200063] hvc2: No interrupts property, using OPAL event
[   39.200103] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
[   39.200963] pmac_zilog: 0.6 (Benjamin Herrenschmidt <benh@kernel.crashing.org>)
[   39.201047] Non-volatile memory driver v1.3
[   39.201065] Linux agpgart interface v0.103
[   39.201644] mousedev: PS/2 mouse device common for all mice
[   39.220573] rtc-opal opal-rtc: registered as rtc0
[   39.240550] rtc-opal opal-rtc: setting system clock to 2020-07-15T09:57:43 UTC (1594807063)
[   39.242431] powernv-cpufreq: ibm,pstate-min node not found
[   39.242450] powernv-cpufreq: Platform driver disabled. System does not support PState control
[   39.242471] cpuidle-powernv : Only Snooze is available
[   39.242729] powernv_idle_driver registered
[   39.242958] ledtrig-cpu: registered to indicate activity on CPUs
[   39.243026] drop_monitor: Initializing network drop monitor service
[   39.243291] NET: Registered protocol family 10
[   39.259368] Segment Routing with IPv6
[   39.259405] mip6: Mobile IPv6
[   39.259421] NET: Registered protocol family 17
[   39.259492] mpls_gso: MPLS GSO support
[   39.259521] drmem: No dynamic reconfiguration memory found
[   39.259652] registered taskstats version 1
[   39.259670] Loading compiled-in X.509 certificates
[   39.259736] zswap: loaded using pool lzo/zbud
[   39.259938] Key type ._fscrypt registered
[   39.259953] Key type .fscrypt registered
[   39.259968] Key type fscrypt-provisioning registered
[   39.260000] AppArmor: AppArmor sha1 policy hashing enabled
[   39.263561] Freeing unused kernel memory: 6528K
[   39.263591] This architecture does not have kernel memory protection.
[   39.263614] Run /init as init process
[   39.263631]   with arguments:
[   39.263632]     /init
[   39.263633]   with environment:
[   39.263635]     HOME=/
[   39.263636]     TERM=linux
[   39.376393] pps_core: module verification failed: signature and/or required key missing - tainting kernel
[   39.376699] pps_core: LinuxPPS API ver. 1 registered
[   39.376715] pps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti <giometti@linux.it>
[   39.391414] ipr: IBM Power RAID SCSI Device Driver version: 2.6.4 (March 14, 2017)
[   39.455742] usbcore: registered new interface driver usbfs
[   39.455755] usbcore: registered new interface driver hub
[   39.455847] usbcore: registered new device driver usb
[   39.471918] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
[   39.485162] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
[   39.488858] ehci-pci: EHCI PCI platform driver
[   39.488984] ehci-pci 0000:90:01.2: EHCI Host Controller
[   39.489004] ehci-pci 0000:90:01.2: new USB bus registered, assigned bus number 1
[   39.489170] ehci-pci 0000:90:01.2: irq 26, io mem 0x3da082020000
[   39.490103] ipr 0000:60:00.0: Found IOA with IRQ: 25
[   39.490375] ipr 0000:60:00.0: enabling device (0140 -> 0142)
[   39.490493] ipr 0000:60:00.0: Received IRQ : 34
[   39.490520] ipr 0000:60:00.0: Request for 1 MSIs succeeded.
[   39.490978] ipr 0000:60:00.0: Initializing IOA.
[   39.496248] PTP clock support registered
[   39.671497] Emulex LightPulse Fibre Channel SCSI driver 12.8.0.1
[   39.671498] Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
[   39.828775] scsi host0: IBM 0 Storage Adapter
[   39.828838] ehci-pci 0000:90:01.2: USB 2.0 started, EHCI 1.00
[   39.828969] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.08
[   39.828989] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[   39.829007] usb usb1: Product: EHCI Host Controller
[   39.829024] usb usb1: Manufacturer: Linux 5.8.0-rc5+ ehci_hcd
[   39.829041] usb usb1: SerialNumber: 0000:90:01.2
[   39.829325] hub 1-0:1.0: USB hub found
[   39.829354] hub 1-0:1.0: 5 ports detected
[   39.829894] lpfc 0005:01:00.0: enabling device (0140 -> 0142)
[   39.831642] scsi host1: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 01 device 00 irq 27
[   39.833544] ohci-pci: OHCI PCI platform driver
[   39.890916] e1000e: Intel(R) PRO/1000 Network Driver - 3.2.6-k
[   39.890917] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
[   40.209903] e1000e 0000:a0:00.0: enabling device (0140 -> 0142)
[   40.210084] e1000e 0000:a0:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
[   40.210326] ohci-pci 0000:90:01.0: OHCI PCI host controller
[   40.210354] ohci-pci 0000:90:01.0: new USB bus registered, assigned bus number 2
[   40.210465] ohci-pci 0000:90:01.0: irq 24, io mem 0x3da082000000
[   40.344962] usb usb2: New USB device found, idVendor=1d6b, idProduct=0001, bcdDevice= 5.08
[   40.344985] usb usb2: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[   40.345006] usb usb2: Product: OHCI PCI host controller
[   40.345025] usb usb2: Manufacturer: Linux 5.8.0-rc5+ ohci_hcd
[   40.345044] usb usb2: SerialNumber: 0000:90:01.0
[   40.345325] hub 2-0:1.0: USB hub found
[   40.669421] hub 2-0:1.0: 3 ports detected
[   40.669734] ohci-pci 0000:90:01.1: OHCI PCI host controller
[   40.669756] ohci-pci 0000:90:01.1: new USB bus registered, assigned bus number 3
[   40.669816] ohci-pci 0000:90:01.1: irq 25, io mem 0x3da082010000
[   40.758611] usb usb3: New USB device found, idVendor=1d6b, idProduct=0001, bcdDevice= 5.08
[   40.758630] usb usb3: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[   40.758649] usb usb3: Product: OHCI PCI host controller
[   40.758665] usb usb3: Manufacturer: Linux 5.8.0-rc5+ ohci_hcd
[   40.758683] usb usb3: SerialNumber: 0000:90:01.1
[   40.758861] hub 3-0:1.0: USB hub found
[   40.758890] hub 3-0:1.0: 2 ports detected
[   40.801447] e1000e 0000:a0:00.0 eth0: (PCI Express:2.5GT/s:Width x4) 5c:f3:fc:eb:7c:ba
[   40.801469] e1000e 0000:a0:00.0 eth0: Intel(R) PRO/1000 Network Connection
[   40.801557] e1000e 0000:a0:00.0 eth0: MAC: 0, PHY: 4, PBA No: E34292-006
[   41.151832] e1000e 0000:a0:00.1: enabling device (0140 -> 0142)
[   41.151973] e1000e 0000:a0:00.1: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
[   41.313385] e1000e 0000:a0:00.1 eth1: (PCI Express:2.5GT/s:Width x4) 5c:f3:fc:eb:7c:bb
[   41.313405] e1000e 0000:a0:00.1 eth1: Intel(R) PRO/1000 Network Connection
[   41.313492] e1000e 0000:a0:00.1 eth1: MAC: 0, PHY: 4, PBA No: E34292-006
[   41.315058] e1000e 0000:a0:00.1 enp160s0f1: renamed from eth1
[   41.328772] e1000e 0000:a0:00.0 enp160s0f0: renamed from eth0
[   42.684625] lpfc 0005:01:00.1: enabling device (0140 -> 0142)
[   42.685603] scsi host2: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 01 device 01 irq 28
[   45.094490] random: lvm: uninitialized urandom read (4 bytes read)
[   45.159719] random: lvm: uninitialized urandom read (4 bytes read)
[   46.230810] random: lvm: uninitialized urandom read (4 bytes read)
[   46.470355] ipr 0000:60:00.0: Starting IOA initialization sequence.
[   46.470929] ipr 0000:60:00.0: Adapter firmware version: 04220029
[   46.537793] ipr 0000:60:00.0: IOA initialized.
[   46.538243] scsi 0:255:255:255: No Device         IBM      57C7001SISIOA    0150 PQ: 0 ANSI: 0
[   47.290177] random: lvm: uninitialized urandom read (4 bytes read)
[   48.338357] random: lvm: uninitialized urandom read (4 bytes read)
[   48.927161] scsi 0:8:0:0: Enclosure         IBM      VSBPD3E4A  3GSAS   01 PQ: 0 ANSI: 2
[   48.927415] scsi 0:8:1:0: Enclosure         IBM      VSBPD3E4B  3GSAS   01 PQ: 0 ANSI: 2
[   49.402253] random: lvm: uninitialized urandom read (4 bytes read)
[   50.470383] random: lvm: uninitialized urandom read (4 bytes read)
[   50.517573] ata1.00: ATA-10: ST1000NX0313         00LY266 00LY265IBM, BE32, max UDMA/133
[   50.517602] ata1.00: 1953525168 sectors, multi 0: LBA48 NCQ (depth 0/32)
[   50.518364] ata1.00: configured for UDMA/133
[   50.518473] scsi 0:0:0:0: Direct-Access     ATA      ST1000NX0313     BE32 PQ: 0 ANSI: 5
[   51.522363] random: lvm: uninitialized urandom read (4 bytes read)
[   52.109818] ata2.00: ATAPI: IBM     RMBO0040542, SA61, max UDMA/100
[   52.112397] ata2.00: configured for UDMA/100
[   52.115601] scsi 0:6:0:0: CD-ROM            IBM      RMBO0040542      SA61 PQ: 0 ANSI: 2
[   52.116289] scsi 0:1:0:0: Direct-Access     IBM      ST9300653SS      7411 PQ: 0 ANSI: 6
[   52.145086] sd 0:0:0:0: [sda] 1953525168 512-byte logical blocks: (1.00 TB/932 GiB)
[   52.145120] sd 0:0:0:0: [sda] 4096-byte physical blocks
[   52.145163] sd 0:0:0:0: [sda] Write Protect is off
[   52.145181] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[   52.145205] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[   52.160740] sd 0:1:0:0: Power-on or device reset occurred
[   52.161529] sd 0:1:0:0: [sdb] 585937500 512-byte logical blocks: (300 GB/279 GiB)
[   52.161835] sd 0:1:0:0: [sdb] Write Protect is off
[   52.161852] sd 0:1:0:0: [sdb] Mode Sense: e1 00 10 08
[   52.162335] sd 0:1:0:0: [sdb] Write cache: enabled, read cache: enabled, supports DPO and FUA
[   52.175799]  sda: sda1 sda2 sda3
[   52.176618] sd 0:0:0:0: [sda] Attached SCSI disk
[   52.179583] sr 0:6:0:0: [sr0] scsi3-mmc drive: 24x/24x writer dvd-ram cd/rw xa/form2 cdda tray
[   52.179607] cdrom: Uniform CD-ROM driver Revision: 3.20
[   52.189486]  sdb:
[   52.192277] sd 0:1:0:0: [sdb] Attached SCSI disk
[   52.213447] sr 0:6:0:0: Attached scsi CD-ROM sr0
[   52.570565] random: lvm: uninitialized urandom read (4 bytes read)
[   52.634445] device-mapper: uevent: version 1.0.3
[   52.634598] device-mapper: ioctl: 4.42.0-ioctl (2020-02-27) initialised: dm-devel@redhat.com
[   52.635735] random: lvm: uninitialized urandom read (2 bytes read)
[   58.139950] ses 0:8:0:0: Attached Enclosure device
[   58.139995] ses 0:8:1:0: Attached Enclosure device
[   58.155408] PM: Image not found (code -22)
[   59.685512] EXT4-fs (dm-1): mounted filesystem with ordered data mode. Opts: (null)
[   59.814229] Not activating Mandatory Access Control as /sbin/tomoyo-init does not exist.
[   60.273066] random: fast init done
[   60.600836] systemd[1]: Inserted module 'autofs4'
[   60.686979] systemd[1]: systemd 245.6-1 running in system mode. (+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN +PCRE2 default-hierarchy=hybrid)
[   60.687409] systemd[1]: Detected architecture ppc64.
[   60.735750] systemd[1]: Set hostname to <amure>.
[   60.969265] random: crng init done
[   62.358333] systemd[1]: Created slice Virtual Machine and Container Slice.
[   62.359355] systemd[1]: Created slice system-getty.slice.
[   62.359864] systemd[1]: Created slice system-modprobe.slice.
[   62.360458] systemd[1]: Created slice system-postfix.slice.
[   62.360985] systemd[1]: Created slice system-serial\x2dgetty.slice.
[   62.361496] systemd[1]: Created slice system-systemd\x2dfsck.slice.
[   62.361973] systemd[1]: Created slice User and Session Slice.
[   62.362160] systemd[1]: Started Dispatch Password Requests to Console Directory Watch.
[   62.362324] systemd[1]: Started Forward Password Requests to Wall Directory Watch.
[   62.362462] systemd[1]: Condition check resulted in Arbitrary Executable File Formats File System Automount Point being skipped.
[   62.362533] systemd[1]: Reached target Local Encrypted Volumes.
[   62.362661] systemd[1]: Reached target Paths.
[   62.362763] systemd[1]: Reached target Remote File Systems.
[   62.362865] systemd[1]: Reached target Slices.
[   62.362986] systemd[1]: Reached target Libvirt guests shutdown.
[   62.363172] systemd[1]: Listening on Device-mapper event daemon FIFOs.
[   62.363436] systemd[1]: Listening on LVM2 poll daemon socket.
[   62.363650] systemd[1]: Listening on Syslog Socket.
[   62.363864] systemd[1]: Listening on fsck to fsckd communication Socket.
[   62.364021] systemd[1]: Listening on initctl Compatibility Named Pipe.
[   62.364367] systemd[1]: Listening on Journal Audit Socket.
[   62.364587] systemd[1]: Listening on Journal Socket (/dev/log).
[   62.364824] systemd[1]: Listening on Journal Socket.
[   62.365046] systemd[1]: Listening on udev Control Socket.
[   62.365220] systemd[1]: Listening on udev Kernel Socket.
[   62.366857] systemd[1]: Mounting Huge Pages File System...
[   62.368562] systemd[1]: Mounting POSIX Message Queue File System...
[   62.370479] systemd[1]: Mounting Kernel Debug File System...
[   62.372238] systemd[1]: Mounting Kernel Trace File System...
[   62.373886] systemd[1]: Starting Availability of block devices...
[   62.375687] systemd[1]: Starting Create list of static device nodes for the current kernel...
[   62.377248] systemd[1]: Starting Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling...
[   62.378893] systemd[1]: Starting Load Kernel Module drm...
[   62.401843] systemd[1]: Condition check resulted in Set Up Additional Binary Formats being skipped.
[   62.401902] systemd[1]: Condition check resulted in File System Check on Root Device being skipped.
[   62.403994] systemd[1]: Starting Journal Service...
[   62.416921] systemd[1]: Starting Load Kernel Modules...
[   62.418461] systemd[1]: Starting Remount Root and Kernel File Systems...
[   62.420012] systemd[1]: Starting udev Coldplug all Devices...
[   62.422478] systemd[1]: Mounted Huge Pages File System.
[   62.422820] systemd[1]: Mounted POSIX Message Queue File System.
[   62.423110] systemd[1]: Mounted Kernel Debug File System.
[   62.423386] systemd[1]: Mounted Kernel Trace File System.
[   62.424336] systemd[1]: Finished Availability of block devices.
[   62.424968] systemd[1]: modprobe@drm.service: Succeeded.
[   62.425550] systemd[1]: Finished Load Kernel Module drm.
[   62.429403] systemd[1]: Finished Create list of static device nodes for the current kernel.
[   62.477192] EXT4-fs (dm-1): re-mounted. Opts: errors=remount-ro
[   62.478974] systemd[1]: Finished Remount Root and Kernel File Systems.
[   62.522752] systemd[1]: Condition check resulted in Rebuild Hardware Database being skipped.
[   62.522808] systemd[1]: Condition check resulted in Platform Persistent Storage Archival being skipped.
[   62.524220] systemd[1]: Starting Load/Save Random Seed...
[   62.526265] systemd[1]: Starting Create System Users...
[   62.527492] systemd[1]: Finished Load Kernel Modules.
[   62.528018] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
[   62.529625] systemd[1]: Mounting Kernel Configuration File System...
[   62.531233] systemd[1]: Starting Apply Kernel Variables...
[   62.533932] systemd[1]: Mounted Kernel Configuration File System.
[   62.590378] systemd[1]: Finished Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling.
[   62.638113] systemd[1]: Finished udev Coldplug all Devices.
[   62.656511] systemd[1]: Starting Helper to synchronize boot up for ifupdown...
[   62.657640] systemd[1]: Finished Apply Kernel Variables.
[   62.725974] systemd[1]: Finished Create System Users.
[   62.727600] systemd[1]: Starting Create Static Device Nodes in /dev...
[   62.728735] systemd[1]: Finished Helper to synchronize boot up for ifupdown.
[   62.772621] systemd[1]: Finished Load/Save Random Seed.
[   62.816410] systemd[1]: Finished Create Static Device Nodes in /dev.
[   62.816682] systemd[1]: Reached target Local File Systems (Pre).
[   62.816808] systemd[1]: Condition check resulted in Virtual Machine and Container Storage (Compatibility) being skipped.
[   62.816843] systemd[1]: Reached target Containers.
[   62.818458] systemd[1]: Starting udev Kernel Device Manager...
[   62.948568] systemd[1]: Started udev Kernel Device Manager.
[   63.032275] systemd[1]: Found device /dev/hvc0.
[   63.135641] systemd[1]: Started Journal Service.
[   63.140762] scsi 0:255:255:255: Attached scsi generic sg0 type 31
[   63.140847] ses 0:8:0:0: Attached scsi generic sg1 type 13
[   63.140908] ses 0:8:1:0: Attached scsi generic sg2 type 13
[   63.140975] sd 0:0:0:0: Attached scsi generic sg3 type 0
[   63.141044] sr 0:6:0:0: Attached scsi generic sg4 type 5
[   63.141111] sd 0:1:0:0: Attached scsi generic sg5 type 0
[   63.246208] systemd-journald[422]: Received client request to flush runtime journal.
[   64.284515] Adding 39251904k swap on /dev/mapper/vg--amure-swap.  Priority:-2 extents:1 across:39251904k FS
[   64.618407] EXT4-fs (sda2): mounting ext2 file system using the ext4 subsystem
[   64.645027] EXT4-fs (sda2): mounted filesystem without journal. Opts: (null)
[   65.086272] audit: type=1400 audit(1594807089.340:2): apparmor="STATUS" operation="profile_load" profile="unconfined" name="lsb_release" pid=528 comm="apparmor_parser"
[   65.086978] audit: type=1400 audit(1594807089.340:3): apparmor="STATUS" operation="profile_load" profile="unconfined" name="nvidia_modprobe" pid=527 comm="apparmor_parser"
[   65.087008] audit: type=1400 audit(1594807089.340:4): apparmor="STATUS" operation="profile_load" profile="unconfined" name="nvidia_modprobe//kmod" pid=527 comm="apparmor_parser"
[   65.106970] audit: type=1400 audit(1594807089.360:5): apparmor="STATUS" operation="profile_load" profile="unconfined" name="/usr/bin/man" pid=529 comm="apparmor_parser"
[   65.107002] audit: type=1400 audit(1594807089.360:6): apparmor="STATUS" operation="profile_load" profile="unconfined" name="man_filter" pid=529 comm="apparmor_parser"
[   65.107027] audit: type=1400 audit(1594807089.360:7): apparmor="STATUS" operation="profile_load" profile="unconfined" name="man_groff" pid=529 comm="apparmor_parser"
[   65.107901] audit: type=1400 audit(1594807089.360:8): apparmor="STATUS" operation="profile_load" profile="unconfined" name="libvirtd" pid=531 comm="apparmor_parser"
[   65.107928] audit: type=1400 audit(1594807089.360:9): apparmor="STATUS" operation="profile_load" profile="unconfined" name="libvirtd//qemu_bridge_helper" pid=531 comm="apparmor_parser"
[   65.196918] audit: type=1400 audit(1594807089.452:10): apparmor="STATUS" operation="profile_load" profile="unconfined" name="virt-aa-helper" pid=530 comm="apparmor_parser"
[   68.401440] e1000e 0000:a0:00.0 enp160s0f0: NIC Link is Up 1000 Mbps Full Duplex, Flow Control: Rx/Tx
[   68.401648] IPv6: ADDRCONF(NETDEV_CHANGE): enp160s0f0: link becomes ready

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file
  2020-07-14  9:16   ` Alexey Kardashevskiy
@ 2020-07-22  5:01     ` Oliver O'Halloran
  2020-07-22  9:53       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-22  5:01 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Tue, Jul 14, 2020 at 7:16 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> > +     align = pci_iov_resource_size(pdev, resno);
> > +
> > +     /*
> > +      * iov can be null if we have an SR-IOV device with IOV BAR that can't
> > +      * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
> > +      * In that case we don't allow VFs to be enabled so just return the
> > +      * default alignment.
> > +      */
> > +     if (!iov)
> > +             return align;
>
>
> This is the new chunk. What would happen before? Non-prefetch BAR would
> still go to m64 space?

I don't think there's any real change. Currently, if the setup in
pnv_pci_ioda_fixup_iov_resources() fails, pdn->vfs_expanded will be
zero. The !iov check here fills the same role, but it's more explicit.
vfs_expanded has some other behaviour too, so we can't get rid of it
entirely (yet).
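
To make that concrete, here's a throwaway user-space model of the two
checks (this is not the kernel code; the vfs_expanded * align path is
simplified and from memory):

#include <stdio.h>

/* old world: pci_dn always exists, vfs_expanded is 0 if the fixup failed */
struct pci_dn_toy { unsigned int vfs_expanded; };

/* new world: the iov data is only allocated when the fixup succeeds */
struct iov_toy { unsigned int vfs_expanded; };

static unsigned long old_align(const struct pci_dn_toy *pdn, unsigned long align)
{
	return pdn->vfs_expanded ? pdn->vfs_expanded * align : align;
}

static unsigned long new_align(const struct iov_toy *iov, unsigned long align)
{
	return iov ? iov->vfs_expanded * align : align;
}

int main(void)
{
	struct pci_dn_toy failed = { .vfs_expanded = 0 };

	/* failed fixup: both variants fall back to the default alignment */
	printf("old: %lu, new: %lu\n",
	       old_align(&failed, 4096), new_align(NULL, 4096));
	return 0;
}

Either way a failed fixup means the default alignment; the difference
is whether you test a zeroed field or a NULL pointer.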

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-15  8:00       ` Alexey Kardashevskiy
@ 2020-07-22  5:39         ` Oliver O'Halloran
  2020-07-22 10:06           ` Alexey Kardashevskiy
  0 siblings, 1 reply; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-22  5:39 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 15, 2020 at 6:00 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> >>>                *
> >>> -              * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
> >>> -              * with other devices, IOV BAR size is expanded to be
> >>> -              * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
> >>> -              * segment size , the expanded size would equal to half of the
> >>> -              * whole M64 space size, which will exhaust the M64 Space and
> >>> -              * limit the system flexibility.  This is a design decision to
> >>> -              * set the boundary to quarter of the M64 segment size.
> >>> +              * The 1/4 limit is arbitrary and can be tweaked.
> >>>                */
> >>> -             if (total_vf_bar_sz > gate) {
> >>> -                     mul = roundup_pow_of_two(total_vfs);
> >>> -                     dev_info(&pdev->dev,
> >>> -                             "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
> >>> -                             total_vf_bar_sz, gate, mul);
> >>> -                     iov->m64_single_mode = true;
> >>> -                     break;
> >>> -             }
> >>> -     }
> >>> +             if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
> >>> +                     /*
> >>> +                      * On PHB3, the minimum size alignment of M64 BAR in
> >>> +                      * single mode is 32MB. If this VF BAR is smaller than
> >>> +                      * 32MB, but still too large for a segmented window
> >>> +                      * then we can't map it and need to disable SR-IOV for
> >>> +                      * this device.
> >>
> >>
> >> Why not use single PE mode for such BAR? Better than nothing.
> >
> > Suppose you could, but I figured VFs were mainly interesting since you
> > could give each VF to a separate guest. If there's multiple VFs under
> > the same single PE BAR then they'd have to be assigned to the same
>
> True. But with one PE per VF we can still have 15 (or 14?) isolated VFs
> which is not hundreds but better than 0.

We can only use single PE BARs if the per-VF size is >= 32MB due to
the alignment requirements on P8. If the per-VF size is smaller, then
we're stuck with multiple VFs inside the same BAR, which is bad due to
the PAPR requirements mentioned below. Sure, we could look at doing
something else, but considering this matches the current behaviour
it's a bit hard to care...
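
Spelling the per-BAR choice out as a decision table (a user-space toy,
not the kernel code; the 1/4-of-segment gate and the 32MB floor are the
only inputs taken from the patch):

#include <stdio.h>

#define SZ_1M		(1UL << 20)
#define SINGLE_PE_MIN	(32 * SZ_1M)	/* P8 single-mode M64 BAR alignment floor */

static const char *vf_bar_mode(unsigned long vf_bar_sz, unsigned long m64_segsize)
{
	unsigned long gate = m64_segsize >> 2;	/* arbitrary 1/4-of-segment limit */

	if (vf_bar_sz <= gate)
		return "segmented window";	/* one M64 segment per VF */
	if (vf_bar_sz >= SINGLE_PE_MIN)
		return "single PE mode";	/* one M64 BAR per VF */
	return "SR-IOV disabled";		/* too big to segment, too small for single mode */
}

int main(void)
{
	/* 256MB segments: both usable modes are reachable */
	printf("%s\n", vf_bar_mode(8 * SZ_1M, 256 * SZ_1M));
	printf("%s\n", vf_bar_mode(128 * SZ_1M, 256 * SZ_1M));
	/* 64MB segments: a 24MB VF BAR falls into the gap */
	printf("%s\n", vf_bar_mode(24 * SZ_1M, 64 * SZ_1M));
	return 0;
}

The disabled case is only reachable when the segment size drops below
128MB, which is what the segment-size point below is about.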

> > guest in order to retain the freeze/unfreeze behaviour that PAPR
> > requires. I guess that's how it used to work, but it seems better just
> > to disable them rather than having VFs which sort of work.
>
> Well, realistically the segment size should be 8MB to make this matter
> (or the whole window 2GB) which does not seem to happen so it does not
> matter.

I'm not sure what you mean.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file
  2020-07-22  5:01     ` Oliver O'Halloran
@ 2020-07-22  9:53       ` Alexey Kardashevskiy
  0 siblings, 0 replies; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-22  9:53 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 22/07/2020 15:01, Oliver O'Halloran wrote:
> On Tue, Jul 14, 2020 at 7:16 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>> +     align = pci_iov_resource_size(pdev, resno);
>>> +
>>> +     /*
>>> +      * iov can be null if we have an SR-IOV device with IOV BAR that can't
>>> +      * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
>>> +      * In that case we don't allow VFs to be enabled so just return the
>>> +      * default alignment.
>>> +      */
>>> +     if (!iov)
>>> +             return align;
>>
>>
>> This is the new chunk. What would happen before? Non-prefetch BAR would
>> still go to m64 space?
> 
> I don't think there's any real change. Currently if the setup in
> pnv_pci_ioda_fixup_iov_resources() fails then pdn->vfs_expanded will
> be zero. The !iov check here fills the same role, but it's more
> explicit. vfs_expanded has some other behaviour too so we can't get
> rid of it entirely (yet).

The check is fine, you have to have one as @iov can be NULL (unlike
pci_dn). The comment is what bothered me. It would make more sense
somewhere in pnv_pci_ioda_fixup_iov_resources(), near the
"dev_warn(&pdev->dev, "Don't support SR-IOV with"" message, as it
currently suggests there is only one reason for the failed iov
configuration when there are actually two.


-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-22  5:39         ` Oliver O'Halloran
@ 2020-07-22 10:06           ` Alexey Kardashevskiy
  2020-07-24  3:40             ` Oliver O'Halloran
  0 siblings, 1 reply; 57+ messages in thread
From: Alexey Kardashevskiy @ 2020-07-22 10:06 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev



On 22/07/2020 15:39, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 6:00 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>>>>>                *
>>>>> -              * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
>>>>> -              * with other devices, IOV BAR size is expanded to be
>>>>> -              * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
>>>>> -              * segment size , the expanded size would equal to half of the
>>>>> -              * whole M64 space size, which will exhaust the M64 Space and
>>>>> -              * limit the system flexibility.  This is a design decision to
>>>>> -              * set the boundary to quarter of the M64 segment size.
>>>>> +              * The 1/4 limit is arbitrary and can be tweaked.
>>>>>                */
>>>>> -             if (total_vf_bar_sz > gate) {
>>>>> -                     mul = roundup_pow_of_two(total_vfs);
>>>>> -                     dev_info(&pdev->dev,
>>>>> -                             "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
>>>>> -                             total_vf_bar_sz, gate, mul);
>>>>> -                     iov->m64_single_mode = true;
>>>>> -                     break;
>>>>> -             }
>>>>> -     }
>>>>> +             if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
>>>>> +                     /*
>>>>> +                      * On PHB3, the minimum size alignment of M64 BAR in
>>>>> +                      * single mode is 32MB. If this VF BAR is smaller than
>>>>> +                      * 32MB, but still too large for a segmented window
>>>>> +                      * then we can't map it and need to disable SR-IOV for
>>>>> +                      * this device.
>>>>
>>>>
>>>> Why not use single PE mode for such BAR? Better than nothing.
>>>
>>> Suppose you could, but I figured VFs were mainly interesting since you
>>> could give each VF to a separate guest. If there's multiple VFs under
>>> the same single PE BAR then they'd have to be assigned to the same
>>
>> True. But with one PE per VF we can still have 15 (or 14?) isolated VFs
>> which is not hundreds but better than 0.
> 
> We can only use single PE BARs if the per-VF size is >= 32MB due to
> the alignment requirements on P8. If the per-VF size is smaller then
> we're stuck with multiple VFs inside the same BAR which is bad due to
> the PAPR requirements mentioned below. Sure we could look at doing
> something else, but considering this matches the current behaviour
> it's a bit hard to care...
>
>>> guest in order to retain the freeze/unfreeze behaviour that PAPR
>>> requires. I guess that's how it used to work, but it seems better just
>>> to disable them rather than having VFs which sort of work.
>>
>> Well, realistically the segment size should be 8MB to make this matter
>> (or the whole window 2GB) which does not seem to happen so it does not
>> matter.
> 
> I'm not sure what you mean.

I mean, how can we possibly hit this case? What m64_segsize would the
platform have to have to trigger this? The whole check seems useless,
but whatever.



-- 
Alexey

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting
  2020-07-22 10:06           ` Alexey Kardashevskiy
@ 2020-07-24  3:40             ` Oliver O'Halloran
  0 siblings, 0 replies; 57+ messages in thread
From: Oliver O'Halloran @ 2020-07-24  3:40 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev

On Wed, Jul 22, 2020 at 8:06 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> >> Well, realistically the segment size should be 8MB to make this matter
> >> (or the whole window 2GB) which does not seem to happen so it does not
> >> matter.
> >
> > I'm not sure what you mean.
>
> I mean how can we possibly hit this case, what m64_segsize would the
> platform have to trigger this. The whole check seems useless but whatever.

Yeah maybe.

IIRC some old P8 FSP systems had tiny M64 windows, so it might have
been an issue there. Maybe we can get rid of it, but I'd rather just
leave the behaviour as-is for now.
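
For what it's worth, running the numbers from the check itself: the
disable branch needs m64_segsize/4 < vf_bar_sz < 32MB, so it can only
fire when the M64 segment size is below 128MB (e.g. with 256 segments
that's a total M64 window under 32GB), which is roughly the sort of
window I'd expect on those boxes. Back-of-the-envelope only, though.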

^ permalink raw reply	[flat|nested] 57+ messages in thread

end of thread, other threads:[~2020-07-24  3:42 UTC | newest]

Thread overview: 57+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-10  5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
2020-07-10  5:23 ` [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper Oliver O'Halloran
2020-07-13  8:28   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release Oliver O'Halloran
2020-07-13  8:30   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state Oliver O'Halloran
2020-07-14  5:37   ` Alexey Kardashevskiy
2020-07-14  5:58     ` Oliver O'Halloran
2020-07-14  7:21       ` Alexey Kardashevskiy
2020-07-15  0:23         ` Alexey Kardashevskiy
2020-07-15  1:38         ` Oliver O'Halloran
2020-07-15  3:33           ` Alexey Kardashevskiy
2020-07-15  7:05             ` Cédric Le Goater
2020-07-15  9:00               ` Oliver O'Halloran
2020-07-15 10:05                 ` Cédric Le Goater
2020-07-10  5:23 ` [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window Oliver O'Halloran
2020-07-14  7:39   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file Oliver O'Halloran
2020-07-14  9:16   ` Alexey Kardashevskiy
2020-07-22  5:01     ` Oliver O'Halloran
2020-07-22  9:53       ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV Oliver O'Halloran
2020-07-15  0:40   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov Oliver O'Halloran
2020-07-15  0:46   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking Oliver O'Halloran
2020-07-15  1:34   ` Alexey Kardashevskiy
2020-07-15  1:41     ` Oliver O'Halloran
2020-07-10  5:23 ` [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup Oliver O'Halloran
2020-07-15  2:09   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe() Oliver O'Halloran
2020-07-15  2:29   ` Alexey Kardashevskiy
2020-07-15  2:53     ` Oliver O'Halloran
2020-07-15  3:15       ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[] Oliver O'Halloran
2020-07-15  3:31   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown Oliver O'Halloran
2020-07-15  4:00   ` Alexey Kardashevskiy
2020-07-15  4:21     ` Oliver O'Halloran
2020-07-15  4:41       ` Alexey Kardashevskiy
2020-07-15  4:46         ` Oliver O'Halloran
2020-07-15  4:58           ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper Oliver O'Halloran
2020-07-15  4:02   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup Oliver O'Halloran
2020-07-15  4:50   ` Alexey Kardashevskiy
2020-07-10  5:23 ` [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting Oliver O'Halloran
2020-07-15  5:24   ` Alexey Kardashevskiy
2020-07-15  6:16     ` Oliver O'Halloran
2020-07-15  8:00       ` Alexey Kardashevskiy
2020-07-22  5:39         ` Oliver O'Halloran
2020-07-22 10:06           ` Alexey Kardashevskiy
2020-07-24  3:40             ` Oliver O'Halloran
2020-07-10  6:45 ` PowerNV PCI & SR-IOV cleanups Christoph Hellwig
2020-07-10  6:45   ` Christoph Hellwig
2020-07-10 12:45   ` Oliver O'Halloran
2020-07-10 12:45     ` Oliver O'Halloran
