All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/6] powerpc/powernv: Support M64 window
@ 2014-07-16 12:24 Guo Chao
  2014-07-16 12:24 ` [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3 Guo Chao
                   ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

This version is rebased on top of Gavin's patches of EEH support for guest and
related fixes which are supposed to be merged in 3.17.

Changed from v1:
	* Don't overwrite PE flags
	* Don't return segment alignment if M64 is not supported
	* Output M64 total size and segment size together with M32 and IO

Currently, all MMIO resources, including 64-bits MMIO resources are hooked
to PHB 32-bits MMIO BAR, which has limited space. If there're PCI devices
with large 64-bits MMIO BAR (could reach 1GB), we're running out of MMIO
resources (as well as PE numbers) quickly. The patchset reuses the M32
infrastructure to support M64:

   * The last M64 BAR covers all M64 aperatus and that's shared by all PEs.
   * Reuse ppc_md.pcibios_window_alignment() to affect resource assignment
     in PCI core so that we can get well segmented 64-bits window of PCI
     bridges.
   * One PCI bus might require multiple discrete M64 segments. We introduce
     "compound" PE to cover the case, so we know whether we're going to
     unfreeze any one in the group.

Gavin Shan (5):
  powerpc/powernv: Allow to freeze PE
  powerpc/powernv: Split ioda_eeh_get_state()
  powerpc/powernv: Handle compound PE
  powerpc/powernv: Handle compound PE for EEH
  powerpc/powernv: Handle compound PE in config accessors

Guo Chao (1):
  powerpc/powernv: Enable M64 aperatus for PHB3

 arch/powerpc/include/asm/opal.h                |  17 +-
 arch/powerpc/platforms/powernv/eeh-ioda.c      | 293 +++++++++-------
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c      | 448 +++++++++++++++++++++++--
 arch/powerpc/platforms/powernv/pci.c           |  87 +++--
 arch/powerpc/platforms/powernv/pci.h           |  23 ++
 6 files changed, 694 insertions(+), 176 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3
  2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
@ 2014-07-16 12:24 ` Guo Chao
  2014-07-17  4:40   ` Gavin Shan
  2014-07-16 12:24 ` [PATCH v2 2/6] powerpc/powernv: Allow to freeze PE Guo Chao
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

This patch enables M64 aperatus for PHB3.

We already had platform hook (ppc_md.pcibios_window_alignment) to affect
the PCI resource assignment done in PCI core so that each PE's M32 resource
was built on basis of M32 segment size. Similarly, we're using that for
M64 assignment on basis of M64 segment size.

   * We're using last M64 BAR to cover M64 aperatus, and it's shared by all
     256 PEs.
   * We don't support P7IOC yet. However, some function callbacks are added
     to (struct pnv_phb) so that we can reuse them on P7IOC in future.
   * PE, corresponding to PCI bus with large M64 BAR device attached, might
     span multiple M64 segments. We introduce "compound" PE to cover the case.
     The compound PE is a list of PEs and the master PE is used as before.
     The slave PEs are just for MMIO isolation.

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h           |   8 +-
 arch/powerpc/platforms/powernv/pci-ioda.c | 301 +++++++++++++++++++++++++++---
 arch/powerpc/platforms/powernv/pci.h      |  20 ++
 3 files changed, 307 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0da1dbd..ae885cc 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -340,6 +340,12 @@ enum OpalMveEnableAction {
 	OPAL_ENABLE_MVE = 1
 };
 
+enum OpalM64EnableAction {
+	OPAL_DISABLE_M64 = 0,
+	OPAL_ENABLE_M64_SPLIT = 1,
+	OPAL_ENABLE_M64_NON_SPLIT = 2
+};
+
 enum OpalPciResetScope {
 	OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
 	OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
@@ -768,7 +774,7 @@ int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type,
 				    uint16_t window_num,
 				    uint64_t starting_real_address,
 				    uint64_t starting_pci_address,
-				    uint16_t segment_size);
+				    uint64_t size);
 int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
 				    uint16_t window_type, uint16_t window_num,
 				    uint16_t segment_num);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 93fd815..2b659d9 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -36,6 +36,7 @@
 #include <asm/tce.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/firmware.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -82,6 +83,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
 		: : "r" (val), "r" (paddr) : "memory");
 }
 
+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+{
+	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
+		(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+}
+
 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
 	unsigned long pe;
@@ -106,6 +113,240 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
 	clear_bit(pe, phb->ioda.pe_alloc);
 }
 
+/* The default M64 BAR is shared by all PEs */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+	const char *desc;
+	struct resource *r;
+	s64 rc;
+
+	/* Configure the default M64 BAR */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 phb->ioda.m64_bar_idx,
+					 phb->ioda.m64_base,
+					 0, /* unused */
+					 phb->ioda.m64_size);
+	if (rc != OPAL_SUCCESS) {
+		desc = "configuring";
+		goto fail;
+	}
+
+	/* Enable the default M64 BAR */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      phb->ioda.m64_bar_idx,
+				      OPAL_ENABLE_M64_SPLIT);
+	if (rc != OPAL_SUCCESS) {
+		desc = "enabling";
+		goto fail;
+	}
+
+	/* Mark the M64 BAR assigned */
+	set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
+
+	/*
+	 * Strip off the segment used by the reserved PE, which is
+	 * expected to be 0 or the last one of the PE capacity.
+	 */
+	r = &phb->hose->mem_resources[1];
+	if (phb->ioda.reserved_pe == 0)
+		r->start += phb->ioda.m64_segsize;
+	else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+		r->end -= phb->ioda.m64_segsize;
+	else
+		pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
+			phb->ioda.reserved_pe);
+
+	return 0;
+
+fail:
+	pr_warn("  Failure %lld %s M64 BAR#%d\n",
+		rc, desc, phb->ioda.m64_bar_idx);
+	opal_pci_phb_mmio_enable(phb->opal_id,
+				 OPAL_M64_WINDOW_TYPE,
+				 phb->ioda.m64_bar_idx,
+				 OPAL_DISABLE_M64);
+	return -EIO;
+}
+
+static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
+{
+	resource_size_t sgsz = phb->ioda.m64_segsize;
+	struct pci_dev *pdev;
+	struct resource *r;
+	int base, step, i;
+
+	/*
+	 * Root bus always has full M64 range and root port has
+	 * M64 range used in reality. So we're checking root port
+	 * instead of root bus.
+	 */
+	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
+		for (i = PCI_BRIDGE_RESOURCES;
+		     i <= PCI_BRIDGE_RESOURCE_END; i++) {
+			r = &pdev->resource[i];
+			if (!r->parent ||
+			    !pnv_pci_is_mem_pref_64(r->flags))
+				continue;
+
+			base = (r->start - phb->ioda.m64_base) / sgsz;
+			for (step = 0; step < resource_size(r) / sgsz; step++)
+				set_bit(base + step, phb->ioda.pe_alloc);
+		}
+	}
+}
+
+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
+				 struct pci_bus *bus, int all)
+{
+	resource_size_t segsz = phb->ioda.m64_segsize;
+	struct pci_dev *pdev;
+	struct resource *r;
+	struct pnv_ioda_pe *master_pe, *pe;
+	unsigned long size, *pe_alloc;
+	bool found;
+	int start, i, j;
+
+	/* Root bus shouldn't use M64 */
+	if (pci_is_root_bus(bus))
+		return IODA_INVALID_PE;
+
+	/* We support only one M64 window on each bus */
+	found = false;
+	pci_bus_for_each_resource(bus, r, i) {
+		if (r && r->parent &&
+		    pnv_pci_is_mem_pref_64(r->flags)) {
+			found = true;
+			break;
+		}
+	}
+
+	/* No M64 window found ? */
+	if (!found)
+		return IODA_INVALID_PE;
+
+	/* Allocate bitmap */
+	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+	pe_alloc = kzalloc(size, GFP_KERNEL);
+	if (!pe_alloc) {
+		pr_warn("%s: Out of memory !\n",
+			__func__);
+		return IODA_INVALID_PE;
+	}
+
+	/*
+	 * Figure out reserved PE numbers by the PE
+	 * and its child PEs.
+	 */
+	start = (r->start - phb->ioda.m64_base) / segsz;
+	for (i = 0; i < resource_size(r) / segsz; i++)
+		set_bit(start + i, pe_alloc);
+
+	if (all)
+		goto done;
+
+	/*
+	 * If the PE doesn't cover all subordinate buses,
+	 * we need to subtract the reserved PEs for children.
+	 */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (!pdev->subordinate)
+			continue;
+
+		pci_bus_for_each_resource(pdev->subordinate, r, i) {
+			if (!r || !r->parent ||
+			    !pnv_pci_is_mem_pref_64(r->flags))
+				continue;
+
+			start = (r->start - phb->ioda.m64_base) / segsz;
+			for (j = 0; j < resource_size(r) / segsz ; j++)
+				clear_bit(start + j, pe_alloc);
+                }
+        }
+
+	/*
+	 * the current bus might not own M64 window and that's all
+	 * contributed by its child buses. For the case, we needn't
+	 * pick M64 dependent PE#.
+	 */
+	if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+		kfree(pe_alloc);
+		return IODA_INVALID_PE;
+	}
+
+	/*
+	 * Figure out the master PE and put all slave PEs to master
+	 * PE's list to form compound PE.
+	 */
+done:
+	master_pe = NULL;
+	i = -1;
+	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
+		phb->ioda.total_pe) {
+		pe = &phb->ioda.pe_array[i];
+		pe->phb = phb;
+		pe->pe_number = i;
+
+		if (!master_pe) {
+			pe->flags |= PNV_IODA_PE_MASTER;
+			INIT_LIST_HEAD(&pe->slaves);
+			master_pe = pe;
+		} else {
+			pe->flags |= PNV_IODA_PE_SLAVE;
+			pe->master = master_pe;
+			list_add_tail(&pe->list, &master_pe->slaves);
+		}
+	}
+
+	kfree(pe_alloc);
+	return master_pe->pe_number;
+}
+
+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	struct device_node *dn = hose->dn;
+	struct resource *res;
+	const u32 *r;
+	u64 pci_addr;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+		pr_info("  Firmware too old to support M64 window\n");
+		return;
+	}
+
+	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
+	if (!r) {
+		pr_info("  No <ibm,opal-m64-window> on %s\n",
+			dn->full_name);
+		return;
+	}
+
+	/* FIXME: Support M64 for P7IOC */
+	if (phb->type != PNV_PHB_IODA2) {
+		pr_info("  Not support M64 window\n");
+		return;
+	}
+
+	res = &hose->mem_resources[1];
+	res->start = of_translate_address(dn, r + 2);
+	res->end = res->start + of_read_number(r + 4, 2) - 1;
+	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+	pci_addr = of_read_number(r, 2);
+	hose->mem_offset[1] = res->start - pci_addr;
+
+	phb->ioda.m64_size = resource_size(res);
+	phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+	phb->ioda.m64_base = pci_addr;
+
+	/* Use last M64 BAR to cover M64 window */
+	phb->ioda.m64_bar_idx = 15;
+	phb->init_m64 = pnv_ioda2_init_m64;
+	phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
+	phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+}
+
 /* Currently those 2 are only used when MSIs are enabled, this will change
  * but in the meantime, we need to protect them to avoid warnings
  */
@@ -363,9 +604,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pnv_phb *phb = hose->private_data;
 	struct pnv_ioda_pe *pe;
-	int pe_num;
+	int pe_num = IODA_INVALID_PE;
+
+	/* Check if PE is determined by M64 */
+	if (phb->pick_m64_pe)
+		pe_num = phb->pick_m64_pe(phb, bus, all);
+
+	/* The PE number isn't pinned by M64 */
+	if (pe_num == IODA_INVALID_PE)
+		pe_num = pnv_ioda_alloc_pe(phb);
 
-	pe_num = pnv_ioda_alloc_pe(phb);
 	if (pe_num == IODA_INVALID_PE) {
 		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
 			__func__, pci_domain_nr(bus), bus->number);
@@ -373,7 +621,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 	}
 
 	pe = &phb->ioda.pe_array[pe_num];
-	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
 	pe->pbus = bus;
 	pe->pdev = NULL;
 	pe->tce32_seg = -1;
@@ -441,8 +689,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
 static void pnv_pci_ioda_setup_PEs(void)
 {
 	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
 
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		/* M64 layout might affect PE allocation */
+		if (phb->alloc_m64_pe)
+			phb->alloc_m64_pe(phb);
+
 		pnv_ioda_setup_PEs(hose->bus);
 	}
 }
@@ -1067,9 +1322,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
 				index++;
 			}
 		} else if (res->flags & IORESOURCE_MEM) {
-			/* WARNING: Assumes M32 is mem region 0 in PHB. We need to
-			 * harden that algorithm when we start supporting M64
-			 */
 			region.start = res->start -
 				       hose->mem_offset[0] -
 				       phb->ioda.m32_pci_base;
@@ -1190,7 +1442,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
 		bridge = bridge->bus->self;
 	}
 
-	/* We need support prefetchable memory window later */
+	/* We fall back to M32 if M64 isn't supported */
+	if (phb->ioda.m64_segsize &&
+	    pnv_pci_is_mem_pref_64(type))
+		return phb->ioda.m64_segsize;
 	if (type & IORESOURCE_MEM)
 		return phb->ioda.m32_segsize;
 
@@ -1311,6 +1566,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
 	if (prop32)
 		phb->ioda.reserved_pe = be32_to_cpup(prop32);
+
+	/* Parse 64-bit MMIO range */
+	pnv_ioda_parse_m64_window(phb);
+
 	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
 	/* FW Has already off top 64k of M32 space (MSI space) */
 	phb->ioda.m32_size += 0x10000;
@@ -1346,14 +1605,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	/* Calculate how many 32-bit TCE segments we have */
 	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
 
-	/* Clear unusable m64 */
-	hose->mem_resources[1].flags = 0;
-	hose->mem_resources[1].start = 0;
-	hose->mem_resources[1].end = 0;
-	hose->mem_resources[2].flags = 0;
-	hose->mem_resources[2].start = 0;
-	hose->mem_resources[2].end = 0;
-
 #if 0 /* We should really do that ... */
 	rc = opal_pci_set_phb_mem_window(opal->phb_id,
 					 window_type,
@@ -1363,12 +1614,16 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 					 segment_size);
 #endif
 
-	pr_info("  %d (%d) PE's M32: 0x%x [segment=0x%x]"
-		" IO: 0x%x [segment=0x%x]\n",
-		phb->ioda.total_pe,
-		phb->ioda.reserved_pe,
-		phb->ioda.m32_size, phb->ioda.m32_segsize,
-		phb->ioda.io_size, phb->ioda.io_segsize);
+	pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+		phb->ioda.total_pe, phb->ioda.reserved_pe,
+		phb->ioda.m32_size, phb->ioda.m32_segsize);
+	if (phb->ioda.m64_size)
+		pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
+			phb->ioda.m64_size, phb->ioda.m64_segsize);
+	if (phb->ioda.io_size)
+		pr_info("                  IO: 0x%x [segment=0x%x]\n",
+			phb->ioda.io_size, phb->ioda.io_segsize);
+
 
 	phb->hose->ops = &pnv_pci_ops;
 #ifdef CONFIG_EEH
@@ -1416,6 +1671,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
 		ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
 	}
+
+	/* Configure M64 window */
+	if (phb->init_m64 && phb->init_m64(phb))
+		hose->mem_resources[1].flags = 0;
 }
 
 void __init pnv_pci_init_ioda2_phb(struct device_node *np)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 676232c..def7171 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -21,6 +21,8 @@ enum pnv_phb_model {
 #define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
 #define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
 #define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
+#define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
+#define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_phb;
@@ -64,6 +66,10 @@ struct pnv_ioda_pe {
 	 */
 	int			mve_number;
 
+	/* PEs in compound case */
+	struct pnv_ioda_pe	*master;
+	struct list_head	slaves;
+
 	/* Link in list of PE#s */
 	struct list_head	dma_link;
 	struct list_head	list;
@@ -119,6 +125,9 @@ struct pnv_phb {
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	void (*shutdown)(struct pnv_phb *phb);
+	int (*init_m64)(struct pnv_phb *phb);
+	void (*alloc_m64_pe)(struct pnv_phb *phb);
+	int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
 
 	union {
 		struct {
@@ -129,9 +138,20 @@ struct pnv_phb {
 			/* Global bridge info */
 			unsigned int		total_pe;
 			unsigned int		reserved_pe;
+
+			/* 32-bit MMIO window */
 			unsigned int		m32_size;
 			unsigned int		m32_segsize;
 			unsigned int		m32_pci_base;
+
+			/* 64-bit MMIO window */
+			unsigned int		m64_bar_idx;
+			unsigned long		m64_size;
+			unsigned long		m64_segsize;
+			unsigned long		m64_base;
+			unsigned long		m64_bar_alloc;
+
+			/* IO ports */
 			unsigned int		io_size;
 			unsigned int		io_segsize;
 			unsigned int		io_pci_base;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/6] powerpc/powernv: Allow to freeze PE
  2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
  2014-07-16 12:24 ` [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3 Guo Chao
@ 2014-07-16 12:24 ` Guo Chao
  2014-07-16 12:24 ` [PATCH v2 3/6] powerpc/powernv: Split ioda_eeh_get_state() Guo Chao
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

From: Gavin Shan <gwshan@linux.vnet.ibm.com>

For compound PE, all PEs should be frozen if any one in the group
becomes frozen. Unfortunately, hardware doesn't always do that
automatically with help of PELTV. So we have to flirt with
PESTA/B a bit to freeze all PEs for the case.

The patch synchronizes with the firmware header and changes the name
of opal_pci_eeh_freeze_clear() to opal_pci_eeh_freeze_set() to
reflect its usage: the API can be used to clear or set frozen
state for the specified PE.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h                | 9 +++++----
 arch/powerpc/platforms/powernv/eeh-ioda.c      | 6 +++---
 arch/powerpc/platforms/powernv/opal-wrappers.S | 2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c      | 4 ++--
 arch/powerpc/platforms/powernv/pci.c           | 4 ++--
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index ae885cc..edbfe1c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -84,7 +84,7 @@ struct opal_sg_list {
 #define OPAL_PCI_EEH_FREEZE_STATUS		23
 #define OPAL_PCI_SHPC				24
 #define OPAL_CONSOLE_WRITE_BUFFER_SPACE		25
-#define OPAL_PCI_EEH_FREEZE_CLEAR		26
+#define OPAL_PCI_EEH_FREEZE_SET			26
 #define OPAL_PCI_PHB_MMIO_ENABLE		27
 #define OPAL_PCI_SET_PHB_MEM_WINDOW		28
 #define OPAL_PCI_MAP_PE_MMIO_WINDOW		29
@@ -167,7 +167,8 @@ enum OpalFreezeState {
 	OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
 };
 
-enum OpalEehFreezeActionToken {
+enum OpalPciFreezeActionToken {
+	OPAL_EEH_ACTION_SET_FREEZE_ALL = 0,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3
@@ -762,8 +763,8 @@ int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
 				   uint8_t *freeze_state,
 				   __be16 *pci_error_type,
 				   __be64 *phb_status);
-int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
-				  uint64_t eeh_action_token);
+int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
+				uint64_t eeh_action_token);
 int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
 
 
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f6abdb1..b3b4bc2 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -210,7 +210,7 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 		ret = 0;
 		break;
 	case EEH_OPT_THAW_MMIO:
-		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+		ret = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
 				OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
 		if (ret) {
 			pr_warning("%s: Failed to enable MMIO for "
@@ -221,7 +221,7 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 
 		break;
 	case EEH_OPT_THAW_DMA:
-		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+		ret = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
 				OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
 		if (ret) {
 			pr_warning("%s: Failed to enable DMA for "
@@ -809,7 +809,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 					hose->global_number, frozen_pe_no);
 				pr_info("EEH: PHB location: %s\n",
 					eeh_pe_loc_get(phb_pe));
-				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
+				opal_pci_eeh_freeze_set(phb->opal_id, frozen_pe_no,
 					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 				ret = EEH_NEXT_ERR_NONE;
 			} else if ((*pe)->state & EEH_PE_ISOLATED ||
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 4abbff2..abf5ffa 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -85,7 +85,7 @@ OPAL_CALL(opal_set_xive,			OPAL_SET_XIVE);
 OPAL_CALL(opal_get_xive,			OPAL_GET_XIVE);
 OPAL_CALL(opal_register_exception_handler,	OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
 OPAL_CALL(opal_pci_eeh_freeze_status,		OPAL_PCI_EEH_FREEZE_STATUS);
-OPAL_CALL(opal_pci_eeh_freeze_clear,		OPAL_PCI_EEH_FREEZE_CLEAR);
+OPAL_CALL(opal_pci_eeh_freeze_set,		OPAL_PCI_EEH_FREEZE_SET);
 OPAL_CALL(opal_pci_shpc,			OPAL_PCI_SHPC);
 OPAL_CALL(opal_pci_phb_mmio_enable,		OPAL_PCI_PHB_MMIO_ENABLE);
 OPAL_CALL(opal_pci_set_phb_mem_window,		OPAL_PCI_SET_PHB_MEM_WINDOW);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2b659d9..cc3c59b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -423,8 +423,8 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 				pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
 	if (rc)
 		pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
-	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
-				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
+				OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 
 	/* Add to all parents PELT-V */
 	while (parent) {
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index f91a4e5..02f4bd9 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -327,8 +327,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 					 PNV_PCI_DIAG_BUF_SIZE);
 	has_diag = (rc == OPAL_SUCCESS);
 
-	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
-				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	rc = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
+				     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 	if (rc) {
 		pr_warning("PCI %d: Failed to clear EEH freeze state"
 			   " for PE#%d, err %ld\n",
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 3/6] powerpc/powernv: Split ioda_eeh_get_state()
  2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
  2014-07-16 12:24 ` [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3 Guo Chao
  2014-07-16 12:24 ` [PATCH v2 2/6] powerpc/powernv: Allow to freeze PE Guo Chao
@ 2014-07-16 12:24 ` Guo Chao
  2014-07-16 12:24 ` [PATCH v2 4/6] powerpc/powernv: Handle compound PE Guo Chao
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

From: Gavin Shan <gwshan@linux.vnet.ibm.com>

Function ioda_eeh_get_state() is used to fetch EEH state for PHB
or PE. We're going to support compound PE and the function becomes
more complicated with that. The patch splits the function into two
functions for PHB and PE cases separately to improve readability.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 186 +++++++++++++++++-------------
 1 file changed, 105 insertions(+), 81 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index b3b4bc2..dd20ba8 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -255,130 +255,154 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose)
 	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
 }
 
-/**
- * ioda_eeh_get_state - Retrieve the state of PE
- * @pe: EEH PE
- *
- * The PE's state should be retrieved from the PEEV, PEST
- * IODA tables. Since the OPAL has exported the function
- * to do it, it'd better to use that.
- */
-static int ioda_eeh_get_state(struct eeh_pe *pe)
+static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
 {
-	s64 ret = 0;
+	struct pnv_phb *phb = pe->phb->private_data;
 	u8 fstate;
 	__be16 pcierr;
-	u32 pe_no;
-	int result;
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
+	s64 rc;
+	int result = 0;
+
+	rc = opal_pci_eeh_freeze_status(phb->opal_id,
+					pe->addr,
+					&fstate,
+					&pcierr,
+					NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting PHB#%x state\n",
+			__func__, rc, phb->hose->global_number);
+                return EEH_STATE_NOT_SUPPORT;
+        }
 
 	/*
-	 * Sanity check on PE address. The PHB PE address should
-	 * be zero.
+	 * Check PHB state. If the PHB is frozen for the
+	 * first time, to dump the PHB diag-data.
 	 */
-	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
-		pr_err("%s: PE address %x out of range [0, %x] "
-		       "on PHB#%x\n",
-		       __func__, pe->addr, phb->ioda.total_pe,
-		       hose->global_number);
-		return EEH_STATE_NOT_SUPPORT;
+	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+	} else if (!(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(phb->hose);
 	}
 
+	return result;
+}
+
+static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	u8 fstate;
+	__be16 pcierr;
+	s64 rc;
+	int result;
+
 	/*
-	 * If we're in middle of PE reset, return normal
-	 * state to keep EEH core going. For PHB reset, we
-	 * still expect to have fenced PHB cleared with
-	 * PHB reset.
+	 * We don't clobber hardware frozen state until PE
+	 * reset is completed. In order to keep EEH core
+	 * moving forward, we have to return operational
+	 * state during PE reset.
 	 */
-	if (!(pe->type & EEH_PE_PHB) &&
-	    (pe->state & EEH_PE_RESET)) {
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_DMA_ACTIVE |
+	if (pe->state & EEH_PE_RESET) {
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
 			  EEH_STATE_MMIO_ENABLED |
 			  EEH_STATE_DMA_ENABLED);
 		return result;
 	}
 
-	/* Retrieve PE status through OPAL */
-	pe_no = pe->addr;
-	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
-			&fstate, &pcierr, NULL);
-	if (ret) {
-		pr_err("%s: Failed to get EEH status on "
-		       "PHB#%x-PE#%x\n, err=%lld\n",
-		       __func__, hose->global_number, pe_no, ret);
+	/* Fetch state from hardware */
+	rc = opal_pci_eeh_freeze_status(phb->opal_id,
+					pe->addr,
+					&fstate,
+					&pcierr,
+					NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
+			__func__, rc, phb->hose->global_number, pe->addr);
 		return EEH_STATE_NOT_SUPPORT;
 	}
 
-	/* Check PHB status */
-	if (pe->type & EEH_PE_PHB) {
-		result = 0;
-		result &= ~EEH_STATE_RESET_ACTIVE;
-
-		if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
-			result |= EEH_STATE_MMIO_ACTIVE;
-			result |= EEH_STATE_DMA_ACTIVE;
-			result |= EEH_STATE_MMIO_ENABLED;
-			result |= EEH_STATE_DMA_ENABLED;
-		} else if (!(pe->state & EEH_PE_ISOLATED)) {
-			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-			ioda_eeh_phb_diag(hose);
-		}
-
-		return result;
-	}
-
-	/* Parse result out */
-	result = 0;
+	/* Figure out state */
 	switch (fstate) {
 	case OPAL_EEH_STOPPED_NOT_FROZEN:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		result |= EEH_STATE_MMIO_ACTIVE;
-		result |= EEH_STATE_DMA_ACTIVE;
-		result |= EEH_STATE_MMIO_ENABLED;
-		result |= EEH_STATE_DMA_ENABLED;
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
 		break;
 	case OPAL_EEH_STOPPED_MMIO_FREEZE:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		result |= EEH_STATE_DMA_ACTIVE;
-		result |= EEH_STATE_DMA_ENABLED;
+		result = (EEH_STATE_DMA_ACTIVE |
+			  EEH_STATE_DMA_ENABLED);
 		break;
 	case OPAL_EEH_STOPPED_DMA_FREEZE:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		result |= EEH_STATE_MMIO_ACTIVE;
-		result |= EEH_STATE_MMIO_ENABLED;
+		result = (EEH_STATE_MMIO_ACTIVE |
+			  EEH_STATE_MMIO_ENABLED);
 		break;
 	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
-		result &= ~EEH_STATE_RESET_ACTIVE;
+		result = 0;
 		break;
 	case OPAL_EEH_STOPPED_RESET:
-		result |= EEH_STATE_RESET_ACTIVE;
+		result = EEH_STATE_RESET_ACTIVE;
 		break;
 	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
-		result |= EEH_STATE_UNAVAILABLE;
+		result = EEH_STATE_UNAVAILABLE;
 		break;
 	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
-		result |= EEH_STATE_NOT_SUPPORT;
+		result = EEH_STATE_NOT_SUPPORT;
 		break;
 	default:
-		pr_warning("%s: Unexpected EEH status 0x%x "
-			   "on PHB#%x-PE#%x\n",
-			   __func__, fstate, hose->global_number, pe_no);
+		result = EEH_STATE_NOT_SUPPORT;
+		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
+			__func__, phb->hose->global_number,
+			pe->addr, fstate);
 	}
 
-	/* Dump PHB diag-data for frozen PE */
-	if (result != EEH_STATE_NOT_SUPPORT &&
-	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
-	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
+	/*
+	 * If the PE is switching to frozen state for the
+	 * first time, to dump the PHB diag-data.
+	 */
+	if (!(result & EEH_STATE_NOT_SUPPORT) &&
+	    !(result & EEH_STATE_UNAVAILABLE) &&
+	    !(result & EEH_STATE_MMIO_ACTIVE) &&
+	    !(result & EEH_STATE_DMA_ACTIVE)  &&
 	    !(pe->state & EEH_PE_ISOLATED)) {
 		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-		ioda_eeh_phb_diag(hose);
+		ioda_eeh_phb_diag(phb->hose);
 	}
 
 	return result;
 }
 
+/**
+ * ioda_eeh_get_state - Retrieve the state of PE
+ * @pe: EEH PE
+ *
+ * The PE's state should be retrieved from the PEEV, PEST
+ * IODA tables. Since the OPAL has exported the function
+ * to do it, it'd better to use that.
+ */
+static int ioda_eeh_get_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+
+	/* Sanity check on PE number. PHB PE should have 0 */
+	if (pe->addr < 0 ||
+	    pe->addr >= phb->ioda.total_pe) {
+		pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n",
+			__func__, phb->hose->global_number,
+			pe->addr, phb->ioda.total_pe);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	if (pe->type & EEH_PE_PHB)
+		return ioda_eeh_get_phb_state(pe);
+
+	return ioda_eeh_get_pe_state(pe);
+}
+
 static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
 {
 	s64 rc = OPAL_HARDWARE;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 4/6] powerpc/powernv: Handle compound PE
  2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
                   ` (2 preceding siblings ...)
  2014-07-16 12:24 ` [PATCH v2 3/6] powerpc/powernv: Split ioda_eeh_get_state() Guo Chao
@ 2014-07-16 12:24 ` Guo Chao
  2014-07-16 12:24 ` [PATCH v2 5/6] powerpc/powernv: Handle compound PE for EEH Guo Chao
  2014-07-16 12:24 ` [PATCH v2 6/6] powerpc/powernv: Handle compound PE in config accessors Guo Chao
  5 siblings, 0 replies; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

From: Gavin Shan <gwshan@linux.vnet.ibm.com>

The patch introduces 3 PHB callbacks: compound PE state retrieval,
force freezing and unfreezing compound PE. The PCI config accessors
and PowerNV EEH backend can use them in subsequent patches.

We don't export the capability of compound PE to the EEH core, which
helps avoid adding more complexity to the EEH core.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 143 ++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h      |   3 +
 2 files changed, 146 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cc3c59b..6d262f7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -347,6 +347,146 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
 	phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
 }
 
+static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+	struct pnv_ioda_pe *slave;
+	s64 rc;
+
+	/* Fetch master PE */
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Freeze master PE */
+	rc = opal_pci_eeh_freeze_set(phb->opal_id,
+				     pe_no,
+				     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number, pe_no);
+		return;
+	}
+
+	/* Freeze slave PEs */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_set(phb->opal_id,
+					     slave->pe_number,
+					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+		if (rc != OPAL_SUCCESS)
+			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				slave->pe_number);
+	}
+}
+
+int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
+{
+	struct pnv_ioda_pe *pe, *slave;
+	s64 rc;
+
+	/* Find master PE */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Clear frozen state for master PE */
+	rc = opal_pci_eeh_freeze_set(phb->opal_id, pe_no, opt);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+			__func__, rc, opt, phb->hose->global_number, pe_no);
+		return -EIO;
+	}
+
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return 0;
+
+	/* Clear frozen state for slave PEs */
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_set(phb->opal_id,
+					     slave->pe_number,
+					     opt);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+				__func__, rc, opt, phb->hose->global_number,
+				slave->pe_number);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *slave, *pe;
+	u8 fstate, state;
+	__be16 pcierr;
+	s64 rc;
+
+	/* Sanity check on PE number */
+	if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+		return OPAL_EEH_STOPPED_PERM_UNAVAIL;
+
+	/*
+	 * Fetch the master PE and the PE instance might be
+	 * not initialized yet.
+	 */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Check the master PE */
+	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+					&state, &pcierr, NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting "
+			"PHB#%x-PE#%x state\n",
+			__func__, rc,
+			phb->hose->global_number, pe_no);
+		return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+	}
+
+	/* Check the slave PE */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return state;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						slave->pe_number,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting "
+				"PHB#%x-PE#%x state\n",
+				__func__, rc,
+				phb->hose->global_number, slave->pe_number);
+			return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+		}
+
+		/*
+		 * Override the result based on the ascending
+		 * priority.
+		 */
+		if (fstate > state)
+			state = fstate;
+	}
+
+	return state;
+}
+
 /* Currently those 2 are only used when MSIs are enabled, this will change
  * but in the meantime, we need to protect them to avoid warnings
  */
@@ -1626,6 +1766,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
 
 	phb->hose->ops = &pnv_pci_ops;
+	phb->get_pe_state = pnv_ioda_get_pe_state;
+	phb->freeze_pe = pnv_ioda_freeze_pe;
+	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
 #ifdef CONFIG_EEH
 	phb->eeh_ops = &ioda_eeh_ops;
 #endif
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index def7171..b160e6b 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -128,6 +128,9 @@ struct pnv_phb {
 	int (*init_m64)(struct pnv_phb *phb);
 	void (*alloc_m64_pe)(struct pnv_phb *phb);
 	int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
+	int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
+	void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
+	int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
 
 	union {
 		struct {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 5/6] powerpc/powernv: Handle compound PE for EEH
  2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
                   ` (3 preceding siblings ...)
  2014-07-16 12:24 ` [PATCH v2 4/6] powerpc/powernv: Handle compound PE Guo Chao
@ 2014-07-16 12:24 ` Guo Chao
  2014-07-16 12:24 ` [PATCH v2 6/6] powerpc/powernv: Handle compound PE in config accessors Guo Chao
  5 siblings, 0 replies; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

From: Gavin Shan <gwshan@linux.vnet.ibm.com>

The patch handles compound PE for the EEH backend. If one specific
PE in a compound group has been frozen, we enforce freezing of
all PEs in the group. If we enable DMA or MMIO for one PE
in a compound group, DMA or MMIO of all PEs in the group will be
enabled.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 125 +++++++++++++++++++-----------
 1 file changed, 78 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index dd20ba8..7f6bee1 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -187,10 +187,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
  */
 static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 {
-	s64 ret;
-	u32 pe_no;
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
+	int enable, ret = 0;
+	s64 rc;
 
 	/* Check on PE number */
 	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
@@ -201,41 +201,38 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 		return -EINVAL;
 	}
 
-	pe_no = pe->addr;
 	switch (option) {
 	case EEH_OPT_DISABLE:
-		ret = -EEXIST;
-		break;
+		return -EPERM;
 	case EEH_OPT_ENABLE:
-		ret = 0;
-		break;
+		return 0;
 	case EEH_OPT_THAW_MMIO:
-		ret = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
-				OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
-		if (ret) {
-			pr_warning("%s: Failed to enable MMIO for "
-				   "PHB#%x-PE#%x, err=%lld\n",
-				__func__, hose->global_number, pe_no, ret);
-			return -EIO;
-		}
-
+		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
 		break;
 	case EEH_OPT_THAW_DMA:
-		ret = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
-				OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
-		if (ret) {
-			pr_warning("%s: Failed to enable DMA for "
-				   "PHB#%x-PE#%x, err=%lld\n",
-				__func__, hose->global_number, pe_no, ret);
-			return -EIO;
-		}
-
+		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
 		break;
 	default:
-		pr_warning("%s: Invalid option %d\n", __func__, option);
+		pr_warn("%s: Invalid option %d\n",
+			__func__, option);
 		return -EINVAL;
 	}
 
+	/* If PHB supports compound PE, to handle it */
+	if (phb->unfreeze_pe) {
+		ret = phb->unfreeze_pe(phb, pe->addr, enable);
+	} else {
+		rc = opal_pci_eeh_freeze_set(phb->opal_id,
+					     pe->addr,
+					     enable);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
+				__func__, rc, option, phb->hose->global_number,
+				pe->addr);
+			ret = -EIO;
+		}
+	}
+
 	return ret;
 }
 
@@ -313,16 +310,23 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
 		return result;
 	}
 
-	/* Fetch state from hardware */
-	rc = opal_pci_eeh_freeze_status(phb->opal_id,
-					pe->addr,
-					&fstate,
-					&pcierr,
-					NULL);
-	if (rc != OPAL_SUCCESS) {
-		pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
-			__func__, rc, phb->hose->global_number, pe->addr);
-		return EEH_STATE_NOT_SUPPORT;
+	/*
+	 * Fetch PE state from hardware. If the PHB
+	 * supports compound PE, let it handle that.
+	 */
+	if (phb->get_pe_state) {
+		fstate = phb->get_pe_state(phb, pe->addr);
+	} else {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						pe->addr,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
+				__func__, rc, phb->hose->global_number, pe->addr);
+			return EEH_STATE_NOT_SUPPORT;
+		}
 	}
 
 	/* Figure out state */
@@ -361,6 +365,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
 	}
 
 	/*
+	 * If PHB supports compound PE, to freeze all
+	 * slave PEs for consistency.
+	 *
 	 * If the PE is switching to frozen state for the
 	 * first time, to dump the PHB diag-data.
 	 */
@@ -369,6 +376,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
 	    !(result & EEH_STATE_MMIO_ACTIVE) &&
 	    !(result & EEH_STATE_DMA_ACTIVE)  &&
 	    !(pe->state & EEH_PE_ISOLATED)) {
+		if (phb->freeze_pe)
+			phb->freeze_pe(phb, pe->addr);
+
 		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 		ioda_eeh_phb_diag(phb->hose);
 	}
@@ -696,22 +706,43 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 static int ioda_eeh_get_pe(struct pci_controller *hose,
 			   u16 pe_no, struct eeh_pe **pe)
 {
-	struct eeh_pe *phb_pe, *dev_pe;
-	struct eeh_dev dev;
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pnv_pe;
+	struct eeh_pe *dev_pe;
+	struct eeh_dev edev;
 
-	/* Find the PHB PE */
-	phb_pe = eeh_phb_pe_get(hose);
-	if (!phb_pe)
-		return -EEXIST;
+	/*
+	 * If PHB supports compound PE, to fetch
+	 * the master PE because slave PE is invisible
+	 * to EEH core.
+	 */
+	if (phb->get_pe_state) {
+		pnv_pe = &phb->ioda.pe_array[pe_no];
+		if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
+			pnv_pe = pnv_pe->master;
+			WARN_ON(!pnv_pe ||
+				!(pnv_pe->flags & PNV_IODA_PE_MASTER));
+			pe_no = pnv_pe->pe_number;
+		}
+	}
 
 	/* Find the PE according to PE# */
-	memset(&dev, 0, sizeof(struct eeh_dev));
-	dev.phb = hose;
-	dev.pe_config_addr = pe_no;
-	dev_pe = eeh_pe_get(&dev);
-	if (!dev_pe) return -EEXIST;
+	memset(&edev, 0, sizeof(struct eeh_dev));
+	edev.phb = hose;
+	edev.pe_config_addr = pe_no;
+	dev_pe = eeh_pe_get(&edev);
+	if (!dev_pe)
+		return -EEXIST;
 
+	/*
+	 * At this point, we're sure the compound PE should
+	 * be put into frozen state.
+	 */
 	*pe = dev_pe;
+	if (phb->freeze_pe &&
+	    !(dev_pe->state & EEH_PE_ISOLATED))
+		phb->freeze_pe(phb, pe_no);
+
 	return 0;
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 6/6] powerpc/powernv: Handle compound PE in config accessors
  2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
                   ` (4 preceding siblings ...)
  2014-07-16 12:24 ` [PATCH v2 5/6] powerpc/powernv: Handle compound PE for EEH Guo Chao
@ 2014-07-16 12:24 ` Guo Chao
  5 siblings, 0 replies; 8+ messages in thread
From: Guo Chao @ 2014-07-16 12:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: yan, gwshan

From: Gavin Shan <gwshan@linux.vnet.ibm.com>

The PCI config accessors check for PE frozen state and clear it if
EEH isn't functional. The patch handles compound PE in config accessors
if PHB supports it. For consistency, all PEs will be put into frozen
state if any one in compound group gets frozen by hardware.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci.c | 89 ++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 02f4bd9..353e35b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -319,43 +319,52 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
 static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 {
 	unsigned long flags, rc;
-	int has_diag;
+	int has_diag, ret = 0;
 
 	spin_lock_irqsave(&phb->lock, flags);
 
+	/* Fetch PHB diag-data */
 	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
 					 PNV_PCI_DIAG_BUF_SIZE);
 	has_diag = (rc == OPAL_SUCCESS);
 
-	rc = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
-				     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
-	if (rc) {
-		pr_warning("PCI %d: Failed to clear EEH freeze state"
-			   " for PE#%d, err %ld\n",
-			   phb->hose->global_number, pe_no, rc);
-
-		/* For now, let's only display the diag buffer when we fail to clear
-		 * the EEH status. We'll do more sensible things later when we have
-		 * proper EEH support. We need to make sure we don't pollute ourselves
-		 * with the normal errors generated when probing empty slots
-		 */
-		if (has_diag)
-			pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
-		else
-			pr_warning("PCI %d: No diag data available\n",
-				   phb->hose->global_number);
+	/* If PHB supports compound PE, to handle it */
+	if (phb->unfreeze_pe) {
+		ret = phb->unfreeze_pe(phb,
+				       pe_no,
+				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	} else {
+		rc = opal_pci_eeh_freeze_set(phb->opal_id,
+					     pe_no,
+					     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+		if (rc) {
+			pr_warn("%s: Failure %ld clearing frozen "
+				"PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				pe_no);
+			ret = -EIO;
+		}
 	}
 
+	/*
+	 * For now, let's only display the diag buffer when we fail to clear
+	 * the EEH status. We'll do more sensible things later when we have
+	 * proper EEH support. We need to make sure we don't pollute ourselves
+	 * with the normal errors generated when probing empty slots
+	 */
+	if (has_diag && ret)
+		pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+
 	spin_unlock_irqrestore(&phb->lock, flags);
 }
 
 static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 				     struct device_node *dn)
 {
-	s64	rc;
 	u8	fstate;
 	__be16	pcierr;
-	u32	pe_no;
+	int	pe_no;
+	s64	rc;
 
 	/*
 	 * Get the PE#. During the PCI probe stage, we might not
@@ -370,20 +379,42 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 			pe_no = phb->ioda.reserved_pe;
 	}
 
-	/* Read freeze status */
-	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
-					NULL);
-	if (rc) {
-		pr_warning("%s: Can't read EEH status (PE#%d) for "
-			   "%s, err %lld\n",
-			   __func__, pe_no, dn->full_name, rc);
-		return;
+	/*
+	 * Fetch frozen state. If the PHB support compound PE,
+	 * we need handle that case.
+	 */
+	if (phb->get_pe_state) {
+		fstate = phb->get_pe_state(phb, pe_no);
+	} else {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						pe_no,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
+				__func__, rc, phb->hose->global_number, pe_no);
+			return;
+		}
 	}
+
 	cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
 		(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
 		pe_no, fstate);
-	if (fstate != 0)
+
+	/* Clear the frozen state if applicable */
+	if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
+	    fstate == OPAL_EEH_STOPPED_DMA_FREEZE  ||
+	    fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
+		/*
+		 * If PHB supports compound PE, freeze it for
+		 * consistency.
+		 */
+		if (phb->freeze_pe)
+			phb->freeze_pe(phb, pe_no);
+
 		pnv_pci_handle_eeh_config(phb, pe_no);
+	}
 }
 
 int pnv_pci_cfg_read(struct device_node *dn,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3
  2014-07-16 12:24 ` [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3 Guo Chao
@ 2014-07-17  4:40   ` Gavin Shan
  0 siblings, 0 replies; 8+ messages in thread
From: Gavin Shan @ 2014-07-17  4:40 UTC (permalink / raw)
  To: Guo Chao; +Cc: linuxppc-dev, gwshan

On Wed, Jul 16, 2014 at 08:24:30PM +0800, Guo Chao wrote:
>This patch enables M64 aperatus for PHB3.
>
>We already had platform hook (ppc_md.pcibios_window_alignment) to affect
>the PCI resource assignment done in PCI core so that each PE's M32 resource
>was built on basis of M32 segment size. Similarly, we're using that for
>M64 assignment on basis of M64 segment size.
>
>   * We're using last M64 BAR to cover M64 aperatus, and it's shared by all
>     256 PEs.
>   * We don't support P7IOC yet. However, some function callbacks are added
>     to (struct pnv_phb) so that we can reuse them on P7IOC in future.
>   * PE, corresponding to PCI bus with large M64 BAR device attached, might
>     span multiple M64 segments. We introduce "compound" PE to cover the case.
>     The compound PE is a list of PEs and the master PE is used as before.
>     The slave PEs are just for MMIO isolation.
>
>Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>

Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>

It looks good to me except the PELTV bits (I told you before), which can be
fixed later. Without PELTV, we can rely on software to maintain the master/slave
relationship. However, it's worthwhile to have PELTV set up correctly so that inbound
ER errors can freeze multiple PEs (if applicable) in hardware.

Also, I run it on P7 box and no problem found there.

Thanks,
Gavin

>---
> arch/powerpc/include/asm/opal.h           |   8 +-
> arch/powerpc/platforms/powernv/pci-ioda.c | 301 +++++++++++++++++++++++++++---
> arch/powerpc/platforms/powernv/pci.h      |  20 ++
> 3 files changed, 307 insertions(+), 22 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
>index 0da1dbd..ae885cc 100644
>--- a/arch/powerpc/include/asm/opal.h
>+++ b/arch/powerpc/include/asm/opal.h
>@@ -340,6 +340,12 @@ enum OpalMveEnableAction {
> 	OPAL_ENABLE_MVE = 1
> };
>
>+enum OpalM64EnableAction {
>+	OPAL_DISABLE_M64 = 0,
>+	OPAL_ENABLE_M64_SPLIT = 1,
>+	OPAL_ENABLE_M64_NON_SPLIT = 2
>+};
>+
> enum OpalPciResetScope {
> 	OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
> 	OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
>@@ -768,7 +774,7 @@ int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type,
> 				    uint16_t window_num,
> 				    uint64_t starting_real_address,
> 				    uint64_t starting_pci_address,
>-				    uint16_t segment_size);
>+				    uint64_t size);
> int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
> 				    uint16_t window_type, uint16_t window_num,
> 				    uint16_t segment_num);
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 93fd815..2b659d9 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -36,6 +36,7 @@
> #include <asm/tce.h>
> #include <asm/xics.h>
> #include <asm/debug.h>
>+#include <asm/firmware.h>
>
> #include "powernv.h"
> #include "pci.h"
>@@ -82,6 +83,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
> 		: : "r" (val), "r" (paddr) : "memory");
> }
>
>+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
>+{
>+	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
>+		(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
>+}
>+
> static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
> {
> 	unsigned long pe;
>@@ -106,6 +113,240 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
> 	clear_bit(pe, phb->ioda.pe_alloc);
> }
>
>+/* The default M64 BAR is shared by all PEs */
>+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
>+{
>+	const char *desc;
>+	struct resource *r;
>+	s64 rc;
>+
>+	/* Configure the default M64 BAR */
>+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
>+					 OPAL_M64_WINDOW_TYPE,
>+					 phb->ioda.m64_bar_idx,
>+					 phb->ioda.m64_base,
>+					 0, /* unused */
>+					 phb->ioda.m64_size);
>+	if (rc != OPAL_SUCCESS) {
>+		desc = "configuring";
>+		goto fail;
>+	}
>+
>+	/* Enable the default M64 BAR */
>+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
>+				      OPAL_M64_WINDOW_TYPE,
>+				      phb->ioda.m64_bar_idx,
>+				      OPAL_ENABLE_M64_SPLIT);
>+	if (rc != OPAL_SUCCESS) {
>+		desc = "enabling";
>+		goto fail;
>+	}
>+
>+	/* Mark the M64 BAR assigned */
>+	set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
>+
>+	/*
>+	 * Strip off the segment used by the reserved PE, which is
>+	 * expected to be 0 or last one of PE capabicity.
>+	 */
>+	r = &phb->hose->mem_resources[1];
>+	if (phb->ioda.reserved_pe == 0)
>+		r->start += phb->ioda.m64_segsize;
>+	else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
>+		r->end -= phb->ioda.m64_segsize;
>+	else
>+		pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
>+			phb->ioda.reserved_pe);
>+
>+	return 0;
>+
>+fail:
>+	pr_warn("  Failure %lld %s M64 BAR#%d\n",
>+		rc, desc, phb->ioda.m64_bar_idx);
>+	opal_pci_phb_mmio_enable(phb->opal_id,
>+				 OPAL_M64_WINDOW_TYPE,
>+				 phb->ioda.m64_bar_idx,
>+				 OPAL_DISABLE_M64);
>+	return -EIO;
>+}
>+
>+static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
>+{
>+	resource_size_t sgsz = phb->ioda.m64_segsize;
>+	struct pci_dev *pdev;
>+	struct resource *r;
>+	int base, step, i;
>+
>+	/*
>+	 * Root bus always has full M64 range and root port has
>+	 * M64 range used in reality. So we're checking root port
>+	 * instead of root bus.
>+	 */
>+	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
>+		for (i = PCI_BRIDGE_RESOURCES;
>+		     i <= PCI_BRIDGE_RESOURCE_END; i++) {
>+			r = &pdev->resource[i];
>+			if (!r->parent ||
>+			    !pnv_pci_is_mem_pref_64(r->flags))
>+				continue;
>+
>+			base = (r->start - phb->ioda.m64_base) / sgsz;
>+			for (step = 0; step < resource_size(r) / sgsz; step++)
>+				set_bit(base + step, phb->ioda.pe_alloc);
>+		}
>+	}
>+}
>+
>+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
>+				 struct pci_bus *bus, int all)
>+{
>+	resource_size_t segsz = phb->ioda.m64_segsize;
>+	struct pci_dev *pdev;
>+	struct resource *r;
>+	struct pnv_ioda_pe *master_pe, *pe;
>+	unsigned long size, *pe_alloc;
>+	bool found;
>+	int start, i, j;
>+
>+	/* Root bus shouldn't use M64 */
>+	if (pci_is_root_bus(bus))
>+		return IODA_INVALID_PE;
>+
>+	/* We support only one M64 window on each bus */
>+	found = false;
>+	pci_bus_for_each_resource(bus, r, i) {
>+		if (r && r->parent &&
>+		    pnv_pci_is_mem_pref_64(r->flags)) {
>+			found = true;
>+			break;
>+		}
>+	}
>+
>+	/* No M64 window found ? */
>+	if (!found)
>+		return IODA_INVALID_PE;
>+
>+	/* Allocate bitmap */
>+	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
>+	pe_alloc = kzalloc(size, GFP_KERNEL);
>+	if (!pe_alloc) {
>+		pr_warn("%s: Out of memory !\n",
>+			__func__);
>+		return IODA_INVALID_PE;
>+	}
>+
>+	/*
>+	 * Figure out reserved PE numbers by the PE
>+	 * the its child PEs.
>+	 */
>+	start = (r->start - phb->ioda.m64_base) / segsz;
>+	for (i = 0; i < resource_size(r) / segsz; i++)
>+		set_bit(start + i, pe_alloc);
>+
>+	if (all)
>+		goto done;
>+
>+	/*
>+	 * If the PE doesn't cover all subordinate buses,
>+	 * we need subtract from reserved PEs for children.
>+	 */
>+	list_for_each_entry(pdev, &bus->devices, bus_list) {
>+		if (!pdev->subordinate)
>+			continue;
>+
>+		pci_bus_for_each_resource(pdev->subordinate, r, i) {
>+			if (!r || !r->parent ||
>+			    !pnv_pci_is_mem_pref_64(r->flags))
>+				continue;
>+
>+			start = (r->start - phb->ioda.m64_base) / segsz;
>+			for (j = 0; j < resource_size(r) / segsz ; j++)
>+				clear_bit(start + j, pe_alloc);
>+                }
>+        }
>+
>+	/*
>+	 * the current bus might not own M64 window and that's all
>+	 * contributed by its child buses. For the case, we needn't
>+	 * pick M64 dependent PE#.
>+	 */
>+	if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
>+		kfree(pe_alloc);
>+		return IODA_INVALID_PE;
>+	}
>+
>+	/*
>+	 * Figure out the master PE and put all slave PEs to master
>+	 * PE's list to form compound PE.
>+	 */
>+done:
>+	master_pe = NULL;
>+	i = -1;
>+	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
>+		phb->ioda.total_pe) {
>+		pe = &phb->ioda.pe_array[i];
>+		pe->phb = phb;
>+		pe->pe_number = i;
>+
>+		if (!master_pe) {
>+			pe->flags |= PNV_IODA_PE_MASTER;
>+			INIT_LIST_HEAD(&pe->slaves);
>+			master_pe = pe;
>+		} else {
>+			pe->flags |= PNV_IODA_PE_SLAVE;
>+			pe->master = master_pe;
>+			list_add_tail(&pe->list, &master_pe->slaves);
>+		}
>+	}
>+
>+	kfree(pe_alloc);
>+	return master_pe->pe_number;
>+}
>+
>+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
>+{
>+	struct pci_controller *hose = phb->hose;
>+	struct device_node *dn = hose->dn;
>+	struct resource *res;
>+	const u32 *r;
>+	u64 pci_addr;
>+
>+	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
>+		pr_info("  Firmware too old to support M64 window\n");
>+		return;
>+	}
>+
>+	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
>+	if (!r) {
>+		pr_info("  No <ibm,opal-m64-window> on %s\n",
>+			dn->full_name);
>+		return;
>+	}
>+
>+	/* FIXME: Support M64 for P7IOC */
>+	if (phb->type != PNV_PHB_IODA2) {
>+		pr_info("  Not support M64 window\n");
>+		return;
>+	}
>+
>+	res = &hose->mem_resources[1];
>+	res->start = of_translate_address(dn, r + 2);
>+	res->end = res->start + of_read_number(r + 4, 2) - 1;
>+	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
>+	pci_addr = of_read_number(r, 2);
>+	hose->mem_offset[1] = res->start - pci_addr;
>+
>+	phb->ioda.m64_size = resource_size(res);
>+	phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
>+	phb->ioda.m64_base = pci_addr;
>+
>+	/* Use last M64 BAR to cover M64 window */
>+	phb->ioda.m64_bar_idx = 15;
>+	phb->init_m64 = pnv_ioda2_init_m64;
>+	phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
>+	phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
>+}
>+
> /* Currently those 2 are only used when MSIs are enabled, this will change
>  * but in the meantime, we need to protect them to avoid warnings
>  */
>@@ -363,9 +604,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
> 	struct pci_controller *hose = pci_bus_to_host(bus);
> 	struct pnv_phb *phb = hose->private_data;
> 	struct pnv_ioda_pe *pe;
>-	int pe_num;
>+	int pe_num = IODA_INVALID_PE;
>+
>+	/* Check if PE is determined by M64 */
>+	if (phb->pick_m64_pe)
>+		pe_num = phb->pick_m64_pe(phb, bus, all);
>+
>+	/* The PE number isn't pinned by M64 */
>+	if (pe_num == IODA_INVALID_PE)
>+		pe_num = pnv_ioda_alloc_pe(phb);
>
>-	pe_num = pnv_ioda_alloc_pe(phb);
> 	if (pe_num == IODA_INVALID_PE) {
> 		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
> 			__func__, pci_domain_nr(bus), bus->number);
>@@ -373,7 +621,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
> 	}
>
> 	pe = &phb->ioda.pe_array[pe_num];
>-	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
>+	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
> 	pe->pbus = bus;
> 	pe->pdev = NULL;
> 	pe->tce32_seg = -1;
>@@ -441,8 +689,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
> static void pnv_pci_ioda_setup_PEs(void)
> {
> 	struct pci_controller *hose, *tmp;
>+	struct pnv_phb *phb;
>
> 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>+		phb = hose->private_data;
>+
>+		/* M64 layout might affect PE allocation */
>+		if (phb->alloc_m64_pe)
>+			phb->alloc_m64_pe(phb);
>+
> 		pnv_ioda_setup_PEs(hose->bus);
> 	}
> }
>@@ -1067,9 +1322,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
> 				index++;
> 			}
> 		} else if (res->flags & IORESOURCE_MEM) {
>-			/* WARNING: Assumes M32 is mem region 0 in PHB. We need to
>-			 * harden that algorithm when we start supporting M64
>-			 */
> 			region.start = res->start -
> 				       hose->mem_offset[0] -
> 				       phb->ioda.m32_pci_base;
>@@ -1190,7 +1442,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
> 		bridge = bridge->bus->self;
> 	}
>
>-	/* We need support prefetchable memory window later */
>+	/* We fail back to M32 if M64 isn't supported */
>+	if (phb->ioda.m64_segsize &&
>+	    pnv_pci_is_mem_pref_64(type))
>+		return phb->ioda.m64_segsize;
> 	if (type & IORESOURCE_MEM)
> 		return phb->ioda.m32_segsize;
>
>@@ -1311,6 +1566,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
> 	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
> 	if (prop32)
> 		phb->ioda.reserved_pe = be32_to_cpup(prop32);
>+
>+	/* Parse 64-bit MMIO range */
>+	pnv_ioda_parse_m64_window(phb);
>+
> 	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
> 	/* FW Has already off top 64k of M32 space (MSI space) */
> 	phb->ioda.m32_size += 0x10000;
>@@ -1346,14 +1605,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
> 	/* Calculate how many 32-bit TCE segments we have */
> 	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
>
>-	/* Clear unusable m64 */
>-	hose->mem_resources[1].flags = 0;
>-	hose->mem_resources[1].start = 0;
>-	hose->mem_resources[1].end = 0;
>-	hose->mem_resources[2].flags = 0;
>-	hose->mem_resources[2].start = 0;
>-	hose->mem_resources[2].end = 0;
>-
> #if 0 /* We should really do that ... */
> 	rc = opal_pci_set_phb_mem_window(opal->phb_id,
> 					 window_type,
>@@ -1363,12 +1614,16 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
> 					 segment_size);
> #endif
>
>-	pr_info("  %d (%d) PE's M32: 0x%x [segment=0x%x]"
>-		" IO: 0x%x [segment=0x%x]\n",
>-		phb->ioda.total_pe,
>-		phb->ioda.reserved_pe,
>-		phb->ioda.m32_size, phb->ioda.m32_segsize,
>-		phb->ioda.io_size, phb->ioda.io_segsize);
>+	pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
>+		phb->ioda.total_pe, phb->ioda.reserved_pe,
>+		phb->ioda.m32_size, phb->ioda.m32_segsize);
>+	if (phb->ioda.m64_size)
>+		pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
>+			phb->ioda.m64_size, phb->ioda.m64_segsize);
>+	if (phb->ioda.io_size)
>+		pr_info("                  IO: 0x%x [segment=0x%x]\n",
>+			phb->ioda.io_size, phb->ioda.io_segsize);
>+
>
> 	phb->hose->ops = &pnv_pci_ops;
> #ifdef CONFIG_EEH
>@@ -1416,6 +1671,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
> 		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
> 		ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
> 	}
>+
>+	/* Configure M64 window */
>+	if (phb->init_m64 && phb->init_m64(phb))
>+		hose->mem_resources[1].flags = 0;
> }
>
> void __init pnv_pci_init_ioda2_phb(struct device_node *np)
>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>index 676232c..def7171 100644
>--- a/arch/powerpc/platforms/powernv/pci.h
>+++ b/arch/powerpc/platforms/powernv/pci.h
>@@ -21,6 +21,8 @@ enum pnv_phb_model {
> #define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
> #define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
> #define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
>+#define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
>+#define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
>
> /* Data associated with a PE, including IOMMU tracking etc.. */
> struct pnv_phb;
>@@ -64,6 +66,10 @@ struct pnv_ioda_pe {
> 	 */
> 	int			mve_number;
>
>+	/* PEs in compound case */
>+	struct pnv_ioda_pe	*master;
>+	struct list_head	slaves;
>+
> 	/* Link in list of PE#s */
> 	struct list_head	dma_link;
> 	struct list_head	list;
>@@ -119,6 +125,9 @@ struct pnv_phb {
> 	void (*fixup_phb)(struct pci_controller *hose);
> 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
> 	void (*shutdown)(struct pnv_phb *phb);
>+	int (*init_m64)(struct pnv_phb *phb);
>+	void (*alloc_m64_pe)(struct pnv_phb *phb);
>+	int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
>
> 	union {
> 		struct {
>@@ -129,9 +138,20 @@ struct pnv_phb {
> 			/* Global bridge info */
> 			unsigned int		total_pe;
> 			unsigned int		reserved_pe;
>+
>+			/* 32-bit MMIO window */
> 			unsigned int		m32_size;
> 			unsigned int		m32_segsize;
> 			unsigned int		m32_pci_base;
>+
>+			/* 64-bit MMIO window */
>+			unsigned int		m64_bar_idx;
>+			unsigned long		m64_size;
>+			unsigned long		m64_segsize;
>+			unsigned long		m64_base;
>+			unsigned long		m64_bar_alloc;
>+
>+			/* IO ports */
> 			unsigned int		io_size;
> 			unsigned int		io_segsize;
> 			unsigned int		io_pci_base;
>-- 
>1.9.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-07-17  4:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-16 12:24 [PATCH v2 0/6] powerpc/powernv: Support M64 window Guo Chao
2014-07-16 12:24 ` [PATCH v2 1/6] powerpc/powernv: Enable M64 aperatus for PHB3 Guo Chao
2014-07-17  4:40   ` Gavin Shan
2014-07-16 12:24 ` [PATCH v2 2/6] powerpc/powernv: Allow to freeze PE Guo Chao
2014-07-16 12:24 ` [PATCH v2 3/6] powerpc/powernv: Split ioda_eeh_get_state() Guo Chao
2014-07-16 12:24 ` [PATCH v2 4/6] powerpc/powernv: Handle compound PE Guo Chao
2014-07-16 12:24 ` [PATCH v2 5/6] powerpc/powernv: Handle compound PE for EEH Guo Chao
2014-07-16 12:24 ` [PATCH v2 6/6] powerpc/powernv: Handle compound PE in config accessors Guo Chao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.