linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure()
@ 2014-09-30  2:38 Gavin Shan
  2014-09-30  2:38 ` [PATCH 02/21] powerpc/eeh: Add eeh_pe_state sysfs entry Gavin Shan
                   ` (19 more replies)
  0 siblings, 20 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Vishal Mansur, Gavin Shan

eeh_check_failure() is used to check frozen state of the PE which
owns the indicated I/O address. The argument "val" of the function
isn't used. The patch drops it and return the frozen state of the
PE as expected.

Cc: Vishal Mansur <vmansur@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h | 29 ++++++++++++++---------------
 arch/powerpc/kernel/eeh.c      | 15 ++++++---------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9983c3d..adcddb1 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -269,8 +269,7 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
-unsigned long eeh_check_failure(const volatile void __iomem *token,
-				unsigned long val);
+int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
 void eeh_add_device_early(struct device_node *);
@@ -321,9 +320,9 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data)
 
 static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
 
-static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+static inline int eeh_check_failure(const volatile void __iomem *token)
 {
-	return val;
+	return 0;
 }
 
 #define eeh_dev_check_failure(x) (0)
@@ -354,7 +353,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr)
 {
 	u8 val = in_8(addr);
 	if (EEH_POSSIBLE_ERROR(val, u8))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -362,7 +361,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr)
 {
 	u16 val = in_le16(addr);
 	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -370,7 +369,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr)
 {
 	u32 val = in_le32(addr);
 	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -378,7 +377,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr)
 {
 	u64 val = in_le64(addr);
 	if (EEH_POSSIBLE_ERROR(val, u64))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -386,7 +385,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr)
 {
 	u16 val = in_be16(addr);
 	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -394,7 +393,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr)
 {
 	u32 val = in_be32(addr);
 	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -402,7 +401,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr)
 {
 	u64 val = in_be64(addr);
 	if (EEH_POSSIBLE_ERROR(val, u64))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -416,7 +415,7 @@ static inline void eeh_memcpy_fromio(void *dest, const
 	 * were copied. Check all four bytes.
 	 */
 	if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32))
-		eeh_check_failure(src, *((u32 *)(dest + n - 4)));
+		eeh_check_failure(src);
 }
 
 /* in-string eeh macros */
@@ -425,7 +424,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf,
 {
 	_insb(addr, buf, ns);
 	if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8))
-		eeh_check_failure(addr, *(u8*)buf);
+		eeh_check_failure(addr);
 }
 
 static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
@@ -433,7 +432,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
 {
 	_insw(addr, buf, ns);
 	if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16))
-		eeh_check_failure(addr, *(u16*)buf);
+		eeh_check_failure(addr);
 }
 
 static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
@@ -441,7 +440,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
 {
 	_insl(addr, buf, nl);
 	if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32))
-		eeh_check_failure(addr, *(u32*)buf);
+		eeh_check_failure(addr);
 }
 
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 59a64f8..17cf52ba 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -542,17 +542,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
 
 /**
  * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
- * @token: I/O token, should be address in the form 0xA....
- * @val: value, should be all 1's (XXX why do we need this arg??)
+ * @token: I/O address
  *
- * Check for an EEH failure at the given token address.  Call this
+ * Check for an EEH failure at the given I/O address. Call this
  * routine if the result of a read was all 0xff's and you want to
- * find out if this is due to an EEH slot freeze event.  This routine
+ * find out if this is due to an EEH slot freeze event. This routine
  * will query firmware for the EEH status.
  *
  * Note this routine is safe to call in an interrupt context.
  */
-unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+int eeh_check_failure(const volatile void __iomem *token)
 {
 	unsigned long addr;
 	struct eeh_dev *edev;
@@ -562,13 +561,11 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 	edev = eeh_addr_cache_get_dev(addr);
 	if (!edev) {
 		eeh_stats.no_device++;
-		return val;
+		return 0;
 	}
 
-	eeh_dev_check_failure(edev);
-	return val;
+	return eeh_dev_check_failure(edev);
 }
-
 EXPORT_SYMBOL(eeh_check_failure);
 
 
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 02/21] powerpc/eeh: Add eeh_pe_state sysfs entry
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-10-01  3:43   ` [02/21] " Michael Ellerman
  2014-09-30  2:38 ` [PATCH 03/21] powerpc/eeh: Freeze PE before PE reset Gavin Shan
                   ` (18 subsequent siblings)
  19 siblings, 1 reply; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The patch adds sysfs entry "eeh_pe_state". Reading on it returns
the PE's state while writing to it clears the frozen state. It's
used to check or clear the PE frozen state from userland for
debugging purpose.

The patch also replaces printk(KERN_WARNING ...) with pr_warn() in
eeh_sysfs.c

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
v2: Not output PE number and return error from the sysfs entry
    if necessary
---
 arch/powerpc/kernel/eeh_sysfs.c | 60 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index e2595ba..eb15be4 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -54,6 +54,62 @@ EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
 EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
 EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
 
+static ssize_t eeh_pe_state_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+	int state;
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+
+	state = eeh_ops->get_state(edev->pe, NULL);
+	return sprintf(buf, "%08x %08x\n",
+		       state, edev->pe->state);
+}
+
+static ssize_t eeh_pe_state_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+	int ret;
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+
+	/* Nothing to do if it's not frozen */
+	if (!(edev->pe->state & EEH_PE_ISOLATED))
+		return count;
+
+	/* Enable MMIO */
+	ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_MMIO);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling MMIO for PHB#%d-PE#%d\n",
+			__func__, ret, edev->pe->phb->global_number,
+			edev->pe->addr);
+		return -EIO;
+	}
+
+	/* Enable DMA */
+	ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_DMA);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling DMA for PHB#%d-PE#%d\n",
+			__func__, ret, edev->pe->phb->global_number,
+			edev->pe->addr);
+		return -EIO;
+	}
+
+	/* Clear software state */
+	eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(eeh_pe_state);
+
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -68,9 +124,10 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
 	if (rc)
-		printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+		pr_warn("EEH: Unable to create sysfs entries\n");
 	else if (edev)
 		edev->mode |= EEH_DEV_SYSFS;
 }
@@ -92,6 +149,7 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
 	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
 	if (edev)
 		edev->mode &= ~EEH_DEV_SYSFS;
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 03/21] powerpc/eeh: Freeze PE before PE reset
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
  2014-09-30  2:38 ` [PATCH 02/21] powerpc/eeh: Add eeh_pe_state sysfs entry Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 04/21] powerpc/eeh: Reenable PCI devices after reset Gavin Shan
                   ` (17 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The patch adds one more option (EEH_OPT_FREEZE_PE) to set_option()
method to proactively freeze PE, which will be issued before resetting
pass-throughed PE to drop MMIO access during reset because it's
always contributing to recursive EEH error.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h               |  1 +
 arch/powerpc/kernel/eeh.c                    |  7 +++++
 arch/powerpc/platforms/powernv/eeh-ioda.c    | 43 +++++++++++++++++++++-------
 arch/powerpc/platforms/pseries/eeh_pseries.c |  4 ++-
 4 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index adcddb1..f98b1b5 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -167,6 +167,7 @@ enum {
 #define EEH_OPT_ENABLE		1	/* EEH enable	*/
 #define EEH_OPT_THAW_MMIO	2	/* MMIO enable	*/
 #define EEH_OPT_THAW_DMA	3	/* DMA enable	*/
+#define EEH_OPT_FREEZE_PE	4	/* Freeze PE	*/
 #define EEH_STATE_UNAVAILABLE	(1 << 0)	/* State unavailable	*/
 #define EEH_STATE_NOT_SUPPORT	(1 << 1)	/* EEH not supported	*/
 #define EEH_STATE_RESET_ACTIVE	(1 << 2)	/* Active reset		*/
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cf52ba..898c75f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1382,6 +1382,13 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
 		break;
 	case EEH_RESET_HOT:
 	case EEH_RESET_FUNDAMENTAL:
+		/*
+		 * Proactively freeze the PE to drop all MMIO access
+		 * during reset, which should be banned as it's always
+		 * cause recursive EEH error.
+		 */
+		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+
 		ret = eeh_ops->reset(pe, option);
 		break;
 	default:
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index c945bed..f3027b9 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -189,6 +189,7 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 {
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
+	bool freeze_pe = false;
 	int enable, ret = 0;
 	s64 rc;
 
@@ -212,6 +213,10 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 	case EEH_OPT_THAW_DMA:
 		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
 		break;
+	case EEH_OPT_FREEZE_PE:
+		freeze_pe = true;
+		enable = OPAL_EEH_ACTION_SET_FREEZE_ALL;
+		break;
 	default:
 		pr_warn("%s: Invalid option %d\n",
 			__func__, option);
@@ -219,17 +224,35 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 	}
 
 	/* If PHB supports compound PE, to handle it */
-	if (phb->unfreeze_pe) {
-		ret = phb->unfreeze_pe(phb, pe->addr, enable);
+	if (freeze_pe) {
+		if (phb->freeze_pe) {
+			phb->freeze_pe(phb, pe->addr);
+		} else {
+			rc = opal_pci_eeh_freeze_set(phb->opal_id,
+						     pe->addr,
+						     enable);
+			if (rc != OPAL_SUCCESS) {
+				pr_warn("%s: Failure %lld freezing "
+					"PHB#%x-PE#%x\n",
+					__func__, rc,
+					phb->hose->global_number, pe->addr);
+				ret = -EIO;
+			}
+		}
 	} else {
-		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
-					       pe->addr,
-					       enable);
-		if (rc != OPAL_SUCCESS) {
-			pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
-				__func__, rc, option, phb->hose->global_number,
-				pe->addr);
-			ret = -EIO;
+		if (phb->unfreeze_pe) {
+			ret = phb->unfreeze_pe(phb, pe->addr, enable);
+		} else {
+			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+						       pe->addr,
+						       enable);
+			if (rc != OPAL_SUCCESS) {
+				pr_warn("%s: Failure %lld enable %d "
+					"for PHB#%x-PE#%x\n",
+					__func__, rc, option,
+					phb->hose->global_number, pe->addr);
+				ret = -EIO;
+			}
 		}
 	}
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b080538..b645dc6 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -349,7 +349,9 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
 		if (pe->addr)
 			config_addr = pe->addr;
 		break;
-
+	case EEH_OPT_FREEZE_PE:
+		/* Not support */
+		return 0;
 	default:
 		pr_err("%s: Invalid option %d\n",
 			__func__, option);
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 04/21] powerpc/eeh: Reenable PCI devices after reset
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
  2014-09-30  2:38 ` [PATCH 02/21] powerpc/eeh: Add eeh_pe_state sysfs entry Gavin Shan
  2014-09-30  2:38 ` [PATCH 03/21] powerpc/eeh: Freeze PE before PE reset Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 05/21] powerpc/eeh: Clear frozen state on passing device Gavin Shan
                   ` (16 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The PCI devices that have been passed through are enabled before
reset, we need restore to the enabled state after reset. Otherwise,
MMIO access might be issued to disabled devices after reset and
causes exceptional recursive EEH error.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 62 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 898c75f..db2841c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1342,6 +1342,53 @@ int eeh_pe_get_state(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_get_state);
 
+static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	int ret = 0;
+
+	/* Restore config space */
+	eeh_pe_restore_bars(pe);
+
+	/*
+	 * Reenable PCI devices as the devices passed
+	 * through are always enabled before the reset.
+	 */
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		ret = pci_reenable_device(pdev);
+		if (ret) {
+			pr_warn("%s: Failure %d reenabling %s\n",
+				__func__, ret, pci_name(pdev));
+			return ret;
+		}
+	}
+
+	/* The PE is still in frozen state */
+	ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling MMIO for PHB#%x-PE#%x\n",
+			__func__, ret, pe->phb->global_number, pe->addr);
+		return ret;
+	}
+
+	ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling DMA for PHB#%x-PE#%x\n",
+			__func__, ret, pe->phb->global_number, pe->addr);
+		return ret;
+	}
+
+	/* Clear software isolated state */
+	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+	return ret;
+}
+
 /**
  * eeh_pe_reset - Issue PE reset according to specified type
  * @pe: EEH PE
@@ -1368,17 +1415,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
 		if (ret)
 			break;
 
-		/*
-		 * The PE is still in frozen state and we need to clear
-		 * that. It's good to clear frozen state after deassert
-		 * to avoid messy IO access during reset, which might
-		 * cause recursive frozen PE.
-		 */
-		ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
-		if (!ret)
-			ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
-		if (!ret)
-			eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+		ret = eeh_pe_reenable_devices(pe);
 		break;
 	case EEH_RESET_HOT:
 	case EEH_RESET_FUNDAMENTAL:
@@ -1417,9 +1454,6 @@ int eeh_pe_configure(struct eeh_pe *pe)
 	if (!pe)
 		return -ENODEV;
 
-	/* Restore config space for the affected devices */
-	eeh_pe_restore_bars(pe);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 05/21] powerpc/eeh: Clear frozen state on passing device
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (2 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 04/21] powerpc/eeh: Reenable PCI devices after reset Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 06/21] powerpc/powernv: Sync header with firmware Gavin Shan
                   ` (15 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

When passing through device, its PE might have been put into frozen
state. One obvious example would be: the passed PE is forced to be
offline because of hitting maximal allowed EEH errors in userland.
In that case, the frozen state won't be cleared and then the PE is
returned back to host, which might not have chance detecting and
recovering from it.

The patch adds more check when passing through device and clear the
PE frozen state if necessary.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index db2841c..e436d5e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1150,6 +1150,8 @@ void eeh_remove_device(struct pci_dev *dev)
 int eeh_dev_open(struct pci_dev *pdev)
 {
 	struct eeh_dev *edev;
+	int flag = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	int ret = -ENODEV;
 
 	mutex_lock(&eeh_dev_mutex);
 
@@ -1162,6 +1164,38 @@ int eeh_dev_open(struct pci_dev *pdev)
 	if (!edev || !edev->pe)
 		goto out;
 
+	/*
+	 * The PE might have been put into frozen state, but we
+	 * didn't detect that yet. The passed through PCI devices
+	 * in frozen PE won't work properly. Clear the frozen state
+	 * in advance.
+	 */
+	ret = eeh_ops->get_state(edev->pe, NULL);
+	if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT &&
+	    (ret & flag) != flag) {
+		ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_MMIO);
+		if (ret) {
+			pr_warn("%s: Failure %d enabling MMIO "
+				"for PHB#%x-PE#%x\n",
+				__func__, ret, edev->phb->global_number,
+				edev->pe->addr);
+			goto out;
+		}
+
+		ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_DMA);
+		if (ret) {
+			pr_warn("%s: Failure %d enabling DMA "
+				"for PHB#%x-PE#%x\n",
+				__func__, ret, edev->phb->global_number,
+				edev->pe->addr);
+			goto out;
+		}
+	}
+
+	/* Clear software isolated state */
+	if (edev->pe->state & EEH_PE_ISOLATED)
+		eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED);
+
 	/* Increase PE's pass through count */
 	atomic_inc(&edev->pe->pass_dev_cnt);
 	mutex_unlock(&eeh_dev_mutex);
@@ -1169,7 +1203,7 @@ int eeh_dev_open(struct pci_dev *pdev)
 	return 0;
 out:
 	mutex_unlock(&eeh_dev_mutex);
-	return -ENODEV;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(eeh_dev_open);
 
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 06/21] powerpc/powernv: Sync header with firmware
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (3 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 05/21] powerpc/eeh: Clear frozen state on passing device Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 07/21] powerpc/eeh: Introduce eeh_ops::err_inject Gavin Shan
                   ` (14 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Mike Qiu, Gavin Shan

The patch synchronizes firmware header file (opal.h) for PCI error
injection.

Signed-off-by: Mike Qiu <qiudayu@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
v2: Seperate enum error type and function and adjust the names
    according to mpe's suggestion. Also replacing "injct" with
    "inject"
---
 arch/powerpc/include/asm/opal.h                | 32 ++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 2 files changed, 33 insertions(+)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 7127b87..f157941 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -148,6 +148,7 @@ struct opal_sg_list {
 #define OPAL_SET_PARAM				90
 #define OPAL_DUMP_RESEND			91
 #define OPAL_DUMP_INFO2				94
+#define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
 #define OPAL_REGISTER_DUMP_REGION		101
@@ -200,6 +201,35 @@ enum OpalPciErrorSeverity {
 	OPAL_EEH_SEV_INF	= 5
 };
 
+enum OpalErrinjectType {
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR	= 0,
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64	= 1,
+};
+
+enum OpalErrinjectFunc {
+	/* IOA bus specific errors */
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR	= 0,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA	= 1,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR	= 2,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA	= 3,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR	= 4,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA	= 5,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR	= 6,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA	= 7,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR	= 8,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA	= 9,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR	= 10,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA	= 11,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR	= 12,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA	= 13,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER	= 14,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET	= 15,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR	= 16,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA	= 17,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER	= 18,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET	= 19,
+};
+
 enum OpalShpcAction {
 	OPAL_SHPC_GET_LINK_STATE = 0,
 	OPAL_SHPC_GET_SLOT_STATE = 1
@@ -820,6 +850,8 @@ int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
 				  uint64_t eeh_action_token);
 int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
 				uint64_t eeh_action_token);
+int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type,
+			    uint32_t func, uint64_t addr, uint64_t mask);
 int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
 
 
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 5718855..15942b6 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -184,6 +184,7 @@ OPAL_CALL(opal_register_exception_handler,	OPAL_REGISTER_OPAL_EXCEPTION_HANDLER)
 OPAL_CALL(opal_pci_eeh_freeze_status,		OPAL_PCI_EEH_FREEZE_STATUS);
 OPAL_CALL(opal_pci_eeh_freeze_clear,		OPAL_PCI_EEH_FREEZE_CLEAR);
 OPAL_CALL(opal_pci_eeh_freeze_set,		OPAL_PCI_EEH_FREEZE_SET);
+OPAL_CALL(opal_pci_err_inject,			OPAL_PCI_ERR_INJECT);
 OPAL_CALL(opal_pci_shpc,			OPAL_PCI_SHPC);
 OPAL_CALL(opal_pci_phb_mmio_enable,		OPAL_PCI_PHB_MMIO_ENABLE);
 OPAL_CALL(opal_pci_set_phb_mem_window,		OPAL_PCI_SET_PHB_MEM_WINDOW);
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 07/21] powerpc/eeh: Introduce eeh_ops::err_inject
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (4 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 06/21] powerpc/powernv: Sync header with firmware Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 08/21] powerpc/powernv: Add PCI error injection debugfs entry Gavin Shan
                   ` (13 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Mike Qiu, Gavin Shan

The patch introduces eeh_ops::err_inject(), which allows to inject
specified errors to indicated PE for testing purpose. The functionality
isn't support on pSeries platform. On PowerNV, the functionality
relies on OPAL API opal_pci_err_inject().

Signed-off-by: Mike Qiu <qiudayu@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h               |  2 ++
 arch/powerpc/platforms/powernv/eeh-ioda.c    | 44 ++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/eeh-powernv.c | 26 ++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h         |  2 ++
 arch/powerpc/platforms/pseries/eeh_pseries.c |  1 +
 5 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f98b1b5..ae8c441 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -199,6 +199,8 @@ struct eeh_ops {
 	int (*wait_state)(struct eeh_pe *pe, int max_wait);
 	int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
 	int (*configure_bridge)(struct eeh_pe *pe);
+	int (*err_inject)(struct eeh_pe *pe, int type, int func,
+			  unsigned long addr, unsigned long mask);
 	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
 	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
 	int (*next_error)(struct eeh_pe **pe);
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f3027b9..ddb38cc 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -673,6 +673,49 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
 	return 0;
 }
 
+static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func,
+			       unsigned long addr, unsigned long mask)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	s64 ret;
+
+	/* Sanity check on error type */
+	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
+	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
+		pr_warn("%s: Invalid error type %d\n",
+			__func__, type);
+		return -ERANGE;
+	}
+
+	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
+	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
+		pr_warn("%s: Invalid error function %d\n",
+			__func__, func);
+		return -ERANGE;
+	}
+
+	/* Firmware supports error injection ? */
+	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
+		pr_warn("%s: Firmware doesn't support error injection\n",
+			__func__);
+		return -ENXIO;
+	}
+
+	/* Do error injection */
+	ret = opal_pci_err_inject(phb->opal_id, pe->addr,
+				  type, func, addr, mask);
+	if (ret != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld injecting error "
+			"%d-%d to PHB#%x-PE#%x\n",
+			__func__, ret, type, func,
+			hose->global_number, pe->addr);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
 {
 	/* GEM */
@@ -994,5 +1037,6 @@ struct pnv_eeh_ops ioda_eeh_ops = {
 	.reset			= ioda_eeh_reset,
 	.get_log		= ioda_eeh_get_log,
 	.configure_bridge	= ioda_eeh_configure_bridge,
+	.err_inject		= ioda_eeh_err_inject,
 	.next_error		= ioda_eeh_next_error
 };
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index fd7a16f..3e89cbf 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -359,6 +359,31 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
 }
 
 /**
+ * powernv_pe_err_inject - Inject specified error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: specific error type
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject specified error, which is
+ * determined by @type and @func, to the indicated PE for
+ * testing purpose.
+ */
+static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
+				  unsigned long addr, unsigned long mask)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	if (phb->eeh_ops && phb->eeh_ops->err_inject)
+		ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
+
+	return ret;
+}
+
+/**
  * powernv_eeh_next_error - Retrieve next EEH error to handle
  * @pe: Affected PE
  *
@@ -414,6 +439,7 @@ static struct eeh_ops powernv_eeh_ops = {
 	.wait_state             = powernv_eeh_wait_state,
 	.get_log                = powernv_eeh_get_log,
 	.configure_bridge       = powernv_eeh_configure_bridge,
+	.err_inject		= powernv_eeh_err_inject,
 	.read_config            = pnv_pci_cfg_read,
 	.write_config           = pnv_pci_cfg_write,
 	.next_error		= powernv_eeh_next_error,
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 48494d4..27594cf 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -85,6 +85,8 @@ struct pnv_eeh_ops {
 	int (*get_log)(struct eeh_pe *pe, int severity,
 		       char *drv_log, unsigned long len);
 	int (*configure_bridge)(struct eeh_pe *pe);
+	int (*err_inject)(struct eeh_pe *pe, int type, int func,
+			  unsigned long addr, unsigned long mask);
 	int (*next_error)(struct eeh_pe **pe);
 };
 #endif /* CONFIG_EEH */
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b645dc6..4fc5ff9 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -731,6 +731,7 @@ static struct eeh_ops pseries_eeh_ops = {
 	.wait_state		= pseries_eeh_wait_state,
 	.get_log		= pseries_eeh_get_log,
 	.configure_bridge       = pseries_eeh_configure_bridge,
+	.err_inject		= NULL,
 	.read_config		= pseries_eeh_read_config,
 	.write_config		= pseries_eeh_write_config,
 	.next_error		= NULL,
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 08/21] powerpc/powernv: Add PCI error injection debugfs entry
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (5 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 07/21] powerpc/eeh: Introduce eeh_ops::err_inject Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 09/21] powerpc/powernv: Clear PAPR error injection registers Gavin Shan
                   ` (12 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Mike Qiu, Gavin Shan

From: Mike Qiu <qiudayu@linux.vnet.ibm.com>

The patch adds debugfs file (/sys/kernel/debug/powerpc/PCIxxxx/
err_injct), which accepts following formated string, to support
error injection. It will be used to support userland utility
"errinjct" in future.

  "pe_no:0:function:address:mask" - 32-bits PCI errors
  "pe_no:1:function:address:mask" - 64-bits PCI errors

Signed-off-by: Mike Qiu <qiudayu@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 52 +++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index ddb38cc..3389cf0 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -66,6 +66,54 @@ static struct notifier_block ioda_eeh_nb = {
 };
 
 #ifdef CONFIG_DEBUG_FS
+static ssize_t ioda_eeh_ei_write(struct file *filp,
+				 const char __user *user_buf,
+				 size_t count, loff_t *ppos)
+{
+	struct pci_controller *hose = filp->private_data;
+	struct pnv_phb *phb = hose->private_data;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	int pe_no, type, func;
+	unsigned long addr, mask;
+	char buf[50];
+	int ret;
+
+	if (!phb->eeh_ops || !phb->eeh_ops->err_inject)
+		return -ENXIO;
+
+	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
+	if (!ret)
+		return -EFAULT;
+
+	/* Retrieve parameters */
+	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
+		     &pe_no, &type, &func, &addr, &mask);
+	if (ret != 5)
+		return -EINVAL;
+
+	/* Retrieve PE */
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return -ENOMEM;
+	edev->phb = hose;
+	edev->pe_config_addr = pe_no;
+	pe = eeh_pe_get(edev);
+	kfree(edev);
+	if (!pe)
+		return -ENODEV;
+
+	/* Do error injection */
+	ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
+	return ret < 0 ? ret : count;
+}
+
+static const struct file_operations ioda_eeh_ei_fops = {
+	.open   = simple_open,
+	.llseek = no_llseek,
+	.write  = ioda_eeh_ei_write,
+};
+
 static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
 {
 	struct pci_controller *hose = data;
@@ -152,6 +200,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
 	if (!phb->has_dbgfs && phb->dbgfs) {
 		phb->has_dbgfs = 1;
 
+		debugfs_create_file("err_injct", 0200,
+				    phb->dbgfs, hose,
+				    &ioda_eeh_ei_fops);
+
 		debugfs_create_file("err_injct_outbound", 0600,
 				    phb->dbgfs, hose,
 				    &ioda_eeh_outb_dbgfs_ops);
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 09/21] powerpc/powernv: Clear PAPR error injection registers
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (6 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 08/21] powerpc/powernv: Add PCI error injection debugfs entry Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:38 ` [PATCH 10/21] powerpc/eeh: Clear frozen device state in time Gavin Shan
                   ` (11 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The frozen state on one specific PE is probably caused by error
injection, which is done with help of PAPR error injection registers.
According to the hardware spec, those registers should be cleared
automatically after one-shot frozen PE. However, that's not always
true, at least on P7IOC of Firebird-L. So we have to clear them
before doing PE reset to avoid recursive EEH errors at recovery
stage.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 3389cf0..e516b7f 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -682,6 +682,31 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 	if (pe->type & EEH_PE_PHB) {
 		ret = ioda_eeh_phb_reset(hose, option);
 	} else {
+		struct pnv_phb *phb;
+		s64 rc;
+
+		/*
+		 * The frozen PE might be caused by PAPR error injection
+		 * registers, which are expected to be cleared after hitting
+		 * frozen PE as stated in the hardware spec. Unfortunately,
+		 * that's not true on P7IOC. So we have to clear it manually
+		 * to avoid recursive EEH errors during recovery.
+		 */
+		phb = hose->private_data;
+		if (phb->model == PNV_PHB_MODEL_P7IOC &&
+		    (option == EEH_RESET_HOT ||
+		    option == EEH_RESET_FUNDAMENTAL)) {
+			rc = opal_pci_reset(phb->opal_id,
+					    OPAL_PHB_ERROR,
+					    OPAL_ASSERT_RESET);
+			if (rc != OPAL_SUCCESS) {
+				pr_warn("%s: Failure %lld clearing "
+					"error injection registers\n",
+					__func__, rc);
+				return -EIO;
+			}
+		}
+
 		bus = eeh_pe_bus_get(pe);
 		if (pci_is_root_bus(bus) ||
 		    pci_is_root_bus(bus->parent))
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 10/21] powerpc/eeh: Clear frozen device state in time
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (7 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 09/21] powerpc/powernv: Clear PAPR error injection registers Gavin Shan
@ 2014-09-30  2:38 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 11/21] powerpc/eeh: Fix improper condition in eeh_pci_enable() Gavin Shan
                   ` (10 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan, stable

The problem was reported by Carol: In the scenario of passing mlx4
adapter to guest, EEH error could be recovered successfully. When
returning the device back to host, the driver (mlx4_core.ko)
couldn't be loaded successfully because of error number -5 (-EIO)
returned from mlx4_get_ownership(), which hits offlined PCI device.
The root cause is that we missed to put the affected devices into
normal state on clearing PE isolated state right after PE reset.

The patch fixes above issue by putting the affected devices to
normal state when clearing PE isolated state in eeh_pe_state_clear().

Cc: stable@vger.kernel.org
Reported-by: Carol L. Soto <clsoto@us.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh_pe.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 00e3844..eef08f0 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
 
 	/* Keep the state of permanently removed PE intact */
 	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
@@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
 
 	pe->state &= ~state;
 
-	/* Clear check count since last isolation */
-	if (state & EEH_PE_ISOLATED)
-		pe->check_count = 0;
+	/*
+	 * Special treatment on clearing isolated state. Clear
+	 * check count since last isolation and put all affected
+	 * devices to normal state.
+	 */
+	if (!(state & EEH_PE_ISOLATED))
+		return NULL;
+
+	pe->check_count = 0;
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		pdev->error_state = pci_channel_io_normal;
+	}
 
 	return NULL;
 }
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 11/21] powerpc/eeh: Fix improper condition in eeh_pci_enable()
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (8 preceding siblings ...)
  2014-09-30  2:38 ` [PATCH 10/21] powerpc/eeh: Clear frozen device state in time Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 12/21] powerpc/eeh: Unfreeze PE on enabling EEH functionality Gavin Shan
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The function eeh_pci_enable() is called to apply various requests
to one particular PE: Enabling EEH, Disabling EEH, Enabling IO,
Enabling DMA, Freezing PE. When enabling IO or DMA on one specific
PE, we need check that IO or DMA isn't enabled previously. But
the condition used to do the check isn't completely correct because
one PE would be in DMA frozen state with workable IO path, or vice
versa.

The patch fixes the improper condition.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 58 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index e436d5e..09bcf95 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -579,25 +579,51 @@ EXPORT_SYMBOL(eeh_check_failure);
  */
 int eeh_pci_enable(struct eeh_pe *pe, int function)
 {
-	int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	int active_flag, rc;
 
 	/*
 	 * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
 	 * Also, it's pointless to enable them on unfrozen PE. So
-	 * we have the check here.
+	 * we have to check before enabling IO or DMA.
 	 */
-	if (function == EEH_OPT_THAW_MMIO ||
-	    function == EEH_OPT_THAW_DMA) {
+	switch (function) {
+	case EEH_OPT_THAW_MMIO:
+		active_flag = EEH_STATE_MMIO_ACTIVE;
+		break;
+	case EEH_OPT_THAW_DMA:
+		active_flag = EEH_STATE_DMA_ACTIVE;
+		break;
+	case EEH_OPT_DISABLE:
+	case EEH_OPT_ENABLE:
+	case EEH_OPT_FREEZE_PE:
+		active_flag = 0;
+		break;
+	default:
+		pr_warn("%s: Invalid function %d\n",
+			__func__, function);
+		return -EINVAL;
+	}
+
+	/*
+	 * Check if IO or DMA has been enabled before
+	 * enabling them.
+	 */
+	if (active_flag) {
 		rc = eeh_ops->get_state(pe, NULL);
 		if (rc < 0)
 			return rc;
 
-		/* Needn't to enable or already enabled */
-		if ((rc == EEH_STATE_NOT_SUPPORT) ||
-		    ((rc & flags) == flags))
+		/* Needn't enable it at all */
+		if (rc == EEH_STATE_NOT_SUPPORT)
+			return 0;
+
+		/* It's already enabled */
+		if (rc & active_flag)
 			return 0;
 	}
 
+
+	/* Issue the request */
 	rc = eeh_ops->set_option(pe, function);
 	if (rc)
 		pr_warn("%s: Unexpected state change %d on "
@@ -605,17 +631,17 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
 			__func__, function, pe->phb->global_number,
 			pe->addr, rc);
 
-	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-	if (rc <= 0)
-		return rc;
+	/* Check if the request is finished successfully */
+	if (active_flag) {
+		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+		if (rc <= 0)
+			return rc;
 
-	if ((function == EEH_OPT_THAW_MMIO) &&
-	    (rc & EEH_STATE_MMIO_ENABLED))
-		return 0;
+		if (rc & active_flag)
+			return 0;
 
-	if ((function == EEH_OPT_THAW_DMA) &&
-	    (rc & EEH_STATE_DMA_ENABLED))
-		return 0;
+		return -EIO;
+	}
 
 	return rc;
 }
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 12/21] powerpc/eeh: Unfreeze PE on enabling EEH functionality
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (9 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 11/21] powerpc/eeh: Fix improper condition in eeh_pci_enable() Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 13/21] powerpc/eeh: Use eeh_unfreeze_pe() Gavin Shan
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

When passing through PE to guest, that's possibly in frozen
state. The driver for the pass-through devices on guest side
can't be loaded successfully as reported. We already had one
gate in eeh_dev_open() to clear PE frozen state accordingly,
but that's not enough because the function is only called at
QEMU startup for once.

The patch adds another gate in eeh_pe_set_option() so that the
PE frozen state can be cleared at QEMU restart time.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h |  1 +
 arch/powerpc/kernel/eeh.c      | 60 ++++++++++++++++++++++--------------------
 2 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index ae8c441..5e6e548 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -281,6 +281,7 @@ void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
 void eeh_remove_device(struct pci_dev *);
+int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state);
 int eeh_dev_open(struct pci_dev *pdev);
 void eeh_dev_release(struct pci_dev *pdev);
 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 09bcf95..9678e16 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1164,6 +1164,31 @@ void eeh_remove_device(struct pci_dev *dev)
 	edev->mode &= ~EEH_DEV_SYSFS;
 }
 
+int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
+{
+	int ret;
+
+	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
+			__func__, ret, pe->phb->global_number, pe->addr);
+		return ret;
+	}
+
+	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
+			__func__, ret, pe->phb->global_number, pe->addr);
+		return ret;
+	}
+
+	/* Clear software isolated state */
+	if (sw_state && (pe->state & EEH_PE_ISOLATED))
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+	return ret;
+}
+
 /**
  * eeh_dev_open - Increase count of pass through devices for PE
  * @pdev: PCI device
@@ -1176,7 +1201,6 @@ void eeh_remove_device(struct pci_dev *dev)
 int eeh_dev_open(struct pci_dev *pdev)
 {
 	struct eeh_dev *edev;
-	int flag = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	int ret = -ENODEV;
 
 	mutex_lock(&eeh_dev_mutex);
@@ -1196,31 +1220,9 @@ int eeh_dev_open(struct pci_dev *pdev)
 	 * in frozen PE won't work properly. Clear the frozen state
 	 * in advance.
 	 */
-	ret = eeh_ops->get_state(edev->pe, NULL);
-	if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT &&
-	    (ret & flag) != flag) {
-		ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_MMIO);
-		if (ret) {
-			pr_warn("%s: Failure %d enabling MMIO "
-				"for PHB#%x-PE#%x\n",
-				__func__, ret, edev->phb->global_number,
-				edev->pe->addr);
-			goto out;
-		}
-
-		ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_DMA);
-		if (ret) {
-			pr_warn("%s: Failure %d enabling DMA "
-				"for PHB#%x-PE#%x\n",
-				__func__, ret, edev->phb->global_number,
-				edev->pe->addr);
-			goto out;
-		}
-	}
-
-	/* Clear software isolated state */
-	if (edev->pe->state & EEH_PE_ISOLATED)
-		eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED);
+	ret = eeh_unfreeze_pe(edev->pe, true);
+	if (ret)
+		goto out;
 
 	/* Increase PE's pass through count */
 	atomic_inc(&edev->pe->pass_dev_cnt);
@@ -1338,8 +1340,10 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
 	 */
 	switch (option) {
 	case EEH_OPT_ENABLE:
-		if (eeh_enabled())
+		if (eeh_enabled()) {
+			ret = eeh_unfreeze_pe(pe, true);
 			break;
+		}
 		ret = -EIO;
 		break;
 	case EEH_OPT_DISABLE:
@@ -1351,7 +1355,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
 			break;
 		}
 
-		ret = eeh_ops->set_option(pe, option);
+		ret = eeh_pci_enable(pe, option);
 		break;
 	default:
 		pr_debug("%s: Option %d out of range (%d, %d)\n",
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 13/21] powerpc/eeh: Use eeh_unfreeze_pe()
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (10 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 12/21] powerpc/eeh: Unfreeze PE on enabling EEH functionality Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 14/21] powerpc/eeh: Block PCI config access during reset Gavin Shan
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The patch uses eeh_unfreeze_pe() to replace the logic clearing
frozen IO and DMA, in order to simplify the code.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c        | 19 +------------------
 arch/powerpc/kernel/eeh_driver.c | 18 ++++++------------
 arch/powerpc/kernel/eeh_sysfs.c  | 21 +--------------------
 3 files changed, 8 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9678e16..7004673 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1433,24 +1433,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
 	}
 
 	/* The PE is still in frozen state */
-	ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
-	if (ret) {
-		pr_warn("%s: Failure %d enabling MMIO for PHB#%x-PE#%x\n",
-			__func__, ret, pe->phb->global_number, pe->addr);
-		return ret;
-	}
-
-	ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
-	if (ret) {
-		pr_warn("%s: Failure %d enabling DMA for PHB#%x-PE#%x\n",
-			__func__, ret, pe->phb->global_number, pe->addr);
-		return ret;
-	}
-
-	/* Clear software isolated state */
-	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
-	return ret;
+	return eeh_unfreeze_pe(pe, true);
 }
 
 /**
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 6a0dcee..948e6f9 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -450,21 +450,15 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
 static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
-	int i, rc;
+	int i, rc = 1;
 
-	for (i = 0; i < 3; i++) {
-		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
-		if (rc)
-			continue;
-		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
-		if (!rc)
-			break;
-	}
+	for (i = 0; rc && i < 3; i++)
+		rc = eeh_unfreeze_pe(pe, false);
 
-	/* The PE has been isolated, clear it */
+	/* Stop immediately on any errors */
 	if (rc) {
-		pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
-			__func__, pe->phb->global_number, pe->addr, rc);
+		pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
+			__func__, rc, pe->phb->global_number, pe->addr);
 		return (void *)pe;
 	}
 
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index eb15be4..9a44010 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -75,7 +75,6 @@ static ssize_t eeh_pe_state_store(struct device *dev,
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
-	int ret;
 
 	if (!edev || !edev->pe)
 		return -ENODEV;
@@ -84,26 +83,8 @@ static ssize_t eeh_pe_state_store(struct device *dev,
 	if (!(edev->pe->state & EEH_PE_ISOLATED))
 		return count;
 
-	/* Enable MMIO */
-	ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_MMIO);
-	if (ret) {
-		pr_warn("%s: Failure %d enabling MMIO for PHB#%d-PE#%d\n",
-			__func__, ret, edev->pe->phb->global_number,
-			edev->pe->addr);
+	if (eeh_unfreeze_pe(edev->pe, true))
 		return -EIO;
-	}
-
-	/* Enable DMA */
-	ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_DMA);
-	if (ret) {
-		pr_warn("%s: Failure %d enabling DMA for PHB#%d-PE#%d\n",
-			__func__, ret, edev->pe->phb->global_number,
-			edev->pe->addr);
-		return -EIO;
-	}
-
-	/* Clear software state */
-	eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED);
 
 	return count;
 }
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 14/21] powerpc/eeh: Block PCI config access during reset
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (11 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 13/21] powerpc/eeh: Use eeh_unfreeze_pe() Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 15/21] powerpc/pseries: Decrease message level on EEH initialization Gavin Shan
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

Function pcibios_set_pcie_reset_state() can be used to do PCI
reset. PCI config access during the reset usually causes EEH
errors unexpectedly. In order to avoid the EEH error, the patch
blocks PCI config access during reset with the help of flag
EEH_PE_RESET, which is similar to what we did in EEH PE reset
path.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 7004673..545860f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -668,14 +668,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 	switch (state) {
 	case pcie_deassert_reset:
 		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		break;
 	case pcie_hot_reset:
+		eeh_pe_state_mark(pe, EEH_PE_RESET);
 		eeh_ops->reset(pe, EEH_RESET_HOT);
 		break;
 	case pcie_warm_reset:
+		eeh_pe_state_mark(pe, EEH_PE_RESET);
 		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 		break;
 	default:
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		return -EINVAL;
 	};
 
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 15/21] powerpc/pseries: Decrease message level on EEH initialization
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (12 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 14/21] powerpc/eeh: Block PCI config access during reset Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 16/21] powerpc/powernv: Sync OpalPciResetScope with firmware Gavin Shan
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

As Anton suggested, the patch decreases the message level on EEH
initialization to avoid unnecessary messages if required. Also,
we have unified hint if any of needful RTAS calls is missed, and
then we can check /proc/device-tree to figure out the missed RTAS
calls.

Suggested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 35 ++++++++--------------------
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 4fc5ff9..a6c7e19 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -88,29 +88,14 @@ static int pseries_eeh_init(void)
 	 * and its variant since the old firmware probably support address
 	 * of domain/bus/slot/function for EEH RTAS operations.
 	 */
-	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) {
-		pr_warn("%s: RTAS service <ibm,set-eeh-option> invalid\n",
-			__func__);
-		return -EINVAL;
-	} else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) {
-		pr_warn("%s: RTAS service <ibm,set-slot-reset> invalid\n",
-			__func__);
-		return -EINVAL;
-	} else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
-		   ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) {
-		pr_warn("%s: RTAS service <ibm,read-slot-reset-state2> and "
-			"<ibm,read-slot-reset-state> invalid\n",
-			__func__);
-		return -EINVAL;
-	} else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) {
-		pr_warn("%s: RTAS service <ibm,slot-error-detail> invalid\n",
-			__func__);
-		return -EINVAL;
-	} else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
-		   ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) {
-		pr_warn("%s: RTAS service <ibm,configure-pe> and "
-			"<ibm,configure-bridge> invalid\n",
-			__func__);
+	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE		||
+	    ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE		||
+	    (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+	     ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE)	||
+	    ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE	||
+	    (ibm_configure_pe == RTAS_UNKNOWN_SERVICE		&&
+	     ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) {
+		pr_info("EEH functionality not supported\n");
 		return -EINVAL;
 	}
 
@@ -118,11 +103,11 @@ static int pseries_eeh_init(void)
 	spin_lock_init(&slot_errbuf_lock);
 	eeh_error_buf_size = rtas_token("rtas-error-log-max");
 	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
-		pr_warn("%s: unknown EEH error log size\n",
+		pr_info("%s: unknown EEH error log size\n",
 			__func__);
 		eeh_error_buf_size = 1024;
 	} else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
-		pr_warn("%s: EEH error log size %d exceeds the maximal %d\n",
+		pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
 			__func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
 		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
 	}
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 16/21] powerpc/powernv: Sync OpalPciResetScope with firmware
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (13 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 15/21] powerpc/pseries: Decrease message level on EEH initialization Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 17/21] powerpc/eeh: Tag reset state for user owned PE Gavin Shan
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The names of PCI reset scopes aren't sychronized with firmware.
The patch fixes it.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h           |  9 ++++++---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 12 ++++++------
 arch/powerpc/platforms/powernv/pci-ioda.c |  4 ++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index f157941..a084cf5 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -387,9 +387,12 @@ enum OpalM64EnableAction {
 };
 
 enum OpalPciResetScope {
-	OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
-	OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
-	OPAL_PCI_IODA_TABLE_RESET = 6,
+	OPAL_RESET_PHB_COMPLETE		= 1,
+	OPAL_RESET_PCI_LINK		= 2,
+	OPAL_RESET_PHB_ERROR		= 3,
+	OPAL_RESET_PCI_HOT		= 4,
+	OPAL_RESET_PCI_FUNDAMENTAL	= 5,
+	OPAL_RESET_PCI_IODA_TABLE	= 6
 };
 
 enum OpalPciReinitScope {
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e516b7f..4d9400f 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -514,11 +514,11 @@ int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
 	if (option == EEH_RESET_FUNDAMENTAL ||
 	    option == EEH_RESET_HOT)
 		rc = opal_pci_reset(phb->opal_id,
-				OPAL_PHB_COMPLETE,
+				OPAL_RESET_PHB_COMPLETE,
 				OPAL_ASSERT_RESET);
 	else if (option == EEH_RESET_DEACTIVATE)
 		rc = opal_pci_reset(phb->opal_id,
-				OPAL_PHB_COMPLETE,
+				OPAL_RESET_PHB_COMPLETE,
 				OPAL_DEASSERT_RESET);
 	if (rc < 0)
 		goto out;
@@ -558,15 +558,15 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
 	 */
 	if (option == EEH_RESET_FUNDAMENTAL)
 		rc = opal_pci_reset(phb->opal_id,
-				OPAL_PCI_FUNDAMENTAL_RESET,
+				OPAL_RESET_PCI_FUNDAMENTAL,
 				OPAL_ASSERT_RESET);
 	else if (option == EEH_RESET_HOT)
 		rc = opal_pci_reset(phb->opal_id,
-				OPAL_PCI_HOT_RESET,
+				OPAL_RESET_PCI_HOT,
 				OPAL_ASSERT_RESET);
 	else if (option == EEH_RESET_DEACTIVATE)
 		rc = opal_pci_reset(phb->opal_id,
-				OPAL_PCI_HOT_RESET,
+				OPAL_RESET_PCI_HOT,
 				OPAL_DEASSERT_RESET);
 	if (rc < 0)
 		goto out;
@@ -697,7 +697,7 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 		    (option == EEH_RESET_HOT ||
 		    option == EEH_RESET_FUNDAMENTAL)) {
 			rc = opal_pci_reset(phb->opal_id,
-					    OPAL_PHB_ERROR,
+					    OPAL_RESET_PHB_ERROR,
 					    OPAL_ASSERT_RESET);
 			if (rc != OPAL_SUCCESS) {
 				pr_warn("%s: Failure %lld clearing "
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index df241b1..36b1a7a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1627,7 +1627,7 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
 
 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
 {
-	opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
+	opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
 		       OPAL_ASSERT_RESET);
 }
 
@@ -1803,7 +1803,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
 	/* Reset IODA tables to a clean state */
-	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
+	rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
 	if (rc)
 		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
 
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 17/21] powerpc/eeh: Tag reset state for user owned PE
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (14 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 16/21] powerpc/powernv: Sync OpalPciResetScope with firmware Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 18/21] powerpc/eeh: Emulate EEH recovery for VFIO devices Gavin Shan
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

PE would be owned by userland, which probably request PE reset
done in host side. During the reset, we should drop the PCI
config accesses to the PE with help of flag EEH_PE_RESET.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 545860f..059aa00 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1463,6 +1463,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
 	switch (option) {
 	case EEH_RESET_DEACTIVATE:
 		ret = eeh_ops->reset(pe, option);
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		if (ret)
 			break;
 
@@ -1477,6 +1478,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
 		 */
 		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
 
+		eeh_pe_state_mark(pe, EEH_PE_RESET);
 		ret = eeh_ops->reset(pe, option);
 		break;
 	default:
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 18/21] powerpc/eeh: Emulate EEH recovery for VFIO devices
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (15 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 17/21] powerpc/eeh: Tag reset state for user owned PE Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 19/21] powerpc/eeh: Dump PCI config space for all child devices Gavin Shan
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

When enabling EEH functionality on passed through devices (PE)
with VFIO, the devices in the PE would be removed permanently
from guest side. In that case, the PE remains frozen state.
When returning PE to host, or restarting the guest again, we
had mechanism unfreezing the PE by clearing PESTA/B frozen
bits. However, that's not enough for some adapters, which are
indicated as following "lspci" shows. Those adapters require
hot reset on the parent bus to bring their firmware back to
workable state. Otherwise, those adaptrs won't be operative
and the host (for returning case) or the guest will fail to
load the drivers for those adapters without exception.

0000:01:00.0 Ethernet controller: Emulex Corporation OneConnect \
             10Gb NIC (be3) (rev 02)
0000:01:00.0 0200: 19a2:0710 (rev 02)
0001:03:00.0 Ethernet controller: Emulex Corporation OneConnect \
             NIC (Lancer) (rev 10)
0001:03:00.0 0200: 10df:e220 (rev 10)

The patch adds mechanism to emulate EEH recovery (for hot reset
on parent PCI bus) on 3 gates to fix the issue: open/release one
adapter of the PE, enable EEH functionality on one adapter of the
PE.

Reported-by:  Murilo Fossa Vicentini <muvic@br.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h   |  1 +
 arch/powerpc/kernel/eeh.c        | 59 +++++++++++++++++++++++++-
 arch/powerpc/kernel/eeh_driver.c | 90 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 144 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 5e6e548..ee54f01 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -282,6 +282,7 @@ void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
 void eeh_remove_device(struct pci_dev *);
 int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state);
+int eeh_pe_reset_and_recover(struct eeh_pe *pe);
 int eeh_dev_open(struct pci_dev *pdev);
 void eeh_dev_release(struct pci_dev *pdev);
 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 059aa00..0b31169 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1193,6 +1193,60 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
 	return ret;
 }
 
+
+static struct pci_device_id eeh_reset_ids[] = {
+	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
+	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
+	{ 0 }
+};
+
+static int eeh_pe_change_owner(struct eeh_pe *pe)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	struct pci_device_id *id;
+	int flags, ret;
+
+	/* Check PE state */
+	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	ret = eeh_ops->get_state(pe, NULL);
+	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
+		return 0;
+
+	/* Unfrozen PE, nothing to do */
+	if ((ret & flags) == flags)
+		return 0;
+
+	/* Frozen PE, check if it needs PE level reset */
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
+			if (id->vendor != PCI_ANY_ID &&
+			    id->vendor != pdev->vendor)
+				continue;
+			if (id->device != PCI_ANY_ID &&
+			    id->device != pdev->device)
+				continue;
+			if (id->subvendor != PCI_ANY_ID &&
+			    id->subvendor != pdev->subsystem_vendor)
+				continue;
+			if (id->subdevice != PCI_ANY_ID &&
+			    id->subdevice != pdev->subsystem_device)
+				continue;
+
+			goto reset;
+		}
+	}
+
+	return eeh_unfreeze_pe(pe, true);
+
+reset:
+	return eeh_pe_reset_and_recover(pe);
+}
+
 /**
  * eeh_dev_open - Increase count of pass through devices for PE
  * @pdev: PCI device
@@ -1224,7 +1278,7 @@ int eeh_dev_open(struct pci_dev *pdev)
 	 * in frozen PE won't work properly. Clear the frozen state
 	 * in advance.
 	 */
-	ret = eeh_unfreeze_pe(edev->pe, true);
+	ret = eeh_pe_change_owner(edev->pe);
 	if (ret)
 		goto out;
 
@@ -1265,6 +1319,7 @@ void eeh_dev_release(struct pci_dev *pdev)
 	/* Decrease PE's pass through count */
 	atomic_dec(&edev->pe->pass_dev_cnt);
 	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
+	eeh_pe_change_owner(edev->pe);
 out:
 	mutex_unlock(&eeh_dev_mutex);
 }
@@ -1345,7 +1400,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
 	switch (option) {
 	case EEH_OPT_ENABLE:
 		if (eeh_enabled()) {
-			ret = eeh_unfreeze_pe(pe, true);
+			ret = eeh_pe_change_owner(pe);
 			break;
 		}
 		ret = -EIO;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 948e6f9..3fd514f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
 	return false;
 }
 
+static void *eeh_dev_save_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev;
+
+	if (!edev)
+		return NULL;
+
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (!pdev)
+		return NULL;
+
+	pci_save_state(pdev);
+	return NULL;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata)
 	return NULL;
 }
 
+static void *eeh_dev_restore_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev;
+
+	if (!edev)
+		return NULL;
+
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (!pdev)
+		return NULL;
+
+	pci_restore_state(pdev);
+	return NULL;
+}
+
 /**
  * eeh_report_resume - Tell device to resume normal operations
  * @data: eeh device
@@ -450,10 +482,11 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
 static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
+	bool *clear_sw_state = flag;
 	int i, rc = 1;
 
 	for (i = 0; rc && i < 3; i++)
-		rc = eeh_unfreeze_pe(pe, false);
+		rc = eeh_unfreeze_pe(pe, clear_sw_state);
 
 	/* Stop immediately on any errors */
 	if (rc) {
@@ -465,17 +498,66 @@ static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
 	return NULL;
 }
 
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
+				     bool clear_sw_state)
 {
 	void *rc;
 
-	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
 	if (!rc)
 		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 
 	return rc ? -EIO : 0;
 }
 
+int eeh_pe_reset_and_recover(struct eeh_pe *pe)
+{
+	int result, ret;
+
+	/* Bail if the PE is being recovered */
+	if (pe->state & EEH_PE_RECOVERING)
+		return 0;
+
+	/* Put the PE into recovery mode */
+	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
+	/* Save states */
+	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
+
+	/* Report error */
+	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+
+	/* Issue reset */
+	eeh_pe_state_mark(pe, EEH_PE_RESET);
+	ret = eeh_reset_pe(pe);
+	if (ret) {
+		eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET);
+		return ret;
+	}
+	eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+	/* Unfreeze the PE */
+	ret = eeh_clear_pe_frozen_state(pe, true);
+	if (ret) {
+		eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+		return ret;
+	}
+
+	/* Notify completion of reset */
+	eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+
+	/* Restore device state */
+	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
+
+	/* Resume */
+	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+
+	/* Clear recovery mode */
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+
+	return 0;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -534,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	eeh_pe_state_clear(pe, EEH_PE_RESET);
 
 	/* Clear frozen state */
-	rc = eeh_clear_pe_frozen_state(pe);
+	rc = eeh_clear_pe_frozen_state(pe, false);
 	if (rc)
 		return rc;
 
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 19/21] powerpc/eeh: Dump PCI config space for all child devices
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (16 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 18/21] powerpc/eeh: Emulate EEH recovery for VFIO devices Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 20/21] powerpc/powernv: Fetch frozen PE on top level Gavin Shan
  2014-09-30  2:39 ` [PATCH 21/21] powerpc/powernv: Override dma_get_required_mask() Gavin Shan
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The PEs can be organized as nested. Current implementation doesn't
dump PCI config space for subordinate devices of child PEs. However,
the frozen PE could be caused by those subordinate devices of its
child PEs.

The patch dumps PCI config space for all subordinate devices of the
problematic PE.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 0b31169..519622f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -117,7 +117,7 @@ static DEFINE_MUTEX(eeh_dev_mutex);
  * not dynamically alloced, so that it ends up in RMO where RTAS
  * can access it.
  */
-#define EEH_PCI_REGS_LOG_LEN 4096
+#define EEH_PCI_REGS_LOG_LEN 8192
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
 /*
@@ -148,16 +148,12 @@ static int __init eeh_setup(char *str)
 }
 __setup("eeh=", eeh_setup);
 
-/**
- * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
- * @edev: device to report data for
- * @buf: point to buffer in which to log
- * @len: amount of room in buffer
- *
- * This routine captures assorted PCI configuration space data,
- * and puts them into a buffer for RTAS error logging.
+/*
+ * This routine captures assorted PCI configuration space data
+ * for the indicated PCI device, and puts them into a buffer
+ * for RTAS error logging.
  */
-static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
+static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 {
 	struct device_node *dn = eeh_dev_to_of_node(edev);
 	u32 cfg;
@@ -255,6 +251,19 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
 	return n;
 }
 
+static void *eeh_dump_pe_log(void *data, void *flag)
+{
+	struct eeh_pe *pe = data;
+	struct eeh_dev *edev, *tmp;
+	size_t *plen = flag;
+
+	eeh_pe_for_each_dev(pe, edev, tmp)
+		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
+					  EEH_PCI_REGS_LOG_LEN - *plen);
+
+	return NULL;
+}
+
 /**
  * eeh_slot_error_detail - Generate combined log including driver log and error log
  * @pe: EEH PE
@@ -268,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
 	size_t loglen = 0;
-	struct eeh_dev *edev, *tmp;
 
 	/*
 	 * When the PHB is fenced or dead, it's pointless to collect
@@ -286,10 +294,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 		eeh_pe_restore_bars(pe);
 
 		pci_regs_buf[0] = 0;
-		eeh_pe_for_each_dev(pe, edev, tmp) {
-			loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
-						      EEH_PCI_REGS_LOG_LEN - loglen);
-		}
+		eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
 	}
 
 	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 20/21] powerpc/powernv: Fetch frozen PE on top level
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (17 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 19/21] powerpc/eeh: Dump PCI config space for all child devices Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  2014-09-30  2:39 ` [PATCH 21/21] powerpc/powernv: Override dma_get_required_mask() Gavin Shan
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

It should have been part of commit 1ad7a72c5 ("powerpc/eeh: Report
frozen parent PE prior to child PE"). There are 2 ways to report
EEH errors: proactively polling because of 0xFF's returned from
PCI config or IO read, or interrupt driven event. We missed to
report and handle parent frozen PE prior to child frozen PE for
the later case on PowerNV platform.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 48 ++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 4d9400f..61ccb1e 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -886,14 +886,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
 	 * the master PE because slave PE is invisible
 	 * to EEH core.
 	 */
-	if (phb->get_pe_state) {
-		pnv_pe = &phb->ioda.pe_array[pe_no];
-		if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
-			pnv_pe = pnv_pe->master;
-			WARN_ON(!pnv_pe ||
-				!(pnv_pe->flags & PNV_IODA_PE_MASTER));
-			pe_no = pnv_pe->pe_number;
-		}
+	pnv_pe = &phb->ioda.pe_array[pe_no];
+	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
+		pnv_pe = pnv_pe->master;
+		WARN_ON(!pnv_pe ||
+			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pnv_pe->pe_number;
 	}
 
 	/* Find the PE according to PE# */
@@ -904,15 +902,37 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
 	if (!dev_pe)
 		return -EEXIST;
 
-	/*
-	 * At this point, we're sure the compound PE should
-	 * be put into frozen state.
-	 */
+	/* Freeze the (compound) PE */
 	*pe = dev_pe;
-	if (phb->freeze_pe &&
-	    !(dev_pe->state & EEH_PE_ISOLATED))
+	if (!(dev_pe->state & EEH_PE_ISOLATED))
 		phb->freeze_pe(phb, pe_no);
 
+	/*
+	 * At this point, we're sure the (compound) PE should
+	 * have been frozen. However, we still need poke until
+	 * hitting the frozen PE on top level.
+	 */
+	dev_pe = dev_pe->parent;
+	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
+		int ret;
+		int active_flags = (EEH_STATE_MMIO_ACTIVE |
+				    EEH_STATE_DMA_ACTIVE);
+
+		ret = eeh_ops->get_state(dev_pe, NULL);
+		if (ret <= 0 || (ret & active_flags) == active_flags) {
+			dev_pe = dev_pe->parent;
+			continue;
+		}
+
+		/* Frozen parent PE */
+		*pe = dev_pe;
+		if (!(dev_pe->state & EEH_PE_ISOLATED))
+			phb->freeze_pe(phb, dev_pe->addr);
+
+		/* Next one */
+		dev_pe = dev_pe->parent;
+	}
+
 	return 0;
 }
 
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 21/21] powerpc/powernv: Override dma_get_required_mask()
  2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
                   ` (18 preceding siblings ...)
  2014-09-30  2:39 ` [PATCH 20/21] powerpc/powernv: Fetch frozen PE on top level Gavin Shan
@ 2014-09-30  2:39 ` Gavin Shan
  19 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-09-30  2:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

The dma_get_required_mask() function is used by some drivers to
query the platform about what DMA mask is needed to cover all of
memory. This is a bit of a strange semantic when we have to choose
between IOMMU translation or bypass, but essentially what it means
is "what DMA mask will give best performances".

Currently, our IOMMU backend always returns a 32-bit mask here, we
don't do anything special to it when we have bypass available. This
causes some drivers to choose a 32-bit mask, thus losing the ability
to use the bypass window, thinking this is more efficient. The problem
was reported from the driver of following device:

0004:03:00.0 0107: 1000:0087 (rev 05)
0004:03:00.0 Serial Attached SCSI controller: LSI Logic / Symbios \
             Logic SAS2308 PCI-Express Fusion-MPT SAS-2 (rev 05)

This patch adds an override of that function in order to, instead,
return a 64-bit mask whenever a bypass window is available in order
for drivers to prefer this configuration.

Reported-by: Murali N. Iyer <mniyer@us.ibm.com>
Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/dma-mapping.h    |  1 +
 arch/powerpc/kernel/dma.c                 | 14 ++++++++++----
 arch/powerpc/platforms/powernv/pci-ioda.c | 23 +++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.c      | 11 +++++++++++
 arch/powerpc/platforms/powernv/pci.h      |  2 ++
 arch/powerpc/platforms/powernv/powernv.h  |  6 ++++++
 arch/powerpc/platforms/powernv/setup.c    |  9 +++++++++
 7 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 150866b..894d538 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -135,6 +135,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 
 extern int dma_set_mask(struct device *dev, u64 dma_mask);
 extern int __dma_set_mask(struct device *dev, u64 dma_mask);
+extern u64 __dma_get_required_mask(struct device *dev);
 
 #define dma_alloc_coherent(d,s,h,f)	dma_alloc_attrs(d,s,h,f,NULL)
 
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index ee78f6e..210ff9d 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -202,6 +202,7 @@ int __dma_set_mask(struct device *dev, u64 dma_mask)
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
+
 int dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	if (ppc_md.dma_set_mask)
@@ -210,13 +211,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
 }
 EXPORT_SYMBOL(dma_set_mask);
 
-u64 dma_get_required_mask(struct device *dev)
+u64 __dma_get_required_mask(struct device *dev)
 {
 	struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
-	if (ppc_md.dma_get_required_mask)
-		return ppc_md.dma_get_required_mask(dev);
-
 	if (unlikely(dma_ops == NULL))
 		return 0;
 
@@ -225,6 +223,14 @@ u64 dma_get_required_mask(struct device *dev)
 
 	return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
 }
+
+u64 dma_get_required_mask(struct device *dev)
+{
+	if (ppc_md.dma_get_required_mask)
+		return ppc_md.dma_get_required_mask(dev);
+
+	return __dma_get_required_mask(dev);
+}
 EXPORT_SYMBOL_GPL(dma_get_required_mask);
 
 static int __init dma_init(void)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 36b1a7a..380ebc9 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -890,6 +890,28 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
 	return 0;
 }
 
+static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
+					      struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+	u64 end, mask;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return 0;
+
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+	if (!pe->tce_bypass_enabled)
+		return __dma_get_required_mask(&pdev->dev);
+
+
+	end = pe->tce_bypass_base + memblock_end_of_DRAM();
+	mask = 1ULL << (fls64(end) - 1);
+	mask += mask - 1;
+
+	return mask;
+}
+
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
 				   struct pci_bus *bus,
 				   bool add_to_iommu_group)
@@ -1782,6 +1804,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
 	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
+	phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
 
 	/* Setup shutdown function for kexec */
 	phb->shutdown = pnv_pci_ioda_shutdown;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b854b57..e9f509b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -761,6 +761,17 @@ int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 	return __dma_set_mask(&pdev->dev, dma_mask);
 }
 
+u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+
+	if (phb && phb->dma_get_required_mask)
+		return phb->dma_get_required_mask(phb, pdev);
+
+	return __dma_get_required_mask(&pdev->dev);
+}
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 27594cf..34d29eb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -124,6 +124,8 @@ struct pnv_phb {
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
 	int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
 			    u64 dma_mask);
+	u64 (*dma_get_required_mask)(struct pnv_phb *phb,
+				     struct pci_dev *pdev);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	void (*shutdown)(struct pnv_phb *phb);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 75501bf..6c8e2d1 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -13,6 +13,7 @@ struct pci_dev;
 extern void pnv_pci_init(void);
 extern void pnv_pci_shutdown(void);
 extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
+extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev);
 #else
 static inline void pnv_pci_init(void) { }
 static inline void pnv_pci_shutdown(void) { }
@@ -21,6 +22,11 @@ static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 {
 	return -ENODEV;
 }
+
+static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
+{
+	return 0;
+}
 #endif
 
 extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 5a0e2dc..0cb3a07 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -173,6 +173,14 @@ static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
 	return __dma_set_mask(dev, dma_mask);
 }
 
+static u64 pnv_dma_get_required_mask(struct device *dev)
+{
+	if (dev_is_pci(dev))
+		return pnv_pci_dma_get_required_mask(to_pci_dev(dev));
+
+	return __dma_get_required_mask(dev);
+}
+
 static void pnv_shutdown(void)
 {
 	/* Let the PCI code clear up IODA tables */
@@ -335,6 +343,7 @@ define_machine(powernv) {
 	.power_save             = power7_idle,
 	.calibrate_decr		= generic_calibrate_decr,
 	.dma_set_mask		= pnv_dma_set_mask,
+	.dma_get_required_mask	= pnv_dma_get_required_mask,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [02/21] powerpc/eeh: Add eeh_pe_state sysfs entry
  2014-09-30  2:38 ` [PATCH 02/21] powerpc/eeh: Add eeh_pe_state sysfs entry Gavin Shan
@ 2014-10-01  3:43   ` Michael Ellerman
  2014-10-01  4:20     ` Gavin Shan
  0 siblings, 1 reply; 23+ messages in thread
From: Michael Ellerman @ 2014-10-01  3:43 UTC (permalink / raw)
  To: Gavin Shan, linuxppc-dev

On Tue, 2014-30-09 at 02:38:51 UTC, Gavin Shan wrote:
> diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
> index e2595ba..eb15be4 100644
> --- a/arch/powerpc/kernel/eeh_sysfs.c
> +++ b/arch/powerpc/kernel/eeh_sysfs.c
> @@ -54,6 +54,62 @@ EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
>  EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
>  EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
>  
> +static ssize_t eeh_pe_state_show(struct device *dev,
> +				 struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
> +	int state;
> +
> +	if (!edev || !edev->pe)
> +		return -ENODEV;
> +
> +	state = eeh_ops->get_state(edev->pe, NULL);
> +	return sprintf(buf, "%08x %08x\n",
> +		       state, edev->pe->state);

Looking at all the other eeh sysfs files, they all use 0x%x. Which makes it
much clearer when you're looking at the file in userspace that the content is
hex.

Please send an incremental patch to change the format to 0x%08x, unless there's
a good reason not to.

cheers

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [02/21] powerpc/eeh: Add eeh_pe_state sysfs entry
  2014-10-01  3:43   ` [02/21] " Michael Ellerman
@ 2014-10-01  4:20     ` Gavin Shan
  0 siblings, 0 replies; 23+ messages in thread
From: Gavin Shan @ 2014-10-01  4:20 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Gavin Shan

On Wed, Oct 01, 2014 at 01:43:43PM +1000, Michael Ellerman wrote:
>On Tue, 2014-30-09 at 02:38:51 UTC, Gavin Shan wrote:
>> diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
>> index e2595ba..eb15be4 100644
>> --- a/arch/powerpc/kernel/eeh_sysfs.c
>> +++ b/arch/powerpc/kernel/eeh_sysfs.c
>> @@ -54,6 +54,62 @@ EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
>>  EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
>>  EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
>>  
>> +static ssize_t eeh_pe_state_show(struct device *dev,
>> +				 struct device_attribute *attr, char *buf)
>> +{
>> +	struct pci_dev *pdev = to_pci_dev(dev);
>> +	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
>> +	int state;
>> +
>> +	if (!edev || !edev->pe)
>> +		return -ENODEV;
>> +
>> +	state = eeh_ops->get_state(edev->pe, NULL);
>> +	return sprintf(buf, "%08x %08x\n",
>> +		       state, edev->pe->state);
>
>Looking at all the other eeh sysfs files, they all use 0x%x. Which makes it
>much clearer when you're looking at the file in userspace that the content is
>hex.
>
>Please send an incremental patch to change the format to 0x%08x, unless there's
>a good reason not to.
>

Agree and will do :)

Thanks,
Gavin

>cheers
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2014-10-01  4:20 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-30  2:38 [PATCH 01/21] powerpc/eeh: Drop unused argument in eeh_check_failure() Gavin Shan
2014-09-30  2:38 ` [PATCH 02/21] powerpc/eeh: Add eeh_pe_state sysfs entry Gavin Shan
2014-10-01  3:43   ` [02/21] " Michael Ellerman
2014-10-01  4:20     ` Gavin Shan
2014-09-30  2:38 ` [PATCH 03/21] powerpc/eeh: Freeze PE before PE reset Gavin Shan
2014-09-30  2:38 ` [PATCH 04/21] powerpc/eeh: Reenable PCI devices after reset Gavin Shan
2014-09-30  2:38 ` [PATCH 05/21] powerpc/eeh: Clear frozen state on passing device Gavin Shan
2014-09-30  2:38 ` [PATCH 06/21] powerpc/powernv: Sync header with firmware Gavin Shan
2014-09-30  2:38 ` [PATCH 07/21] powerpc/eeh: Introduce eeh_ops::err_inject Gavin Shan
2014-09-30  2:38 ` [PATCH 08/21] powerpc/powernv: Add PCI error injection debugfs entry Gavin Shan
2014-09-30  2:38 ` [PATCH 09/21] powerpc/powernv: Clear PAPR error injection registers Gavin Shan
2014-09-30  2:38 ` [PATCH 10/21] powerpc/eeh: Clear frozen device state in time Gavin Shan
2014-09-30  2:39 ` [PATCH 11/21] powerpc/eeh: Fix improper condition in eeh_pci_enable() Gavin Shan
2014-09-30  2:39 ` [PATCH 12/21] powerpc/eeh: Unfreeze PE on enabling EEH functionality Gavin Shan
2014-09-30  2:39 ` [PATCH 13/21] powerpc/eeh: Use eeh_unfreeze_pe() Gavin Shan
2014-09-30  2:39 ` [PATCH 14/21] powerpc/eeh: Block PCI config access during reset Gavin Shan
2014-09-30  2:39 ` [PATCH 15/21] powerpc/pseries: Decrease message level on EEH initialization Gavin Shan
2014-09-30  2:39 ` [PATCH 16/21] powerpc/powernv: Sync OpalPciResetScope with firmware Gavin Shan
2014-09-30  2:39 ` [PATCH 17/21] powerpc/eeh: Tag reset state for user owned PE Gavin Shan
2014-09-30  2:39 ` [PATCH 18/21] powerpc/eeh: Emulate EEH recovery for VFIO devices Gavin Shan
2014-09-30  2:39 ` [PATCH 19/21] powerpc/eeh: Dump PCI config space for all child devices Gavin Shan
2014-09-30  2:39 ` [PATCH 20/21] powerpc/powernv: Fetch frozen PE on top level Gavin Shan
2014-09-30  2:39 ` [PATCH 21/21] powerpc/powernv: Override dma_get_required_mask() Gavin Shan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).