All of lore.kernel.org
 help / color / mirror / Atom feed
* powerpc/pci: [PATCH 1/1]: PCIE PHB reset
@ 2020-05-07 13:10 wenxiong
  2020-05-11 10:17 ` Oliver O'Halloran
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: wenxiong @ 2020-05-07 13:10 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: brking, Wen Xiong, oohall, wenxiong

From: Wen Xiong <wenxiong@linux.vnet.ibm.com>

Several device drivers hit EEH(Extended Error handling) when triggering
kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
in pci general code. PHB reset stop all PCI transactions from previous
kernel. We have tested the patch in several enviroments:
- direct slot adapters
- adapters under the switch
- a VF adapter in PowerVM
- a VF adapter/adapter in KVM guest.

Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/pci.c | 153 +++++++++++++++++++++++++++
 1 file changed, 153 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 911534b89c85..aac7f00696d2 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -11,6 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
 
 #include <asm/eeh.h>
 #include <asm/pci-bridge.h>
@@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 
 	return 0;
 }
+
+/**
+ * pseries_get_pdn_addr - Retrieve PHB address
+ * @pe: EEH PE
+ *
+ * Retrieve the assocated PHB address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ */
+static int pseries_get_pdn_addr(struct pci_controller *phb)
+{
+	int ret = -1;
+	int rets[3];
+	int ibm_get_config_addr_info;
+	int ibm_get_config_addr_info2;
+	int config_addr = 0;
+	struct pci_dn *root_pdn, *pdn;
+
+	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
+	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
+
+	root_pdn = PCI_DN(phb->dn);
+	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
+	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+		/*
+		 * First of all, we need to make sure there has one PE
+		 * associated with the device. Otherwise, PE address is
+		 * meaningless.
+		 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 1);
+		if (ret || (rets[0] == 0)) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# "
+				"option=%d config_addr=%x\n",
+				__func__, pdn->phb->global_number, 1, rets[0]);
+			return -1;
+		}
+
+		/* Retrieve the associated PE config address */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# "
+				"option=%d config_addr=%x\n",
+				__func__, pdn->phb->global_number, 0, rets[0]);
+			return -1;
+		}
+		return rets[0];
+	}
+
+	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+			config_addr, BUID_HI(pdn->phb->buid),
+			BUID_LO(pdn->phb->buid), 0);
+		if (ret || rets[0]) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE# "
+				"config_addr=%x\n",
+				__func__, pdn->phb->global_number, rets[0]);
+			return -1;
+		}
+		return rets[0];
+	}
+
+	return ret;
+}
+
+static int __init pseries_phb_reset(void)
+{
+	struct pci_controller *phb;
+	int config_addr;
+	int ibm_set_slot_reset;
+	int ibm_configure_pe;
+	int ret;
+
+	if (is_kdump_kernel() || reset_devices) {
+		pr_info("Issue PHB reset ...\n");
+		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+		ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+			pr_info("%s: EEH functionality not supported\n",
+				__func__);
+		}
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
+
+			/* If fundamental-reset not supported, try hot-reset */
+			if (ret == -8)
+				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+					config_addr, BUID_HI(phb->buid),
+					BUID_LO(phb->buid), EEH_RESET_HOT);
+
+			if (ret) {
+				pr_err("%s: fail with rtas_call fundamental reset=%d\n",
+					__func__, ret);
+				continue;
+			}
+		}
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
+			if (ret) {
+				pr_err("%s: fail with rtas_call deactive=%d\n",
+					__func__, ret);
+				continue;
+			}
+		}
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		list_for_each_entry(phb, &hose_list, list_node) {
+			config_addr = pseries_get_pdn_addr(phb);
+			if (config_addr == -1)
+				continue;
+
+			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid));
+			if (ret) {
+				pr_err("%s: fail with rtas_call configure_pe =%d\n",
+					__func__, ret);
+				continue;
+			}
+		}
+	}
+
+	return 0;
+}
+postcore_initcall(pseries_phb_reset);
+
-- 
2.18.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: powerpc/pci: [PATCH 1/1]: PCIE PHB reset
  2020-05-07 13:10 powerpc/pci: [PATCH 1/1]: PCIE PHB reset wenxiong
@ 2020-05-11 10:17 ` Oliver O'Halloran
  2020-05-12  5:28 ` Sam Bobroff
  2020-05-13  7:23 ` Sam Bobroff
  2 siblings, 0 replies; 5+ messages in thread
From: Oliver O'Halloran @ 2020-05-11 10:17 UTC (permalink / raw)
  To: wenxiong; +Cc: Brian King, linuxppc-dev, wenxiong

On Fri, May 8, 2020 at 12:36 AM <wenxiong@linux.vnet.ibm.com> wrote:
>
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
>
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code. PHB reset stop all PCI transactions from previous
> kernel. We have tested the patch in several enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
>
> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/pci.c | 153 +++++++++++++++++++++++++++
>  1 file changed, 153 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
> index 911534b89c85..aac7f00696d2 100644
> --- a/arch/powerpc/platforms/pseries/pci.c
> +++ b/arch/powerpc/platforms/pseries/pci.c
> @@ -11,6 +11,8 @@
>  #include <linux/kernel.h>
>  #include <linux/pci.h>
>  #include <linux/string.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
>
>  #include <asm/eeh.h>
>  #include <asm/pci-bridge.h>
> @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
>
>         return 0;
>  }
> +
> +/**
> + * pseries_get_pdn_addr - Retrieve PHB address
> + * @pe: EEH PE
> + *
> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
> + * function calls dedicated for the purpose. We need implement
> + * it through the new function and then the old one. Besides,
> + * you should make sure the config address is figured out from
> + * FDT node before calling the function.
> + *
> + */
> +static int pseries_get_pdn_addr(struct pci_controller *phb)
> +{
> +       int ret = -1;
> +       int rets[3];
> +       int ibm_get_config_addr_info;
> +       int ibm_get_config_addr_info2;
> +       int config_addr = 0;
> +       struct pci_dn *root_pdn, *pdn;
> +
> +       ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
> +       ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
> +
> +       root_pdn = PCI_DN(phb->dn);
> +       pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
> +       config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
> +
> +       if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
> +               /*
> +                * First of all, we need to make sure there has one PE
> +                * associated with the device. Otherwise, PE address is
> +                * meaningless.
> +                */
> +               ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +                       config_addr, BUID_HI(pdn->phb->buid),
> +                       BUID_LO(pdn->phb->buid), 1);
> +               if (ret || (rets[0] == 0)) {
> +                       pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +                               "option=%d config_addr=%x\n",
> +                               __func__, pdn->phb->global_number, 1, rets[0]);
> +                       return -1;
> +               }
> +
> +               /* Retrieve the associated PE config address */
> +               ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +                       config_addr, BUID_HI(pdn->phb->buid),
> +                       BUID_LO(pdn->phb->buid), 0);
> +               if (ret) {
> +                       pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +                               "option=%d config_addr=%x\n",
> +                               __func__, pdn->phb->global_number, 0, rets[0]);
> +                       return -1;
> +               }
> +               return rets[0];
> +       }
> +
> +       if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
> +               ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
> +                       config_addr, BUID_HI(pdn->phb->buid),
> +                       BUID_LO(pdn->phb->buid), 0);
> +               if (ret || rets[0]) {
> +                       pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +                               "config_addr=%x\n",
> +                               __func__, pdn->phb->global_number, rets[0]);
> +                       return -1;
> +               }
> +               return rets[0];
> +       }
> +
> +       return ret;
> +}

I'd be nice if we could reduce the amount of duplication between this
function and eeh_pseries_get_pe_addr(). I was planning to re-working
the EEH version though so that this is fine for now.

> +
> +static int __init pseries_phb_reset(void)
> +{
> +       struct pci_controller *phb;
> +       int config_addr;
> +       int ibm_set_slot_reset;
> +       int ibm_configure_pe;
> +       int ret;
> +
> +       if (is_kdump_kernel() || reset_devices) {
> +               pr_info("Issue PHB reset ...\n");
> +               ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
> +               ibm_configure_pe = rtas_token("ibm,configure-pe");
> +
> +               if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
> +                               ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
> +                       pr_info("%s: EEH functionality not supported\n",
> +                               __func__);
> +               }
> +
> +               list_for_each_entry(phb, &hose_list, list_node) {
> +                       config_addr = pseries_get_pdn_addr(phb);
> +                       if (config_addr == -1)
> +                               continue;
Considering we already cache the buid in the pci_controller we could
also cache the config_addr. That said, considering all this runs
precisely once at boot I'm not that bothered by calling
pseries_get_pdn_addr() again in each loop.


> +
> +                       ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +                               config_addr, BUID_HI(phb->buid),
> +                               BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> +
> +                       /* If fundamental-reset not supported, try hot-reset */
> +                       if (ret == -8)
> +                               ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +                                       config_addr, BUID_HI(phb->buid),
> +                                       BUID_LO(phb->buid), EEH_RESET_HOT);
> +
> +                       if (ret) {
> +                               pr_err("%s: fail with rtas_call fundamental reset=%d\n",
> +                                       __func__, ret);
> +                               continue;
> +                       }
> +               }
> +               msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +               list_for_each_entry(phb, &hose_list, list_node) {
> +                       config_addr = pseries_get_pdn_addr(phb);
> +                       if (config_addr == -1)
> +                               continue;
> +
> +                       ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +                               config_addr, BUID_HI(phb->buid),
> +                               BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
> +                       if (ret) {
> +                               pr_err("%s: fail with rtas_call deactive=%d\n",
> +                                       __func__, ret);
> +                               continue;
> +                       }
> +               }
> +               msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +               list_for_each_entry(phb, &hose_list, list_node) {
> +                       config_addr = pseries_get_pdn_addr(phb);
> +                       if (config_addr == -1)
> +                               continue;
> +
> +                       ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> +                               config_addr, BUID_HI(phb->buid),
> +                               BUID_LO(phb->buid));
> +                       if (ret) {
> +                               pr_err("%s: fail with rtas_call configure_pe =%d\n",
> +                                       __func__, ret);
> +                               continue;
> +                       }
> +               }
> +       }
> +
> +       return 0;
> +}
> +postcore_initcall(pseries_phb_reset);

You probably should use machine_postcore_initcall(pseries,
pseries_phb_reset); so that this only gets run on pseries. Without the
machine type specifier it'll run on PowerNV too.

> +
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: powerpc/pci: [PATCH 1/1]: PCIE PHB reset
  2020-05-07 13:10 powerpc/pci: [PATCH 1/1]: PCIE PHB reset wenxiong
  2020-05-11 10:17 ` Oliver O'Halloran
@ 2020-05-12  5:28 ` Sam Bobroff
  2020-05-14 16:46   ` wenxiong
  2020-05-13  7:23 ` Sam Bobroff
  2 siblings, 1 reply; 5+ messages in thread
From: Sam Bobroff @ 2020-05-12  5:28 UTC (permalink / raw)
  To: wenxiong; +Cc: brking, oohall, linuxppc-dev, wenxiong

[-- Attachment #1: Type: text/plain, Size: 6788 bytes --]

On Thu, May 07, 2020 at 08:10:37AM -0500, wenxiong@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> 
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code. PHB reset stop all PCI transactions from previous
> kernel. We have tested the patch in several enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
> 
> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>

Hi Wen Xiong,

I saw Oliver's review and I think he's covered the main issues I was
going to raise:
- This will run and produce some spurious errors on powernv. (I think
  distros do compile in both pseries and powernv.)
- There's a bit of code duplication but it's probably OK for this patch.

I have a few other minor comments, below:

> ---
>  arch/powerpc/platforms/pseries/pci.c | 153 +++++++++++++++++++++++++++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
> index 911534b89c85..aac7f00696d2 100644
> --- a/arch/powerpc/platforms/pseries/pci.c
> +++ b/arch/powerpc/platforms/pseries/pci.c
> @@ -11,6 +11,8 @@
>  #include <linux/kernel.h>
>  #include <linux/pci.h>
>  #include <linux/string.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
>  
>  #include <asm/eeh.h>
>  #include <asm/pci-bridge.h>
> @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
>  
>  	return 0;
>  }
> +
> +/**
> + * pseries_get_pdn_addr - Retrieve PHB address
> + * @pe: EEH PE
> + *
> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
> + * function calls dedicated for the purpose. We need implement
> + * it through the new function and then the old one. Besides,
> + * you should make sure the config address is figured out from
> + * FDT node before calling the function.
> + *
> + */
> +static int pseries_get_pdn_addr(struct pci_controller *phb)
> +{
> +	int ret = -1;
> +	int rets[3];
> +	int ibm_get_config_addr_info;
> +	int ibm_get_config_addr_info2;
> +	int config_addr = 0;
> +	struct pci_dn *root_pdn, *pdn;
> +
> +	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
> +	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
> +
> +	root_pdn = PCI_DN(phb->dn);
> +	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
> +	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
> +
> +	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
> +		/*
> +		 * First of all, we need to make sure there has one PE
> +		 * associated with the device. Otherwise, PE address is
> +		 * meaningless.
> +		 */

This comment might be better if it explained how using option=0
with ibm_get_config_addr tests the PE.

> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 1);
> +		if (ret || (rets[0] == 0)) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +				"option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 1, rets[0]);
> +			return -1;
> +		}
> +
> +		/* Retrieve the associated PE config address */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +				"option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 0, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
> +		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret || rets[0]) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +				"config_addr=%x\n",
> +				__func__, pdn->phb->global_number, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	return ret;
Can this ever return anything other than 0?

> +}
> +
> +static int __init pseries_phb_reset(void)
> +{
> +	struct pci_controller *phb;
> +	int config_addr;
> +	int ibm_set_slot_reset;
> +	int ibm_configure_pe;
> +	int ret;
> +
> +	if (is_kdump_kernel() || reset_devices) {
> +		pr_info("Issue PHB reset ...\n");
> +		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
> +		ibm_configure_pe = rtas_token("ibm,configure-pe");
> +
> +		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
> +				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
> +			pr_info("%s: EEH functionality not supported\n",
> +				__func__);
> +		}
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> +
> +			/* If fundamental-reset not supported, try hot-reset */
> +			if (ret == -8)
> +				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +					config_addr, BUID_HI(phb->buid),
> +					BUID_LO(phb->buid), EEH_RESET_HOT);
> +
> +			if (ret) {
> +				pr_err("%s: fail with rtas_call fundamental reset=%d\n",
> +					__func__, ret);

This error might be a bit confusing, since it's not clear if the result
came from the fundamental or hot-reset.

> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
> +			if (ret) {
> +				pr_err("%s: fail with rtas_call deactive=%d\n",
> +					__func__, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid));
> +			if (ret) {
> +				pr_err("%s: fail with rtas_call configure_pe =%d\n",
> +					__func__, ret);

These errors might be more useful if they indicated which PHB caused the
error.

> +				continue;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +postcore_initcall(pseries_phb_reset);
> +
> -- 
> 2.18.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: powerpc/pci: [PATCH 1/1]: PCIE PHB reset
  2020-05-07 13:10 powerpc/pci: [PATCH 1/1]: PCIE PHB reset wenxiong
  2020-05-11 10:17 ` Oliver O'Halloran
  2020-05-12  5:28 ` Sam Bobroff
@ 2020-05-13  7:23 ` Sam Bobroff
  2 siblings, 0 replies; 5+ messages in thread
From: Sam Bobroff @ 2020-05-13  7:23 UTC (permalink / raw)
  To: wenxiong; +Cc: brking, oohall, linuxppc-dev, wenxiong

[-- Attachment #1: Type: text/plain, Size: 6544 bytes --]

On Thu, May 07, 2020 at 08:10:37AM -0500, wenxiong@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> 
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code. PHB reset stop all PCI transactions from previous
> kernel. We have tested the patch in several enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
> 
> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>

One other thing:

I tested the patch and this line is logged for each emulated PHB:

[    3.337057] pseries_get_pdn_addr: Failed to get address for PHB#0-PE# option=1 config_addr=0

And it's not really an error -- QEMU's emulated PHBs don't have EEH
support.

It's not a big deal, because there are other similar messages already
present but it would probably be better if this were suppressed for the
case where there's no support.

Cheers,
Sam.

> ---
>  arch/powerpc/platforms/pseries/pci.c | 153 +++++++++++++++++++++++++++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
> index 911534b89c85..aac7f00696d2 100644
> --- a/arch/powerpc/platforms/pseries/pci.c
> +++ b/arch/powerpc/platforms/pseries/pci.c
> @@ -11,6 +11,8 @@
>  #include <linux/kernel.h>
>  #include <linux/pci.h>
>  #include <linux/string.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
>  
>  #include <asm/eeh.h>
>  #include <asm/pci-bridge.h>
> @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
>  
>  	return 0;
>  }
> +
> +/**
> + * pseries_get_pdn_addr - Retrieve PHB address
> + * @pe: EEH PE
> + *
> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
> + * function calls dedicated for the purpose. We need implement
> + * it through the new function and then the old one. Besides,
> + * you should make sure the config address is figured out from
> + * FDT node before calling the function.
> + *
> + */
> +static int pseries_get_pdn_addr(struct pci_controller *phb)
> +{
> +	int ret = -1;
> +	int rets[3];
> +	int ibm_get_config_addr_info;
> +	int ibm_get_config_addr_info2;
> +	int config_addr = 0;
> +	struct pci_dn *root_pdn, *pdn;
> +
> +	ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
> +	ibm_get_config_addr_info    = rtas_token("ibm,get-config-addr-info");
> +
> +	root_pdn = PCI_DN(phb->dn);
> +	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
> +	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
> +
> +	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
> +		/*
> +		 * First of all, we need to make sure there has one PE
> +		 * associated with the device. Otherwise, PE address is
> +		 * meaningless.
> +		 */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 1);
> +		if (ret || (rets[0] == 0)) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +				"option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 1, rets[0]);
> +			return -1;
> +		}
> +
> +		/* Retrieve the associated PE config address */
> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +				"option=%d config_addr=%x\n",
> +				__func__, pdn->phb->global_number, 0, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
> +		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
> +			config_addr, BUID_HI(pdn->phb->buid),
> +			BUID_LO(pdn->phb->buid), 0);
> +		if (ret || rets[0]) {
> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
> +				"config_addr=%x\n",
> +				__func__, pdn->phb->global_number, rets[0]);
> +			return -1;
> +		}
> +		return rets[0];
> +	}
> +
> +	return ret;
> +}
> +
> +static int __init pseries_phb_reset(void)
> +{
> +	struct pci_controller *phb;
> +	int config_addr;
> +	int ibm_set_slot_reset;
> +	int ibm_configure_pe;
> +	int ret;
> +
> +	if (is_kdump_kernel() || reset_devices) {
> +		pr_info("Issue PHB reset ...\n");
> +		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
> +		ibm_configure_pe = rtas_token("ibm,configure-pe");
> +
> +		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
> +				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
> +			pr_info("%s: EEH functionality not supported\n",
> +				__func__);
> +		}
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
> +
> +			/* If fundamental-reset not supported, try hot-reset */
> +			if (ret == -8)
> +				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +					config_addr, BUID_HI(phb->buid),
> +					BUID_LO(phb->buid), EEH_RESET_HOT);
> +
> +			if (ret) {
> +				pr_err("%s: fail with rtas_call fundamental reset=%d\n",
> +					__func__, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
> +			if (ret) {
> +				pr_err("%s: fail with rtas_call deactive=%d\n",
> +					__func__, ret);
> +				continue;
> +			}
> +		}
> +		msleep(EEH_PE_RST_SETTLE_TIME);
> +
> +		list_for_each_entry(phb, &hose_list, list_node) {
> +			config_addr = pseries_get_pdn_addr(phb);
> +			if (config_addr == -1)
> +				continue;
> +
> +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> +				config_addr, BUID_HI(phb->buid),
> +				BUID_LO(phb->buid));
> +			if (ret) {
> +				pr_err("%s: fail with rtas_call configure_pe =%d\n",
> +					__func__, ret);
> +				continue;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +postcore_initcall(pseries_phb_reset);
> +
> -- 
> 2.18.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: powerpc/pci: [PATCH 1/1]: PCIE PHB reset
  2020-05-12  5:28 ` Sam Bobroff
@ 2020-05-14 16:46   ` wenxiong
  0 siblings, 0 replies; 5+ messages in thread
From: wenxiong @ 2020-05-14 16:46 UTC (permalink / raw)
  To: Sam Bobroff; +Cc: brking, oohall, linuxppc-dev, wenxiong

On 2020-05-12 00:28, Sam Bobroff wrote:
> On Thu, May 07, 2020 at 08:10:37AM -0500, wenxiong@linux.vnet.ibm.com 
> wrote:
>> From: Wen Xiong <wenxiong@linux.vnet.ibm.com>
>> 
>> Several device drivers hit EEH(Extended Error handling) when 
>> triggering
>> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
>> in pci general code. PHB reset stop all PCI transactions from previous
>> kernel. We have tested the patch in several enviroments:
>> - direct slot adapters
>> - adapters under the switch
>> - a VF adapter in PowerVM
>> - a VF adapter/adapter in KVM guest.
>> 
>> Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com>
> 
> Hi Wen Xiong,
> 
> I saw Oliver's review and I think he's covered the main issues I was
> going to raise:
> - This will run and produce some spurious errors on powernv. (I think
>   distros do compile in both pseries and powernv.)
> - There's a bit of code duplication but it's probably OK for this 
> patch.
> 

Hi Oliver and Sam,

Yes. Thanks for reviewing the code. I will keep some code duplication 
for now until Oliver re-work the EEH code. I am going to fix several 
minor things in error code patch.

Thanks,
Wendy

> I have a few other minor comments, below:
> 
>> ---
>>  arch/powerpc/platforms/pseries/pci.c | 153 
>> +++++++++++++++++++++++++++
>>  1 file changed, 153 insertions(+)
>> 
>> diff --git a/arch/powerpc/platforms/pseries/pci.c 
>> b/arch/powerpc/platforms/pseries/pci.c
>> index 911534b89c85..aac7f00696d2 100644
>> --- a/arch/powerpc/platforms/pseries/pci.c
>> +++ b/arch/powerpc/platforms/pseries/pci.c
>> @@ -11,6 +11,8 @@
>>  #include <linux/kernel.h>
>>  #include <linux/pci.h>
>>  #include <linux/string.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/delay.h>
>> 
>>  #include <asm/eeh.h>
>>  #include <asm/pci-bridge.h>
>> @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct 
>> pci_host_bridge *bridge)
>> 
>>  	return 0;
>>  }
>> +
>> +/**
>> + * pseries_get_pdn_addr - Retrieve PHB address
>> + * @pe: EEH PE
>> + *
>> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
>> + * function calls dedicated for the purpose. We need implement
>> + * it through the new function and then the old one. Besides,
>> + * you should make sure the config address is figured out from
>> + * FDT node before calling the function.
>> + *
>> + */
>> +static int pseries_get_pdn_addr(struct pci_controller *phb)
>> +{
>> +	int ret = -1;
>> +	int rets[3];
>> +	int ibm_get_config_addr_info;
>> +	int ibm_get_config_addr_info2;
>> +	int config_addr = 0;
>> +	struct pci_dn *root_pdn, *pdn;
>> +
>> +	ibm_get_config_addr_info2   = 
>> rtas_token("ibm,get-config-addr-info2");
>> +	ibm_get_config_addr_info    = 
>> rtas_token("ibm,get-config-addr-info");
>> +
>> +	root_pdn = PCI_DN(phb->dn);
>> +	pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
>> +	config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
>> +
>> +	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
>> +		/*
>> +		 * First of all, we need to make sure there has one PE
>> +		 * associated with the device. Otherwise, PE address is
>> +		 * meaningless.
>> +		 */
> 
> This comment might be better if it explained how using option=0
> with ibm_get_config_addr tests the PE.
> 
>> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
>> +			config_addr, BUID_HI(pdn->phb->buid),
>> +			BUID_LO(pdn->phb->buid), 1);
>> +		if (ret || (rets[0] == 0)) {
>> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
>> +				"option=%d config_addr=%x\n",
>> +				__func__, pdn->phb->global_number, 1, rets[0]);
>> +			return -1;
>> +		}
>> +
>> +		/* Retrieve the associated PE config address */
>> +		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
>> +			config_addr, BUID_HI(pdn->phb->buid),
>> +			BUID_LO(pdn->phb->buid), 0);
>> +		if (ret) {
>> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
>> +				"option=%d config_addr=%x\n",
>> +				__func__, pdn->phb->global_number, 0, rets[0]);
>> +			return -1;
>> +		}
>> +		return rets[0];
>> +	}
>> +
>> +	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
>> +		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
>> +			config_addr, BUID_HI(pdn->phb->buid),
>> +			BUID_LO(pdn->phb->buid), 0);
>> +		if (ret || rets[0]) {
>> +			pr_warn("%s: Failed to get address for PHB#%x-PE# "
>> +				"config_addr=%x\n",
>> +				__func__, pdn->phb->global_number, rets[0]);
>> +			return -1;
>> +		}
>> +		return rets[0];
>> +	}
>> +
>> +	return ret;
> Can this ever return anything other than 0?
> 
>> +}
>> +
>> +static int __init pseries_phb_reset(void)
>> +{
>> +	struct pci_controller *phb;
>> +	int config_addr;
>> +	int ibm_set_slot_reset;
>> +	int ibm_configure_pe;
>> +	int ret;
>> +
>> +	if (is_kdump_kernel() || reset_devices) {
>> +		pr_info("Issue PHB reset ...\n");
>> +		ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
>> +		ibm_configure_pe = rtas_token("ibm,configure-pe");
>> +
>> +		if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
>> +				ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
>> +			pr_info("%s: EEH functionality not supported\n",
>> +				__func__);
>> +		}
>> +
>> +		list_for_each_entry(phb, &hose_list, list_node) {
>> +			config_addr = pseries_get_pdn_addr(phb);
>> +			if (config_addr == -1)
>> +				continue;
>> +
>> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> +				config_addr, BUID_HI(phb->buid),
>> +				BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
>> +
>> +			/* If fundamental-reset not supported, try hot-reset */
>> +			if (ret == -8)
>> +				ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> +					config_addr, BUID_HI(phb->buid),
>> +					BUID_LO(phb->buid), EEH_RESET_HOT);
>> +
>> +			if (ret) {
>> +				pr_err("%s: fail with rtas_call fundamental reset=%d\n",
>> +					__func__, ret);
> 
> This error might be a bit confusing, since it's not clear if the result
> came from the fundamental or hot-reset.
> 
>> +				continue;
>> +			}
>> +		}
>> +		msleep(EEH_PE_RST_SETTLE_TIME);
>> +
>> +		list_for_each_entry(phb, &hose_list, list_node) {
>> +			config_addr = pseries_get_pdn_addr(phb);
>> +			if (config_addr == -1)
>> +				continue;
>> +
>> +			ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> +				config_addr, BUID_HI(phb->buid),
>> +				BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
>> +			if (ret) {
>> +				pr_err("%s: fail with rtas_call deactive=%d\n",
>> +					__func__, ret);
>> +				continue;
>> +			}
>> +		}
>> +		msleep(EEH_PE_RST_SETTLE_TIME);
>> +
>> +		list_for_each_entry(phb, &hose_list, list_node) {
>> +			config_addr = pseries_get_pdn_addr(phb);
>> +			if (config_addr == -1)
>> +				continue;
>> +
>> +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>> +				config_addr, BUID_HI(phb->buid),
>> +				BUID_LO(phb->buid));
>> +			if (ret) {
>> +				pr_err("%s: fail with rtas_call configure_pe =%d\n",
>> +					__func__, ret);
> 
> These errors might be more useful if they indicated which PHB caused 
> the
> error.
> 
>> +				continue;
>> +			}
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +postcore_initcall(pseries_phb_reset);
>> +
>> --
>> 2.18.1
>> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-05-14 16:49 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-07 13:10 powerpc/pci: [PATCH 1/1]: PCIE PHB reset wenxiong
2020-05-11 10:17 ` Oliver O'Halloran
2020-05-12  5:28 ` Sam Bobroff
2020-05-14 16:46   ` wenxiong
2020-05-13  7:23 ` Sam Bobroff

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.