linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities()
       [not found] <20200918204603.62100-1-sean.v.kelley@intel.com>
@ 2020-09-18 20:45 ` Sean V Kelley
  2020-09-21 10:09   ` Jonathan Cameron
  2020-09-22 20:45   ` Bjorn Helgaas
  2020-09-18 20:45 ` [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error Sean V Kelley
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 15+ messages in thread
From: Sean V Kelley @ 2020-09-18 20:45 UTC (permalink / raw)
  To: bhelgaas, Jonathan.Cameron, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo
  Cc: linux-pci, linux-kernel, Sean V Kelley

Extend support for Root Complex Event Collectors by decoding and
caching the RCEC Endpoint Association Extended Capabilities when
enumerating. Use that cached information for later error source
reporting. See PCI Express Base Specification, version 5.0-1,
section 7.9.10.

Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
---
 drivers/pci/pci.h         | 18 ++++++++++++++
 drivers/pci/pcie/Makefile |  2 +-
 drivers/pci/pcie/rcec.c   | 52 +++++++++++++++++++++++++++++++++++++++
 drivers/pci/probe.c       |  3 ++-
 include/linux/pci.h       |  4 +++
 5 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 drivers/pci/pcie/rcec.c

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fa12f7cbc1a0..83670a6425d8 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -449,6 +449,16 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 #endif	/* CONFIG_PCIEAER */
 
+#ifdef CONFIG_PCIEPORTBUS
+/* Cached RCEC Associated Endpoint Extended Capabilities */
+struct rcec_ext {
+	u8		ver;
+	u8		nextbusn;
+	u8		lastbusn;
+	u32		bitmap;
+};
+#endif
+
 #ifdef CONFIG_PCIE_DPC
 void pci_save_dpc_state(struct pci_dev *dev);
 void pci_restore_dpc_state(struct pci_dev *dev);
@@ -461,6 +471,14 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {}
 static inline void pci_dpc_init(struct pci_dev *pdev) {}
 #endif
 
+#ifdef CONFIG_PCIEPORTBUS
+void pci_rcec_init(struct pci_dev *dev);
+void pci_rcec_exit(struct pci_dev *dev);
+#else
+static inline void pci_rcec_init(struct pci_dev *dev) {}
+static inline void pci_rcec_exit(struct pci_dev *dev) {}
+#endif
+
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 68da9280ff11..d9697892fa3e 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for PCI Express features and port driver
 
-pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o
+pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o rcec.o
 
 obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
 
diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
new file mode 100644
index 000000000000..519ae086ff41
--- /dev/null
+++ b/drivers/pci/pcie/rcec.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Root Complex Event Collector Support
+ *
+ * Authors:
+ *  Sean V Kelley <sean.v.kelley@intel.com>
+ *  Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+ *
+ * Copyright (C) 2020 Intel Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+
+#include "../pci.h"
+
+void pci_rcec_init(struct pci_dev *dev)
+{
+	u32 rcec, hdr, busn;
+
+	/* Only for Root Complex Event Collectors */
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)
+		return;
+
+	dev->rcec_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC);
+	if (!dev->rcec_cap)
+		return;
+
+	dev->rcec_ext = kzalloc(sizeof(*dev->rcec_ext), GFP_KERNEL);
+
+	rcec = dev->rcec_cap;
+	pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, &dev->rcec_ext->bitmap);
+
+	/* Check whether RCEC BUSN register is present */
+	pci_read_config_dword(dev, rcec, &hdr);
+	dev->rcec_ext->ver = PCI_EXT_CAP_VER(hdr);
+	if (dev->rcec_ext->ver < PCI_RCEC_BUSN_REG_VER)
+		return;
+
+	pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn);
+	dev->rcec_ext->nextbusn = PCI_RCEC_BUSN_NEXT(busn);
+	dev->rcec_ext->lastbusn = PCI_RCEC_BUSN_LAST(busn);
+}
+
+void pci_rcec_exit(struct pci_dev *dev)
+{
+	kfree(dev->rcec_ext);
+	dev->rcec_ext = NULL;
+}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 03d37128a24f..16bc651fecb7 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2201,6 +2201,7 @@ static void pci_configure_device(struct pci_dev *dev)
 static void pci_release_capabilities(struct pci_dev *dev)
 {
 	pci_aer_exit(dev);
+	pci_rcec_exit(dev);
 	pci_vpd_release(dev);
 	pci_iov_release(dev);
 	pci_free_cap_save_buffers(dev);
@@ -2400,7 +2401,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	pci_ptm_init(dev);		/* Precision Time Measurement */
 	pci_aer_init(dev);		/* Advanced Error Reporting */
 	pci_dpc_init(dev);		/* Downstream Port Containment */
-
+	pci_rcec_init(dev);		/* Root Complex Event Collector */
 	pcie_report_downtraining(dev);
 
 	if (pci_probe_reset_function(dev) == 0)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 835530605c0d..5c5c4eb642b6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -326,6 +326,10 @@ struct pci_dev {
 #ifdef CONFIG_PCIEAER
 	u16		aer_cap;	/* AER capability offset */
 	struct aer_stats *aer_stats;	/* AER stats for this device */
+#endif
+#ifdef CONFIG_PCIEPORTBUS
+	u16		rcec_cap;	/* RCEC capability offset */
+	struct rcec_ext *rcec_ext;	/* RCEC cached assoc. endpoint extended capabilities */
 #endif
 	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error
       [not found] <20200918204603.62100-1-sean.v.kelley@intel.com>
  2020-09-18 20:45 ` [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities() Sean V Kelley
@ 2020-09-18 20:45 ` Sean V Kelley
  2020-09-21 11:13   ` Jonathan Cameron
  2020-09-18 20:45 ` [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs Sean V Kelley
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Sean V Kelley @ 2020-09-18 20:45 UTC (permalink / raw)
  To: bhelgaas, Jonathan.Cameron, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo
  Cc: linux-pci, linux-kernel

From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>

Attempt to do function level reset for an RCiEP associated with an
RCEC device on fatal error.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/pci/pcie/err.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index e575fa6cee63..5380ecc41506 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -169,6 +169,17 @@ static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *,
 		cb(bridge, userdata);
 }
 
+static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
+{
+	if (!pcie_has_flr(dev))
+		return PCI_ERS_RESULT_NONE;
+
+	if (pcie_flr(dev))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 			pci_channel_state_t state,
 			pci_ers_result_t (*reset_subordinate_devices)(struct pci_dev *pdev))
@@ -195,15 +206,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	if (state == pci_channel_io_frozen) {
 		pci_bridge_walk(bridge, report_frozen_detected, &status);
 		if (type == PCI_EXP_TYPE_RC_END) {
-			pci_warn(dev, "link reset not possible for RCiEP\n");
-			status = PCI_ERS_RESULT_NONE;
-			goto failed;
-		}
-
-		status = reset_subordinate_devices(bridge);
-		if (status != PCI_ERS_RESULT_RECOVERED) {
-			pci_warn(dev, "subordinate device reset failed\n");
-			goto failed;
+			status = flr_on_rciep(dev);
+			if (status != PCI_ERS_RESULT_RECOVERED) {
+				pci_warn(dev, "function level reset failed\n");
+				goto failed;
+			}
+		} else {
+			status = reset_subordinate_devices(bridge);
+			if (status != PCI_ERS_RESULT_RECOVERED) {
+				pci_warn(dev, "subordinate device reset failed\n");
+				goto failed;
+			}
 		}
 	} else {
 		pci_bridge_walk(bridge, report_normal_detected, &status);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs
       [not found] <20200918204603.62100-1-sean.v.kelley@intel.com>
  2020-09-18 20:45 ` [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities() Sean V Kelley
  2020-09-18 20:45 ` [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error Sean V Kelley
@ 2020-09-18 20:45 ` Sean V Kelley
  2020-09-21 11:25   ` Jonathan Cameron
  2020-09-18 20:46 ` [PATCH v5 09/10] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling Sean V Kelley
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Sean V Kelley @ 2020-09-18 20:45 UTC (permalink / raw)
  To: bhelgaas, Jonathan.Cameron, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo
  Cc: linux-pci, linux-kernel, Sean V Kelley

A Root Complex Event Collector provides support for
terminating error and PME messages from associated RCiEPs.

Make use of the RCEC Endpoint Association Extended Capability
to identify associated RCiEPs. Link the associated RCiEPs as
the RCECs are enumerated.

Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
---
 drivers/pci/pci.h              |  2 +
 drivers/pci/pcie/portdrv_pci.c |  3 ++
 drivers/pci/pcie/rcec.c        | 96 ++++++++++++++++++++++++++++++++++
 include/linux/pci.h            |  1 +
 4 files changed, 102 insertions(+)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 7b547fc3679a..ddb5872466fb 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -474,9 +474,11 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {}
 #ifdef CONFIG_PCIEPORTBUS
 void pci_rcec_init(struct pci_dev *dev);
 void pci_rcec_exit(struct pci_dev *dev);
+void pcie_link_rcec(struct pci_dev *rcec);
 #else
 static inline void pci_rcec_init(struct pci_dev *dev) {}
 static inline void pci_rcec_exit(struct pci_dev *dev) {}
+static inline void pcie_link_rcec(struct pci_dev *rcec) {}
 #endif
 
 #ifdef CONFIG_PCI_ATS
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 4d880679b9b1..dbeb0155c2c3 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -110,6 +110,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
 	     (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)))
 		return -ENODEV;
 
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
+		pcie_link_rcec(dev);
+
 	status = pcie_port_device_register(dev);
 	if (status)
 		return status;
diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
index 519ae086ff41..5630480a6659 100644
--- a/drivers/pci/pcie/rcec.c
+++ b/drivers/pci/pcie/rcec.c
@@ -17,6 +17,102 @@
 
 #include "../pci.h"
 
+struct walk_rcec_data {
+	struct pci_dev *rcec;
+	int (*user_callback)(struct pci_dev *dev, void *data);
+	void *user_data;
+};
+
+static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep)
+{
+	unsigned long bitmap = rcec->rcec_ext->bitmap;
+	unsigned int devn;
+
+	/* An RCiEP found on bus in range */
+	if (rcec->bus->number != rciep->bus->number)
+		return true;
+
+	/* Same bus, so check bitmap */
+	for_each_set_bit(devn, &bitmap, 32)
+		if (devn == rciep->devfn)
+			return true;
+
+	return false;
+}
+
+static int link_rcec_helper(struct pci_dev *dev, void *data)
+{
+	struct walk_rcec_data *rcec_data = data;
+	struct pci_dev *rcec = rcec_data->rcec;
+
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && rcec_assoc_rciep(rcec, dev)) {
+		dev->rcec = rcec;
+		pci_dbg(dev, "PME & error events reported via %s\n", pci_name(rcec));
+	}
+
+	return 0;
+}
+
+void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void *userdata)
+{
+	struct walk_rcec_data *rcec_data = userdata;
+	struct pci_dev *rcec = rcec_data->rcec;
+	u8 nextbusn, lastbusn;
+	struct pci_bus *bus;
+	unsigned int bnr;
+
+	if (!rcec->rcec_cap)
+		return;
+
+	/* Walk own bus for bitmap based association */
+	pci_walk_bus(rcec->bus, cb, rcec_data);
+
+	/* Check whether RCEC BUSN register is present */
+	if (rcec->rcec_ext->ver < PCI_RCEC_BUSN_REG_VER)
+		return;
+
+	nextbusn = rcec->rcec_ext->nextbusn;
+	lastbusn = rcec->rcec_ext->lastbusn;
+
+	/* All RCiEP devices are on the same bus as the RCEC */
+	if (nextbusn == 0xff && lastbusn == 0x00)
+		return;
+
+	for (bnr = nextbusn; bnr <= lastbusn; bnr++) {
+		/* No association indicated (PCIe 5.0-1, 7.9.10.3) */
+		if (bnr == rcec->bus->number)
+			continue;
+
+		bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr);
+		if (!bus)
+			continue;
+
+		/* Find RCiEP devices on the given bus ranges */
+		pci_walk_bus(bus, cb, rcec_data);
+	}
+}
+
+/**
+ * pcie_link_rcec - Link RCiEP devices associated with RCEC.
+ * @rcec: RCEC whose RCiEP devices should be linked.
+ *
+ * Link the given RCEC to each RCiEP device found.
+ *
+ */
+void pcie_link_rcec(struct pci_dev *rcec)
+{
+	struct walk_rcec_data rcec_data;
+
+	if (!rcec->rcec_cap)
+		return;
+
+	rcec_data.rcec = rcec;
+	rcec_data.user_callback = NULL;
+	rcec_data.user_data = NULL;
+
+	walk_rcec(link_rcec_helper, &rcec_data);
+}
+
 void pci_rcec_init(struct pci_dev *dev)
 {
 	u32 rcec, hdr, busn;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5c5c4eb642b6..ad382a9484ea 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -330,6 +330,7 @@ struct pci_dev {
 #ifdef CONFIG_PCIEPORTBUS
 	u16		rcec_cap;	/* RCEC capability offset */
 	struct rcec_ext *rcec_ext;	/* RCEC cached assoc. endpoint extended capabilities */
+	struct pci_dev  *rcec;          /* Associated RCEC device */
 #endif
 	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v5 09/10] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling
       [not found] <20200918204603.62100-1-sean.v.kelley@intel.com>
                   ` (2 preceding siblings ...)
  2020-09-18 20:45 ` [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs Sean V Kelley
@ 2020-09-18 20:46 ` Sean V Kelley
  2020-09-21 12:15   ` Jonathan Cameron
       [not found] ` <20200918204603.62100-8-sean.v.kelley@intel.com>
       [not found] ` <20200918204603.62100-9-sean.v.kelley@intel.com>
  5 siblings, 1 reply; 15+ messages in thread
From: Sean V Kelley @ 2020-09-18 20:46 UTC (permalink / raw)
  To: bhelgaas, Jonathan.Cameron, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo
  Cc: linux-pci, linux-kernel, Sean V Kelley

The Root Complex Event Collectors (RCECs) appear as peers of Root Ports
and also have the PME capability. As with AER, there is a need to be
able to walk the RCiEPs associated with their RCEC for purposes of
acting upon them with callbacks. So add RCEC support through the use
of pcie_walk_rcec() to the current PME service driver and attach the
PME service driver to the RCEC device.

Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
---
 drivers/pci/pcie/pme.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 6a32970bb731..87799166c96a 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -310,7 +310,10 @@ static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign)
 static void pcie_pme_mark_devices(struct pci_dev *port)
 {
 	pcie_pme_can_wakeup(port, NULL);
-	if (port->subordinate)
+
+	if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC)
+		pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL);
+	else if (port->subordinate)
 		pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL);
 }
 
@@ -320,10 +323,15 @@ static void pcie_pme_mark_devices(struct pci_dev *port)
  */
 static int pcie_pme_probe(struct pcie_device *srv)
 {
-	struct pci_dev *port;
+	struct pci_dev *port = srv->port;
 	struct pcie_pme_service_data *data;
 	int ret;
 
+	/* Limit to Root Ports or Root Complex Event Collectors */
+	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+		return -ENODEV;
+
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
@@ -333,7 +341,6 @@ static int pcie_pme_probe(struct pcie_device *srv)
 	data->srv = srv;
 	set_service_data(srv, data);
 
-	port = srv->port;
 	pcie_pme_interrupt_enable(port, false);
 	pcie_clear_root_pme_status(port);
 
@@ -445,7 +452,7 @@ static void pcie_pme_remove(struct pcie_device *srv)
 
 static struct pcie_port_service_driver pcie_pme_driver = {
 	.name		= "pcie_pme",
-	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
+	.port_type	= PCIE_ANY_PORT,
 	.service	= PCIE_PORT_SERVICE_PME,
 
 	.probe		= pcie_pme_probe,
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities()
  2020-09-18 20:45 ` [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities() Sean V Kelley
@ 2020-09-21 10:09   ` Jonathan Cameron
  2020-09-22 20:45   ` Bjorn Helgaas
  1 sibling, 0 replies; 15+ messages in thread
From: Jonathan Cameron @ 2020-09-21 10:09 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, rafael.j.wysocki, ashok.raj, tony.luck,
	sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci, linux-kernel

On Fri, 18 Sep 2020 13:45:56 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> Extend support for Root Complex Event Collectors by decoding and
> caching the RCEC Endpoint Association Extended Capabilities when
> enumerating. Use that cached information for later error source
> reporting. See PCI Express Base Specification, version 5.0-1,
> section 7.9.10.
> 
> Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
> Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>

Hi Sean,

A few comments inline.

Thanks,

Jonathan

> ---
>  drivers/pci/pci.h         | 18 ++++++++++++++
>  drivers/pci/pcie/Makefile |  2 +-
>  drivers/pci/pcie/rcec.c   | 52 +++++++++++++++++++++++++++++++++++++++
>  drivers/pci/probe.c       |  3 ++-
>  include/linux/pci.h       |  4 +++
>  5 files changed, 77 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/pci/pcie/rcec.c
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index fa12f7cbc1a0..83670a6425d8 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -449,6 +449,16 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>  #endif	/* CONFIG_PCIEAER */
>  
> +#ifdef CONFIG_PCIEPORTBUS
> +/* Cached RCEC Associated Endpoint Extended Capabilities */
> +struct rcec_ext {
> +	u8		ver;
> +	u8		nextbusn;
> +	u8		lastbusn;
> +	u32		bitmap;
> +};
> +#endif
> +
>  #ifdef CONFIG_PCIE_DPC
>  void pci_save_dpc_state(struct pci_dev *dev);
>  void pci_restore_dpc_state(struct pci_dev *dev);
> @@ -461,6 +471,14 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {}
>  static inline void pci_dpc_init(struct pci_dev *pdev) {}
>  #endif
>  
> +#ifdef CONFIG_PCIEPORTBUS
> +void pci_rcec_init(struct pci_dev *dev);
> +void pci_rcec_exit(struct pci_dev *dev);
> +#else
> +static inline void pci_rcec_init(struct pci_dev *dev) {}
> +static inline void pci_rcec_exit(struct pci_dev *dev) {}
> +#endif
> +
>  #ifdef CONFIG_PCI_ATS
>  /* Address Translation Service */
>  void pci_ats_init(struct pci_dev *dev);
> diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
> index 68da9280ff11..d9697892fa3e 100644
> --- a/drivers/pci/pcie/Makefile
> +++ b/drivers/pci/pcie/Makefile
> @@ -2,7 +2,7 @@
>  #
>  # Makefile for PCI Express features and port driver
>  
> -pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o
> +pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o rcec.o
>  
>  obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
>  
> diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
> new file mode 100644
> index 000000000000..519ae086ff41
> --- /dev/null
> +++ b/drivers/pci/pcie/rcec.c
> @@ -0,0 +1,52 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Root Complex Event Collector Support
> + *
> + * Authors:
> + *  Sean V Kelley <sean.v.kelley@intel.com>
> + *  Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> + *
> + * Copyright (C) 2020 Intel Corp.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/errno.h>

I guess it might come in during later patches, but I can't see any
use of errno.h in here.  If it does, good to introduce the header
in the patch where it becomes relevant.

> +#include <linux/bitops.h>
No obvious use of bitops.h either.

> +#include <linux/pci.h>
> +#include <linux/pci_regs.h>
> +
> +#include "../pci.h"
> +
> +void pci_rcec_init(struct pci_dev *dev)
> +{
> +	u32 rcec, hdr, busn;
> +
> +	/* Only for Root Complex Event Collectors */
> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)
> +		return;
> +
> +	dev->rcec_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC);
> +	if (!dev->rcec_cap)
> +		return;
> +
> +	dev->rcec_ext = kzalloc(sizeof(*dev->rcec_ext), GFP_KERNEL);
> +
> +	rcec = dev->rcec_cap;
> +	pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, &dev->rcec_ext->bitmap);

Given number of uses of dev->rcec_ext perhaps worth a local variable for
readability?

> +
> +	/* Check whether RCEC BUSN register is present */
> +	pci_read_config_dword(dev, rcec, &hdr);
> +	dev->rcec_ext->ver = PCI_EXT_CAP_VER(hdr);
> +	if (dev->rcec_ext->ver < PCI_RCEC_BUSN_REG_VER)
> +		return;

As there are values for nextbusn and lastbusn defined to mean
that there are no additional bus numbers, could we just fill them
in with dummy values for the case of the capability version being
too old? I think it ends up representing the same thing as them not
being there at all?

nextbusn = 0xFF
lastbusn = 0 (set by kzalloc anyway).

> +
> +	pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn);
> +	dev->rcec_ext->nextbusn = PCI_RCEC_BUSN_NEXT(busn);
> +	dev->rcec_ext->lastbusn = PCI_RCEC_BUSN_LAST(busn);
> +}
> +
> +void pci_rcec_exit(struct pci_dev *dev)
> +{
> +	kfree(dev->rcec_ext);
> +	dev->rcec_ext = NULL;
> +}
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 03d37128a24f..16bc651fecb7 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2201,6 +2201,7 @@ static void pci_configure_device(struct pci_dev *dev)
>  static void pci_release_capabilities(struct pci_dev *dev)
>  {
>  	pci_aer_exit(dev);
> +	pci_rcec_exit(dev);
>  	pci_vpd_release(dev);
>  	pci_iov_release(dev);
>  	pci_free_cap_save_buffers(dev);
> @@ -2400,7 +2401,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
>  	pci_ptm_init(dev);		/* Precision Time Measurement */
>  	pci_aer_init(dev);		/* Advanced Error Reporting */
>  	pci_dpc_init(dev);		/* Downstream Port Containment */
> -
> +	pci_rcec_init(dev);		/* Root Complex Event Collector */

Nice to avoid changing the layout and leave a blank line here.
Slightly reduces noise in the diff as well!

>  	pcie_report_downtraining(dev);
>  
>  	if (pci_probe_reset_function(dev) == 0)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 835530605c0d..5c5c4eb642b6 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -326,6 +326,10 @@ struct pci_dev {
>  #ifdef CONFIG_PCIEAER
>  	u16		aer_cap;	/* AER capability offset */
>  	struct aer_stats *aer_stats;	/* AER stats for this device */
> +#endif
> +#ifdef CONFIG_PCIEPORTBUS
> +	u16		rcec_cap;	/* RCEC capability offset */
> +	struct rcec_ext *rcec_ext;	/* RCEC cached assoc. endpoint extended capabilities */
>  #endif
>  	u8		pcie_cap;	/* PCIe capability offset */
>  	u8		msi_cap;	/* MSI capability offset */



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error
  2020-09-18 20:45 ` [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error Sean V Kelley
@ 2020-09-21 11:13   ` Jonathan Cameron
  2020-09-23  2:49     ` Sean V Kelley
  0 siblings, 1 reply; 15+ messages in thread
From: Jonathan Cameron @ 2020-09-21 11:13 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, rafael.j.wysocki, ashok.raj, tony.luck,
	sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci, linux-kernel

On Fri, 18 Sep 2020 13:45:58 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> 
> Attempt to do function level reset for an RCiEP associated with an
> RCEC device on fatal error.

I'm not sure the description is correct. Looks like it will do
the reset even if not associated with an RCEC.
I'd just cut this down to:

"Attempt to do a function level reset for an RCiEP on fatal error."

I'm not 100% sure doing an FLR will actually help in most cases if you've
reported a fatal error, but I suppose it does no harm!

So with description changed.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> 
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> ---
>  drivers/pci/pcie/err.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index e575fa6cee63..5380ecc41506 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -169,6 +169,17 @@ static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *,
>  		cb(bridge, userdata);
>  }
>  
> +static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> +{
> +	if (!pcie_has_flr(dev))
> +		return PCI_ERS_RESULT_NONE;
> +
> +	if (pcie_flr(dev))
> +		return PCI_ERS_RESULT_DISCONNECT;
> +
> +	return PCI_ERS_RESULT_RECOVERED;
> +}
> +
>  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  			pci_channel_state_t state,	
>  			pci_ers_result_t (*reset_subordinate_devices)(struct pci_dev *pdev))
> @@ -195,15 +206,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  	if (state == pci_channel_io_frozen) {
>  		pci_bridge_walk(bridge, report_frozen_detected, &status);
>  		if (type == PCI_EXP_TYPE_RC_END) {
> -			pci_warn(dev, "link reset not possible for RCiEP\n");
> -			status = PCI_ERS_RESULT_NONE;
> -			goto failed;
> -		}
> -
> -		status = reset_subordinate_devices(bridge);
> -		if (status != PCI_ERS_RESULT_RECOVERED) {
> -			pci_warn(dev, "subordinate device reset failed\n");
> -			goto failed;
> +			status = flr_on_rciep(dev);
> +			if (status != PCI_ERS_RESULT_RECOVERED) {
> +				pci_warn(dev, "function level reset failed\n");
> +				goto failed;
> +			}
> +		} else {
> +			status = reset_subordinate_devices(bridge);
> +			if (status != PCI_ERS_RESULT_RECOVERED) {
> +				pci_warn(dev, "subordinate device reset failed\n");
> +				goto failed;
> +			}
>  		}
>  	} else {
>  		pci_bridge_walk(bridge, report_normal_detected, &status);



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs
  2020-09-18 20:45 ` [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs Sean V Kelley
@ 2020-09-21 11:25   ` Jonathan Cameron
  2020-09-23  2:55     ` Sean V Kelley
  0 siblings, 1 reply; 15+ messages in thread
From: Jonathan Cameron @ 2020-09-21 11:25 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, rafael.j.wysocki, ashok.raj, tony.luck,
	sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci, linux-kernel

On Fri, 18 Sep 2020 13:45:59 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> A Root Complex Event Collector provides support for
> terminating error and PME messages from associated RCiEPs.
> 
> Make use of the RCEC Endpoint Association Extended Capability
> to identify associated RCiEPs. Link the associated RCiEPs as
> the RCECs are enumerated.
> 
> Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
A couple of minor things inline plus follow through on not
special casing the older versions of the capability.

Otherwise looks good to me.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/pci/pci.h              |  2 +
>  drivers/pci/pcie/portdrv_pci.c |  3 ++
>  drivers/pci/pcie/rcec.c        | 96 ++++++++++++++++++++++++++++++++++
>  include/linux/pci.h            |  1 +
>  4 files changed, 102 insertions(+)
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 7b547fc3679a..ddb5872466fb 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -474,9 +474,11 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {}
>  #ifdef CONFIG_PCIEPORTBUS
>  void pci_rcec_init(struct pci_dev *dev);
>  void pci_rcec_exit(struct pci_dev *dev);
> +void pcie_link_rcec(struct pci_dev *rcec);
>  #else
>  static inline void pci_rcec_init(struct pci_dev *dev) {}
>  static inline void pci_rcec_exit(struct pci_dev *dev) {}
> +static inline void pcie_link_rcec(struct pci_dev *rcec) {}
>  #endif
>  
>  #ifdef CONFIG_PCI_ATS
> diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
> index 4d880679b9b1..dbeb0155c2c3 100644
> --- a/drivers/pci/pcie/portdrv_pci.c
> +++ b/drivers/pci/pcie/portdrv_pci.c
> @@ -110,6 +110,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
>  	     (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)))
>  		return -ENODEV;
>  
> +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
> +		pcie_link_rcec(dev);
> +
>  	status = pcie_port_device_register(dev);
>  	if (status)
>  		return status;
> diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
> index 519ae086ff41..5630480a6659 100644
> --- a/drivers/pci/pcie/rcec.c
> +++ b/drivers/pci/pcie/rcec.c
> @@ -17,6 +17,102 @@
>  
>  #include "../pci.h"
>  
> +struct walk_rcec_data {
> +	struct pci_dev *rcec;
> +	int (*user_callback)(struct pci_dev *dev, void *data);
> +	void *user_data;
> +};
> +
> +static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep)
> +{
> +	unsigned long bitmap = rcec->rcec_ext->bitmap;
> +	unsigned int devn;
> +
> +	/* An RCiEP found on bus in range */
Perhaps adjust the comment to say:
	/* An RCiEP found on a different bus in range */

as the actual rcec bus can be in the range as I understand it.

> +	if (rcec->bus->number != rciep->bus->number)
> +		return true;
> +
> +	/* Same bus, so check bitmap */
> +	for_each_set_bit(devn, &bitmap, 32)
> +		if (devn == rciep->devfn)
> +			return true;
> +
> +	return false;
> +}
> +
> +static int link_rcec_helper(struct pci_dev *dev, void *data)
> +{
> +	struct walk_rcec_data *rcec_data = data;
> +	struct pci_dev *rcec = rcec_data->rcec;
> +
> +	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && rcec_assoc_rciep(rcec, dev)) {
> +		dev->rcec = rcec;
> +		pci_dbg(dev, "PME & error events reported via %s\n", pci_name(rcec));
> +	}
> +
> +	return 0;
> +}
> +
> +void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void *userdata)

static, or declare it in a header if we are going to need it elsewhere
later in the series.

> +{
> +	struct walk_rcec_data *rcec_data = userdata;
> +	struct pci_dev *rcec = rcec_data->rcec;
> +	u8 nextbusn, lastbusn;
> +	struct pci_bus *bus;
> +	unsigned int bnr;
> +
> +	if (!rcec->rcec_cap)
> +		return;
> +
> +	/* Walk own bus for bitmap based association */
> +	pci_walk_bus(rcec->bus, cb, rcec_data);
> +
> +	/* Check whether RCEC BUSN register is present */
> +	if (rcec->rcec_ext->ver < PCI_RCEC_BUSN_REG_VER)
> +		return;

If you make the earlier suggested change to fill in nextbusn = 0xFF
for the earlier versions of the capability, you can avoid special casing
here.

> +
> +	nextbusn = rcec->rcec_ext->nextbusn;
> +	lastbusn = rcec->rcec_ext->lastbusn;
> +
> +	/* All RCiEP devices are on the same bus as the RCEC */
> +	if (nextbusn == 0xff && lastbusn == 0x00)
> +		return;
> +
> +	for (bnr = nextbusn; bnr <= lastbusn; bnr++) {
> +		/* No association indicated (PCIe 5.0-1, 7.9.10.3) */
> +		if (bnr == rcec->bus->number)
> +			continue;
> +
> +		bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr);
> +		if (!bus)
> +			continue;
> +
> +		/* Find RCiEP devices on the given bus ranges */
> +		pci_walk_bus(bus, cb, rcec_data);
> +	}
> +}
> +
> +/**
> + * pcie_link_rcec - Link RCiEP devices associating with RCEC.
> + * @rcec     RCEC whose RCiEP devices should be linked.
> + *
> + * Link the given RCEC to each RCiEP device found.

I'm a fusspot on blank lines. The one here doesn't add anything!

> + *
> + */
> +void pcie_link_rcec(struct pci_dev *rcec)
> +{
> +	struct walk_rcec_data rcec_data;
> +
> +	if (!rcec->rcec_cap)
> +		return;
> +
> +	rcec_data.rcec = rcec;
> +	rcec_data.user_callback = NULL;
> +	rcec_data.user_data = NULL;
> +
> +	walk_rcec(link_rcec_helper, &rcec_data);
> +}
> +
>  void pci_rcec_init(struct pci_dev *dev)
>  {
>  	u32 rcec, hdr, busn;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 5c5c4eb642b6..ad382a9484ea 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -330,6 +330,7 @@ struct pci_dev {
>  #ifdef CONFIG_PCIEPORTBUS
>  	u16		rcec_cap;	/* RCEC capability offset */
>  	struct rcec_ext *rcec_ext;	/* RCEC cached assoc. endpoint extended capabilities */
> +	struct pci_dev  *rcec;          /* Associated RCEC device */
>  #endif
>  	u8		pcie_cap;	/* PCIe capability offset */
>  	u8		msi_cap;	/* MSI capability offset */



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 07/10] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR
       [not found] ` <20200918204603.62100-8-sean.v.kelley@intel.com>
@ 2020-09-21 11:31   ` Jonathan Cameron
  2020-09-23  2:50     ` Sean V Kelley
  0 siblings, 1 reply; 15+ messages in thread
From: Jonathan Cameron @ 2020-09-21 11:31 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, rafael.j.wysocki, ashok.raj, tony.luck,
	sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci, linux-kernel

On Fri, 18 Sep 2020 13:46:00 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> 
> When attempting error recovery for an RCiEP associated with an RCEC device,
> there needs to be a way to update the Root Error Status, the Uncorrectable
> Error Status and the Uncorrectable Error Severity of the parent RCEC.
> In some non-native cases in which there is no OS visible device
> associated with the RCiEP, there is nothing to act upon as the firmware
> is acting before the OS. So add handling for the linked 'rcec' in AER/ERR
> while taking into account non-native cases.
> 
> Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
I'll give this a test run later to check I'm not missing anything, but LGTM.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks,

> ---
>  drivers/pci/pcie/aer.c |  9 +++++----
>  drivers/pci/pcie/err.c | 38 ++++++++++++++++++++++++--------------
>  2 files changed, 29 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 65dff5f3457a..dccdba60b5d9 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device *dev)
>  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  {
>  	int aer = dev->aer_cap;
> +	int rc = 0;
>  	u32 reg32;
> -	int rc;
> -
>  
>  	/* Disable Root's interrupt in response to error messages */
>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
>  	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
>  	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
>  
> -	rc = pci_bus_error_reset(dev);
> -	pci_info(dev, "Root Port link has been reset\n");
> +	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> +		rc = pci_bus_error_reset(dev);
> +		pci_info(dev, "Root Port link has been reset\n");
> +	}
>  
>  	/* Clear Root Error Status */
>  	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index 5380ecc41506..a61a2518163a 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, void *data)
>  /**
>   * pci_bridge_walk - walk bridges potentially AER affected
>   * @bridge   bridge which may be an RCEC with associated RCiEPs,
> - *           an RCiEP associated with an RCEC, or a Port.
> + *           or a Port.
> + * @dev      an RCiEP lacking an associated RCEC.
>   * @cb       callback to be called for each device found
>   * @userdata arbitrary pointer to be passed to callback.
>   *
> @@ -160,13 +161,16 @@ static int report_resume(struct pci_dev *dev, void *data)
>   * If the device provided has no subordinate bus, call the provided
>   * callback on the device itself.
>   */
> -static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *),
> +static void pci_bridge_walk(struct pci_dev *bridge, struct pci_dev *dev,
> +			    int (*cb)(struct pci_dev *, void *),
>  			    void *userdata)
>  {
> -	if (bridge->subordinate)
> +	if (bridge && bridge->subordinate)
>  		pci_walk_bus(bridge->subordinate, cb, userdata);
> -	else
> +	else if (bridge)
>  		cb(bridge, userdata);
> +	else
> +		cb(dev, userdata);
>  }
>  
>  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> @@ -196,16 +200,24 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  	type = pci_pcie_type(dev);
>  	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>  	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> -	    type == PCI_EXP_TYPE_RC_EC ||
> -	    type == PCI_EXP_TYPE_RC_END)
> +	    type == PCI_EXP_TYPE_RC_EC)
>  		bridge = dev;
> +	else if (type == PCI_EXP_TYPE_RC_END)
> +		bridge = dev->rcec;
>  	else
>  		bridge = pci_upstream_bridge(dev);
>  
>  	pci_dbg(dev, "broadcast error_detected message\n");
>  	if (state == pci_channel_io_frozen) {
> -		pci_bridge_walk(bridge, report_frozen_detected, &status);
> +		pci_bridge_walk(bridge, dev, report_frozen_detected, &status);
>  		if (type == PCI_EXP_TYPE_RC_END) {
> +			/*
> +			 * The callback only clears the Root Error Status
> +			 * of the RCEC (see aer.c).
> +			 */
> +			if (bridge)
> +				reset_subordinate_devices(bridge);
> +
>  			status = flr_on_rciep(dev);
>  			if (status != PCI_ERS_RESULT_RECOVERED) {
>  				pci_warn(dev, "function level reset failed\n");
> @@ -219,13 +231,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  			}
>  		}
>  	} else {
> -		pci_bridge_walk(bridge, report_normal_detected, &status);
> +		pci_bridge_walk(bridge, dev, report_normal_detected, &status);
>  	}
>  
>  	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
>  		status = PCI_ERS_RESULT_RECOVERED;
>  		pci_dbg(dev, "broadcast mmio_enabled message\n");
> -		pci_bridge_walk(bridge, report_mmio_enabled, &status);
> +		pci_bridge_walk(bridge, dev, report_mmio_enabled, &status);
>  	}
>  
>  	if (status == PCI_ERS_RESULT_NEED_RESET) {
> @@ -236,18 +248,16 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  		 */
>  		status = PCI_ERS_RESULT_RECOVERED;
>  		pci_dbg(dev, "broadcast slot_reset message\n");
> -		pci_bridge_walk(bridge, report_slot_reset, &status);
> +		pci_bridge_walk(bridge, dev, report_slot_reset, &status);
>  	}
>  
>  	if (status != PCI_ERS_RESULT_RECOVERED)
>  		goto failed;
>  
>  	pci_dbg(dev, "broadcast resume message\n");
> -	pci_bridge_walk(bridge, report_resume, &status);
> +	pci_bridge_walk(bridge, dev, report_resume, &status);
>  
> -	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> -	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> -	    type == PCI_EXP_TYPE_RC_EC) {
> +	if (bridge) {
>  		if (pcie_aer_is_native(bridge))
>  			pcie_clear_device_status(bridge);
>  		pci_aer_clear_nonfatal_status(bridge);



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 08/10] PCI/AER: Add pcie_walk_rcec() to RCEC AER handling
       [not found] ` <20200918204603.62100-9-sean.v.kelley@intel.com>
@ 2020-09-21 12:12   ` Jonathan Cameron
  0 siblings, 0 replies; 15+ messages in thread
From: Jonathan Cameron @ 2020-09-21 12:12 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, rafael.j.wysocki, ashok.raj, tony.luck,
	sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci, linux-kernel

On Fri, 18 Sep 2020 13:46:01 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> The Root Complex Event Collectors (RCEC) appear as peers to Root Ports
> and also have the AER capability. In addition, actions need to be taken
> for associated RCiEPs. In such cases the RCECs will need to be walked in
> order to find and act upon their respective RCiEPs.  Extend the existing
> ability to link the RCECs with a walking function pcie_walk_rcec(). Add
> RCEC support to the current AER service driver and attach the AER service
> driver to the RCEC device.
> 
> Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
Hi,

Couple of minor things in here, but assuming you tidy those up.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/pci/pci.h       |  4 ++++
>  drivers/pci/pcie/aer.c  | 27 ++++++++++++++++++++-------
>  drivers/pci/pcie/rcec.c | 39 ++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 62 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index ddb5872466fb..e8535a7d4b53 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -475,10 +475,14 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {}
>  void pci_rcec_init(struct pci_dev *dev);
>  void pci_rcec_exit(struct pci_dev *dev);
>  void pcie_link_rcec(struct pci_dev *rcec);
> +void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *),
> +		    void *userdata);
>  #else
>  static inline void pci_rcec_init(struct pci_dev *dev) {}
>  static inline void pci_rcec_exit(struct pci_dev *dev) {}
>  static inline void pcie_link_rcec(struct pci_dev *rcec) {}
> +static inline void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *),
> +				  void *userdata) {}
>  #endif
>  
>  #ifdef CONFIG_PCI_ATS
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index dccdba60b5d9..43772bfc134e 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -300,7 +300,7 @@ int pci_aer_raw_clear_status(struct pci_dev *dev)
>  		return -EIO;
>  
>  	port_type = pci_pcie_type(dev);
> -	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
> +	if (port_type == PCI_EXP_TYPE_ROOT_PORT || port_type == PCI_EXP_TYPE_RC_EC) {
>  		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
>  		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
>  	}
> @@ -595,7 +595,8 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
>  	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
>  	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
>  	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
> -	    pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
> +	    ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
> +	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC)))

This line should be indented one more space I think..

>  		return 0;
>  
>  	return a->mode;
> @@ -916,7 +917,10 @@ static bool find_source_device(struct pci_dev *parent,
>  	if (result)
>  		return true;
>  
> -	pci_walk_bus(parent->subordinate, find_device_iter, e_info);
> +	if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC)
> +		pcie_walk_rcec(parent, find_device_iter, e_info);
> +	else
> +		pci_walk_bus(parent->subordinate, find_device_iter, e_info);
>  
>  	if (!e_info->error_dev_num) {
>  		pci_info(parent, "can't find device of ID%04x\n", e_info->id);
> @@ -1053,6 +1057,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>  		if (!(info->status & ~info->mask))
>  			return 0;
>  	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
> +		   pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC ||
>  	           pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
>  		   info->severity == AER_NONFATAL) {
>  
> @@ -1205,6 +1210,7 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data)
>  	int type = pci_pcie_type(dev);
>  
>  	if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
> +	    (type == PCI_EXP_TYPE_RC_EC) ||
>  	    (type == PCI_EXP_TYPE_UPSTREAM) ||
>  	    (type == PCI_EXP_TYPE_DOWNSTREAM)) {
>  		if (enable)
> @@ -1229,9 +1235,11 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev,
>  {
>  	set_device_error_reporting(dev, &enable);
>  
> -	if (!dev->subordinate)
> -		return;
> -	pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
> +	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
> +		pcie_walk_rcec(dev, set_device_error_reporting, &enable);
> +	else if (dev->subordinate)
> +		pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
> +
>  }
>  
>  /**
> @@ -1329,6 +1337,11 @@ static int aer_probe(struct pcie_device *dev)
>  	struct device *device = &dev->device;
>  	struct pci_dev *port = dev->port;
>  
> +	/* Limit to Root Ports or Root Complex Event Collectors */
> +	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
> +	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
> +		return -ENODEV;
> +
>  	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
>  	if (!rpc)
>  		return -ENOMEM;
> @@ -1385,7 +1398,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  
>  static struct pcie_port_service_driver aerdriver = {
>  	.name		= "aer",
> -	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
> +	.port_type	= PCIE_ANY_PORT,
>  	.service	= PCIE_PORT_SERVICE_AER,
>  
>  	.probe		= aer_probe,
> diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
> index 5630480a6659..e6d20131b578 100644
> --- a/drivers/pci/pcie/rcec.c
> +++ b/drivers/pci/pcie/rcec.c
> @@ -53,7 +53,18 @@ static int link_rcec_helper(struct pci_dev *dev, void *data)
>  	return 0;
>  }
>  
> -void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void *userdata)
> +static int walk_rcec_helper(struct pci_dev *dev, void *data)
> +{
> +	struct walk_rcec_data *rcec_data = data;
> +	struct pci_dev *rcec = rcec_data->rcec;
> +
> +	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && rcec_assoc_rciep(rcec, dev))
> +		rcec_data->user_callback(dev, rcec_data->user_data);
> +
> +	return 0;
> +}
> +
> +static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void *userdata)

Ah. The missing static. Shift that into the earlier patch!

>  {
>  	struct walk_rcec_data *rcec_data = userdata;
>  	struct pci_dev *rcec = rcec_data->rcec;
> @@ -113,6 +124,32 @@ void pcie_link_rcec(struct pci_dev *rcec)
>  	walk_rcec(link_rcec_helper, &rcec_data);
>  }
>  
> +/**
> + * pcie_walk_rcec - Walk RCiEP devices associating with RCEC and call callback.
> + * @rcec     RCEC whose RCiEP devices should be walked.
> + * @cb       Callback to be called for each RCiEP device found.
> + * @userdata Arbitrary pointer to be passed to callback.
> + *
> + * Walk the given RCEC. Call the provided callback on each RCiEP device found.
> + *
> + * We check the return of @cb each time. If it returns anything
> + * other than 0, we break out.
> + */
> +void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *),
> +		    void *userdata)
> +{
> +	struct walk_rcec_data rcec_data;
> +
> +	if (!rcec->rcec_cap)
> +		return;
> +
> +	rcec_data.rcec = rcec;
> +	rcec_data.user_callback = cb;
> +	rcec_data.user_data = userdata;
> +
> +	walk_rcec(walk_rcec_helper, &rcec_data);
> +}
> +
>  void pci_rcec_init(struct pci_dev *dev)
>  {
>  	u32 rcec, hdr, busn;



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 09/10] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling
  2020-09-18 20:46 ` [PATCH v5 09/10] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling Sean V Kelley
@ 2020-09-21 12:15   ` Jonathan Cameron
  0 siblings, 0 replies; 15+ messages in thread
From: Jonathan Cameron @ 2020-09-21 12:15 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, rafael.j.wysocki, ashok.raj, tony.luck,
	sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci, linux-kernel

On Fri, 18 Sep 2020 13:46:02 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> The Root Complex Event Collectors (RCEC) appear as peers of Root Ports
> and also have the PME capability. As with AER, there is a need to be
> able to walk the RCiEPs associated with their RCEC for purposes of
> acting upon them with callbacks. So add RCEC support through the use
> of pcie_walk_rcec() to the current PME service driver and attach the
> PME service driver to the RCEC device.
> 
> Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>

I'm a lot less familiar with this code, but looks
good to me.

Thanks,

Jonathan

> ---
>  drivers/pci/pcie/pme.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
> index 6a32970bb731..87799166c96a 100644
> --- a/drivers/pci/pcie/pme.c
> +++ b/drivers/pci/pcie/pme.c
> @@ -310,7 +310,10 @@ static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign)
>  static void pcie_pme_mark_devices(struct pci_dev *port)
>  {
>  	pcie_pme_can_wakeup(port, NULL);
> -	if (port->subordinate)
> +
> +	if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC)
> +		pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL);
> +	else if (port->subordinate)
>  		pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL);
>  }
>  
> @@ -320,10 +323,15 @@ static void pcie_pme_mark_devices(struct pci_dev *port)
>   */
>  static int pcie_pme_probe(struct pcie_device *srv)
>  {
> -	struct pci_dev *port;
> +	struct pci_dev *port = srv->port;
>  	struct pcie_pme_service_data *data;
>  	int ret;
>  
> +	/* Limit to Root Ports or Root Complex Event Collectors */
> +	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
> +	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
> +		return -ENODEV;
> +
>  	data = kzalloc(sizeof(*data), GFP_KERNEL);
>  	if (!data)
>  		return -ENOMEM;
> @@ -333,7 +341,6 @@ static int pcie_pme_probe(struct pcie_device *srv)
>  	data->srv = srv;
>  	set_service_data(srv, data);
>  
> -	port = srv->port;
>  	pcie_pme_interrupt_enable(port, false);
>  	pcie_clear_root_pme_status(port);
>  
> @@ -445,7 +452,7 @@ static void pcie_pme_remove(struct pcie_device *srv)
>  
>  static struct pcie_port_service_driver pcie_pme_driver = {
>  	.name		= "pcie_pme",
> -	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
> +	.port_type	= PCIE_ANY_PORT,
>  	.service	= PCIE_PORT_SERVICE_PME,
>  
>  	.probe		= pcie_pme_probe,



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities()
  2020-09-18 20:45 ` [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities() Sean V Kelley
  2020-09-21 10:09   ` Jonathan Cameron
@ 2020-09-22 20:45   ` Bjorn Helgaas
  2020-09-22 21:54     ` Sean V Kelley
  1 sibling, 1 reply; 15+ messages in thread
From: Bjorn Helgaas @ 2020-09-22 20:45 UTC (permalink / raw)
  To: Sean V Kelley
  Cc: bhelgaas, Jonathan.Cameron, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci,
	linux-kernel

I don't know what's going on with email, but I only see patches 3, 5,
6, 9 on the list and on lore:
https://lore.kernel.org/r/20200918204603.62100-4-sean.v.kelley@intel.com/

Similar issues with Sathy's series, where I only see patches 1, 3, 5:
https://lore.kernel.org/r/a640e9043db50f5adee8e38f5c60ff8423f3f598.1600457297.git.sathyanarayanan.kuppuswamy@linux.intel.com/

On Fri, Sep 18, 2020 at 01:45:56PM -0700, Sean V Kelley wrote:
> Extend support for Root Complex Event Collectors by decoding and
> caching the RCEC Endpoint Association Extended Capabilities when
> enumerating. Use that cached information for later error source
...


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities()
  2020-09-22 20:45   ` Bjorn Helgaas
@ 2020-09-22 21:54     ` Sean V Kelley
  0 siblings, 0 replies; 15+ messages in thread
From: Sean V Kelley @ 2020-09-22 21:54 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: bhelgaas, Jonathan.Cameron, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci,
	linux-kernel

On 22 Sep 2020, at 13:45, Bjorn Helgaas wrote:

> I don't know what's going on with email, but I only see patches 3, 5,
> 6, 9 on the list and on lore:
> https://lore.kernel.org/r/20200918204603.62100-4-sean.v.kelley@intel.com/
>
> Similar issues with Sathy's series, where I only see patches 1, 3, 5:
> https://lore.kernel.org/r/a640e9043db50f5adee8e38f5c60ff8423f3f598.1600457297.git.sathyanarayanan.kuppuswamy@linux.intel.com/

We are still affected by issues with handshaking between SMTP server and 
external lists.  I just re-sent after bumping to fix, and I’m switching 
to my personal account seanvk.dev@oregontracks.org

https://lore.kernel.org/linux-pci/20200922213859.108826-1-seanvk.dev@oregontracks.org/


Apologies for the inconvenience.

Sean

>
> On Fri, Sep 18, 2020 at 01:45:56PM -0700, Sean V Kelley wrote:
>> Extend support for Root Complex Event Collectors by decoding and
>> caching the RCEC Endpoint Association Extended Capabilities when
>> enumerating. Use that cached information for later error source
> ...

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error
  2020-09-21 11:13   ` Jonathan Cameron
@ 2020-09-23  2:49     ` Sean V Kelley
  0 siblings, 0 replies; 15+ messages in thread
From: Sean V Kelley @ 2020-09-23  2:49 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Sean V Kelley, Bjorn Helgaas, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci,
	linux-kernel

On Mon, Sep 21, 2020 at 4:15 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 18 Sep 2020 13:45:58 -0700
> Sean V Kelley <sean.v.kelley@intel.com> wrote:
>
> > From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >
> > Attempt to do function level reset for an RCiEP associated with an
> > RCEC device on fatal error.
>
> I'm not sure the description is correct. Looks like it will do
> the reset even if not associated with an RCEC.
> I'd just cut this down to:
>
> "Attempt to do a function level reset for an RCiEP on fatal error."

Agree. Will change.

>
> > I'm not 100% sure doing an FLR will actually help in most cases if you've
> reported a fatal error, but I suppose it does no harm!
>
> So with description changed.
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Will do, thanks.

Sean

>
> >
> > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > ---
> >  drivers/pci/pcie/err.c | 31 ++++++++++++++++++++++---------
> >  1 file changed, 22 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > index e575fa6cee63..5380ecc41506 100644
> > --- a/drivers/pci/pcie/err.c
> > +++ b/drivers/pci/pcie/err.c
> > @@ -169,6 +169,17 @@ static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *,
> >               cb(bridge, userdata);
> >  }
> >
> > +static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> > +{
> > +     if (!pcie_has_flr(dev))
> > +             return PCI_ERS_RESULT_NONE;
> > +
> > +     if (pcie_flr(dev))
> > +             return PCI_ERS_RESULT_DISCONNECT;
> > +
> > +     return PCI_ERS_RESULT_RECOVERED;
> > +}
> > +
> >  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >                       pci_channel_state_t state,
> >                       pci_ers_result_t (*reset_subordinate_devices)(struct pci_dev *pdev))
> > @@ -195,15 +206,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >       if (state == pci_channel_io_frozen) {
> >               pci_bridge_walk(bridge, report_frozen_detected, &status);
> >               if (type == PCI_EXP_TYPE_RC_END) {
> > -                     pci_warn(dev, "link reset not possible for RCiEP\n");
> > -                     status = PCI_ERS_RESULT_NONE;
> > -                     goto failed;
> > -             }
> > -
> > -             status = reset_subordinate_devices(bridge);
> > -             if (status != PCI_ERS_RESULT_RECOVERED) {
> > -                     pci_warn(dev, "subordinate device reset failed\n");
> > -                     goto failed;
> > +                     status = flr_on_rciep(dev);
> > +                     if (status != PCI_ERS_RESULT_RECOVERED) {
> > +                             pci_warn(dev, "function level reset failed\n");
> > +                             goto failed;
> > +                     }
> > +             } else {
> > +                     status = reset_subordinate_devices(bridge);
> > +                     if (status != PCI_ERS_RESULT_RECOVERED) {
> > +                             pci_warn(dev, "subordinate device reset failed\n");
> > +                             goto failed;
> > +                     }
> >               }
> >       } else {
> >               pci_bridge_walk(bridge, report_normal_detected, &status);
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 07/10] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR
  2020-09-21 11:31   ` [PATCH v5 07/10] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR Jonathan Cameron
@ 2020-09-23  2:50     ` Sean V Kelley
  0 siblings, 0 replies; 15+ messages in thread
From: Sean V Kelley @ 2020-09-23  2:50 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Sean V Kelley, Bjorn Helgaas, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci,
	linux-kernel

On Mon, Sep 21, 2020 at 4:33 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 18 Sep 2020 13:46:00 -0700
> Sean V Kelley <sean.v.kelley@intel.com> wrote:
>
> > From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >
> > When attempting error recovery for an RCiEP associated with an RCEC device,
> > there needs to be a way to update the Root Error Status, the Uncorrectable
> > Error Status and the Uncorrectable Error Severity of the parent RCEC.
> > In some non-native cases in which there is no OS visible device
> > associated with the RCiEP, there is nothing to act upon as the firmware
> > is acting before the OS. So add handling for the linked 'rcec' in AER/ERR
> > while taking into account non-native cases.
> >
> > Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
> > Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> I'll give this a test run later to check I'm not missing anything, but LGTM.
>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>
> Thanks,

Appreciate it.

Thanks,

Sean

>
> > ---
> >  drivers/pci/pcie/aer.c |  9 +++++----
> >  drivers/pci/pcie/err.c | 38 ++++++++++++++++++++++++--------------
> >  2 files changed, 29 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index 65dff5f3457a..dccdba60b5d9 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device *dev)
> >  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> >  {
> >       int aer = dev->aer_cap;
> > +     int rc = 0;
> >       u32 reg32;
> > -     int rc;
> > -
> >
> >       /* Disable Root's interrupt in response to error messages */
> >       pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> >       reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> >       pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> >
> > -     rc = pci_bus_error_reset(dev);
> > -     pci_info(dev, "Root Port link has been reset\n");
> > +     if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> > +             rc = pci_bus_error_reset(dev);
> > +             pci_info(dev, "Root Port link has been reset\n");
> > +     }
> >
> >       /* Clear Root Error Status */
> >       pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > index 5380ecc41506..a61a2518163a 100644
> > --- a/drivers/pci/pcie/err.c
> > +++ b/drivers/pci/pcie/err.c
> > @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, void *data)
> >  /**
> >   * pci_bridge_walk - walk bridges potentially AER affected
> >   * @bridge   bridge which may be an RCEC with associated RCiEPs,
> > - *           an RCiEP associated with an RCEC, or a Port.
> > + *           or a Port.
> > + * @dev      an RCiEP lacking an associated RCEC.
> >   * @cb       callback to be called for each device found
> >   * @userdata arbitrary pointer to be passed to callback.
> >   *
> > @@ -160,13 +161,16 @@ static int report_resume(struct pci_dev *dev, void *data)
> >   * If the device provided has no subordinate bus, call the provided
> >   * callback on the device itself.
> >   */
> > -static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *),
> > +static void pci_bridge_walk(struct pci_dev *bridge, struct pci_dev *dev,
> > +                         int (*cb)(struct pci_dev *, void *),
> >                           void *userdata)
> >  {
> > -     if (bridge->subordinate)
> > +     if (bridge && bridge->subordinate)
> >               pci_walk_bus(bridge->subordinate, cb, userdata);
> > -     else
> > +     else if (bridge)
> >               cb(bridge, userdata);
> > +     else
> > +             cb(dev, userdata);
> >  }
> >
> >  static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> > @@ -196,16 +200,24 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >       type = pci_pcie_type(dev);
> >       if (type == PCI_EXP_TYPE_ROOT_PORT ||
> >           type == PCI_EXP_TYPE_DOWNSTREAM ||
> > -         type == PCI_EXP_TYPE_RC_EC ||
> > -         type == PCI_EXP_TYPE_RC_END)
> > +         type == PCI_EXP_TYPE_RC_EC)
> >               bridge = dev;
> > +     else if (type == PCI_EXP_TYPE_RC_END)
> > +             bridge = dev->rcec;
> >       else
> >               bridge = pci_upstream_bridge(dev);
> >
> >       pci_dbg(dev, "broadcast error_detected message\n");
> >       if (state == pci_channel_io_frozen) {
> > -             pci_bridge_walk(bridge, report_frozen_detected, &status);
> > +             pci_bridge_walk(bridge, dev, report_frozen_detected, &status);
> >               if (type == PCI_EXP_TYPE_RC_END) {
> > +                     /*
> > +                      * The callback only clears the Root Error Status
> > +                      * of the RCEC (see aer.c).
> > +                      */
> > +                     if (bridge)
> > +                             reset_subordinate_devices(bridge);
> > +
> >                       status = flr_on_rciep(dev);
> >                       if (status != PCI_ERS_RESULT_RECOVERED) {
> >                               pci_warn(dev, "function level reset failed\n");
> > @@ -219,13 +231,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >                       }
> >               }
> >       } else {
> > -             pci_bridge_walk(bridge, report_normal_detected, &status);
> > +             pci_bridge_walk(bridge, dev, report_normal_detected, &status);
> >       }
> >
> >       if (status == PCI_ERS_RESULT_CAN_RECOVER) {
> >               status = PCI_ERS_RESULT_RECOVERED;
> >               pci_dbg(dev, "broadcast mmio_enabled message\n");
> > -             pci_bridge_walk(bridge, report_mmio_enabled, &status);
> > +             pci_bridge_walk(bridge, dev, report_mmio_enabled, &status);
> >       }
> >
> >       if (status == PCI_ERS_RESULT_NEED_RESET) {
> > @@ -236,18 +248,16 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >                */
> >               status = PCI_ERS_RESULT_RECOVERED;
> >               pci_dbg(dev, "broadcast slot_reset message\n");
> > -             pci_bridge_walk(bridge, report_slot_reset, &status);
> > +             pci_bridge_walk(bridge, dev, report_slot_reset, &status);
> >       }
> >
> >       if (status != PCI_ERS_RESULT_RECOVERED)
> >               goto failed;
> >
> >       pci_dbg(dev, "broadcast resume message\n");
> > -     pci_bridge_walk(bridge, report_resume, &status);
> > +     pci_bridge_walk(bridge, dev, report_resume, &status);
> >
> > -     if (type == PCI_EXP_TYPE_ROOT_PORT ||
> > -         type == PCI_EXP_TYPE_DOWNSTREAM ||
> > -         type == PCI_EXP_TYPE_RC_EC) {
> > +     if (bridge) {
> >               if (pcie_aer_is_native(bridge))
> >                       pcie_clear_device_status(bridge);
> >               pci_aer_clear_nonfatal_status(bridge);
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs
  2020-09-21 11:25   ` Jonathan Cameron
@ 2020-09-23  2:55     ` Sean V Kelley
  0 siblings, 0 replies; 15+ messages in thread
From: Sean V Kelley @ 2020-09-23  2:55 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Sean V Kelley, Bjorn Helgaas, rafael.j.wysocki, ashok.raj,
	tony.luck, sathyanarayanan.kuppuswamy, qiuxu.zhuo, linux-pci,
	linux-kernel

On Mon, Sep 21, 2020 at 4:26 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 18 Sep 2020 13:45:59 -0700
> Sean V Kelley <sean.v.kelley@intel.com> wrote:
>
> > A Root Complex Event Collector provides support for
> > terminating error and PME messages from associated RCiEPs.
> >
> > Make use of the RCEC Endpoint Association Extended Capability
> > to identify associated RCiEPs. Link the associated RCiEPs as
> > the RCECs are enumerated.
> >
> > Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
> A couple of minor things inline plus follow through on not
> special casing the older versions of the capability.
>
> Otherwise looks good to me.
>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks again for your feedback on v5.  I will be sure to add in v7.
Apologies again for the email server trouble that resulted in a
partial patch series landing on the list.

Sean

>
> > ---
> >  drivers/pci/pci.h              |  2 +
> >  drivers/pci/pcie/portdrv_pci.c |  3 ++
> >  drivers/pci/pcie/rcec.c        | 96 ++++++++++++++++++++++++++++++++++
> >  include/linux/pci.h            |  1 +
> >  4 files changed, 102 insertions(+)
> >
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index 7b547fc3679a..ddb5872466fb 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -474,9 +474,11 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {}
> >  #ifdef CONFIG_PCIEPORTBUS
> >  void pci_rcec_init(struct pci_dev *dev);
> >  void pci_rcec_exit(struct pci_dev *dev);
> > +void pcie_link_rcec(struct pci_dev *rcec);
> >  #else
> >  static inline void pci_rcec_init(struct pci_dev *dev) {}
> >  static inline void pci_rcec_exit(struct pci_dev *dev) {}
> > +static inline void pcie_link_rcec(struct pci_dev *rcec) {}
> >  #endif
> >
> >  #ifdef CONFIG_PCI_ATS
> > diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
> > index 4d880679b9b1..dbeb0155c2c3 100644
> > --- a/drivers/pci/pcie/portdrv_pci.c
> > +++ b/drivers/pci/pcie/portdrv_pci.c
> > @@ -110,6 +110,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
> >            (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)))
> >               return -ENODEV;
> >
> > +     if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
> > +             pcie_link_rcec(dev);
> > +
> >       status = pcie_port_device_register(dev);
> >       if (status)
> >               return status;
> > diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
> > index 519ae086ff41..5630480a6659 100644
> > --- a/drivers/pci/pcie/rcec.c
> > +++ b/drivers/pci/pcie/rcec.c
> > @@ -17,6 +17,102 @@
> >
> >  #include "../pci.h"
> >
> > +struct walk_rcec_data {
> > +     struct pci_dev *rcec;
> > +     int (*user_callback)(struct pci_dev *dev, void *data);
> > +     void *user_data;
> > +};
> > +
> > +static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep)
> > +{
> > +     unsigned long bitmap = rcec->rcec_ext->bitmap;
> > +     unsigned int devn;
> > +
> > +     /* An RCiEP found on bus in range */
> Perhaps adjust the comment to say:
>         /* An RCiEP found on a different bus in range */
>
> as the actual rcec bus can be in the range as I understand it.
>
> > +     if (rcec->bus->number != rciep->bus->number)
> > +             return true;
> > +
> > +     /* Same bus, so check bitmap */
> > +     for_each_set_bit(devn, &bitmap, 32)
> > +             if (devn == rciep->devfn)
> > +                     return true;
> > +
> > +     return false;
> > +}
> > +
> > +static int link_rcec_helper(struct pci_dev *dev, void *data)
> > +{
> > +     struct walk_rcec_data *rcec_data = data;
> > +     struct pci_dev *rcec = rcec_data->rcec;
> > +
> > +     if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && rcec_assoc_rciep(rcec, dev)) {
> > +             dev->rcec = rcec;
> > +             pci_dbg(dev, "PME & error events reported via %s\n", pci_name(rcec));
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void *userdata)
>
> static, or declare it in a header if we are going to need it elsewhere
> later in the series.
>
> > +{
> > +     struct walk_rcec_data *rcec_data = userdata;
> > +     struct pci_dev *rcec = rcec_data->rcec;
> > +     u8 nextbusn, lastbusn;
> > +     struct pci_bus *bus;
> > +     unsigned int bnr;
> > +
> > +     if (!rcec->rcec_cap)
> > +             return;
> > +
> > +     /* Walk own bus for bitmap based association */
> > +     pci_walk_bus(rcec->bus, cb, rcec_data);
> > +
> > +     /* Check whether RCEC BUSN register is present */
> > +     if (rcec->rcec_ext->ver < PCI_RCEC_BUSN_REG_VER)
> > +             return;
>
> If you make the earlier suggested change to fill in nextbusn = 0xFF
> for the earlier versions of the capability, you can avoid special
> casing here.
>
> > +
> > +     nextbusn = rcec->rcec_ext->nextbusn;
> > +     lastbusn = rcec->rcec_ext->lastbusn;
> > +
> > +     /* All RCiEP devices are on the same bus as the RCEC */
> > +     if (nextbusn == 0xff && lastbusn == 0x00)
> > +             return;
> > +
> > +     for (bnr = nextbusn; bnr <= lastbusn; bnr++) {
> > +             /* No association indicated (PCIe 5.0-1, 7.9.10.3) */
> > +             if (bnr == rcec->bus->number)
> > +                     continue;
> > +
> > +             bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr);
> > +             if (!bus)
> > +                     continue;
> > +
> > +             /* Find RCiEP devices on the given bus ranges */
> > +             pci_walk_bus(bus, cb, rcec_data);
> > +     }
> > +}
> > +
> > +/**
> > + * pcie_link_rcec - Link RCiEP devices associating with RCEC.
> > + * @rcec     RCEC whose RCiEP devices should be linked.
> > + *
> > + * Link the given RCEC to each RCiEP device found.
>
> I'm a fusspot on blank lines. The one here doesn't add anything!
>
> > + *
> > + */
> > +void pcie_link_rcec(struct pci_dev *rcec)
> > +{
> > +     struct walk_rcec_data rcec_data;
> > +
> > +     if (!rcec->rcec_cap)
> > +             return;
> > +
> > +     rcec_data.rcec = rcec;
> > +     rcec_data.user_callback = NULL;
> > +     rcec_data.user_data = NULL;
> > +
> > +     walk_rcec(link_rcec_helper, &rcec_data);
> > +}
> > +
> >  void pci_rcec_init(struct pci_dev *dev)
> >  {
> >       u32 rcec, hdr, busn;
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 5c5c4eb642b6..ad382a9484ea 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -330,6 +330,7 @@ struct pci_dev {
> >  #ifdef CONFIG_PCIEPORTBUS
> >       u16             rcec_cap;       /* RCEC capability offset */
> >       struct rcec_ext *rcec_ext;      /* RCEC cached assoc. endpoint extended capabilities */
> > +     struct pci_dev  *rcec;          /* Associated RCEC device */
> >  #endif
> >       u8              pcie_cap;       /* PCIe capability offset */
> >       u8              msi_cap;        /* MSI capability offset */
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-09-23  2:55 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20200918204603.62100-1-sean.v.kelley@intel.com>
2020-09-18 20:45 ` [PATCH v5 03/10] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities() Sean V Kelley
2020-09-21 10:09   ` Jonathan Cameron
2020-09-22 20:45   ` Bjorn Helgaas
2020-09-22 21:54     ` Sean V Kelley
2020-09-18 20:45 ` [PATCH v5 05/10] PCI/AER: Apply function level reset to RCiEP on fatal error Sean V Kelley
2020-09-21 11:13   ` Jonathan Cameron
2020-09-23  2:49     ` Sean V Kelley
2020-09-18 20:45 ` [PATCH v5 06/10] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs Sean V Kelley
2020-09-21 11:25   ` Jonathan Cameron
2020-09-23  2:55     ` Sean V Kelley
2020-09-18 20:46 ` [PATCH v5 09/10] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling Sean V Kelley
2020-09-21 12:15   ` Jonathan Cameron
     [not found] ` <20200918204603.62100-8-sean.v.kelley@intel.com>
2020-09-21 11:31   ` [PATCH v5 07/10] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR Jonathan Cameron
2020-09-23  2:50     ` Sean V Kelley
     [not found] ` <20200918204603.62100-9-sean.v.kelley@intel.com>
2020-09-21 12:12   ` [PATCH v5 08/10] PCI/AER: Add pcie_walk_rcec() to RCEC AER handling Jonathan Cameron

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).