[PATCH] PCI/ERR: Fix run error recovery callbacks for all affected devices

* [PATCH] PCI/ERR: Fix run error recovery callbacks for all affected devices
@ 2019-01-24 13:50 Dongdong Liu
  2019-01-24 18:18 ` Sinan Kaya
  0 siblings, 1 reply; 13+ messages in thread
From: Dongdong Liu @ 2019-01-24 13:50 UTC (permalink / raw)
  To: helgaas, keith.busch; +Cc: linux-pci, linuxarm, Dongdong Liu, Bjorn Helgaas

Patch [1] ("PCI/ERR: Run error recovery callbacks for all affected
devices") broke the non-fatal error handling logic introduced by patch [2].
For a non-fatal error the link is still reliable, so there is no need to
reset it, and handling the non-fatal error for all subordinates seems
incorrect. Restore the previous non-fatal error handling logic.

[1] PCI/ERR: Run error recovery callbacks for all affected devices   #4.20
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bfcb79fca19d267712e425af1dd48812c40dec0c

[2] PCI/AER: Report non-fatal errors only to the affected endpoint  #4.15
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v5.0-rc2&id=86acc790717fb60fb51ea3095084e331d8711c74

Fixes: bfcb79fca19d ("PCI/ERR: Run error recovery callbacks for all affected devices")
Reported-by: Xiaofei Tan <tanxiaofei@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/err.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 773197a..9de3880 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -187,7 +187,8 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
 		      u32 service)
 {
 	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
-	struct pci_bus *bus;
+	struct pci_bus *bus = dev->bus;
+	struct pci_dev *bridge = dev;
 
 	/*
 	 * Error recovery runs on all subordinates of the first downstream port.
@@ -195,23 +196,33 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
 	 */
 	if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
 	      pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
-		dev = dev->bus->self;
-	bus = dev->subordinate;
+		bridge = bus->self;
+
+	if (bridge)
+		bus = bridge->subordinate;
 
 	pci_dbg(dev, "broadcast error_detected message\n");
 	if (state == pci_channel_io_frozen)
 		pci_walk_bus(bus, report_frozen_detected, &status);
-	else
-		pci_walk_bus(bus, report_normal_detected, &status);
+	else {
+		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+			report_normal_detected(dev, &status);
+		else
+			pci_walk_bus(bus, report_normal_detected, &status);
+	}
 
 	if (state == pci_channel_io_frozen &&
-	    reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
+	    reset_link(bridge, service) != PCI_ERS_RESULT_RECOVERED)
 		goto failed;
 
 	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
 		status = PCI_ERS_RESULT_RECOVERED;
 		pci_dbg(dev, "broadcast mmio_enabled message\n");
-		pci_walk_bus(bus, report_mmio_enabled, &status);
+		if (state == pci_channel_io_normal &&
+		    dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+			report_mmio_enabled(dev, &status);
+		else
+			pci_walk_bus(bus, report_mmio_enabled, &status);
 	}
 
 	if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -222,14 +233,22 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
 		 */
 		status = PCI_ERS_RESULT_RECOVERED;
 		pci_dbg(dev, "broadcast slot_reset message\n");
-		pci_walk_bus(bus, report_slot_reset, &status);
+		if (state == pci_channel_io_normal &&
+		    dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+			report_slot_reset(dev, &status);
+		else
+			pci_walk_bus(bus, report_slot_reset, &status);
 	}
 
 	if (status != PCI_ERS_RESULT_RECOVERED)
 		goto failed;
 
 	pci_dbg(dev, "broadcast resume message\n");
-	pci_walk_bus(bus, report_resume, &status);
+	if (state == pci_channel_io_normal &&
+	    dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+		report_resume(dev, &status);
+	else
+		pci_walk_bus(bus, report_resume, &status);
 
 	pci_aer_clear_device_status(dev);
 	pci_cleanup_aer_uncorrect_error_status(dev);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread