* [PATCH net-next v2 1/2] qed: Enable device error reporting capability.
2020-04-22 13:16 [PATCH net-next v2 0/2] qed*: Add support for pcie advanced error recovery Sudarsana Reddy Kalluru
@ 2020-04-22 13:16 ` Sudarsana Reddy Kalluru
2020-04-22 13:16 ` [PATCH net-next v2 2/2] qede: Add support for handling the pcie errors Sudarsana Reddy Kalluru
1 sibling, 0 replies; 4+ messages in thread
From: Sudarsana Reddy Kalluru @ 2020-04-22 13:16 UTC (permalink / raw)
To: davem; +Cc: netdev, aelior, irusskikh, mkalderon
The patch enables the device to send error messages to root port when
an error is detected.
Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
---
drivers/net/ethernet/qlogic/qed/qed_main.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 96356e8..38a1d26 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -49,6 +49,7 @@
#include <linux/qed/qed_if.h>
#include <linux/qed/qed_ll2_if.h>
#include <net/devlink.h>
+#include <linux/aer.h>
#include "qed.h"
#include "qed_sriov.h"
@@ -129,6 +130,8 @@ static void qed_free_pci(struct qed_dev *cdev)
{
struct pci_dev *pdev = cdev->pdev;
+ pci_disable_pcie_error_reporting(pdev);
+
if (cdev->doorbells && cdev->db_size)
iounmap(cdev->doorbells);
if (cdev->regview)
@@ -231,6 +234,12 @@ static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev)
return -ENOMEM;
}
+ /* AER (Advanced Error reporting) configuration */
+ rc = pci_enable_pcie_error_reporting(pdev);
+ if (rc)
+ DP_VERBOSE(cdev, NETIF_MSG_DRV,
+ "Failed to configure PCIe AER [%d]\n", rc);
+
return 0;
err2:
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH net-next v2 2/2] qede: Add support for handling the pcie errors.
2020-04-22 13:16 [PATCH net-next v2 0/2] qed*: Add support for pcie advanced error recovery Sudarsana Reddy Kalluru
2020-04-22 13:16 ` [PATCH net-next v2 1/2] qed: Enable device error reporting capability Sudarsana Reddy Kalluru
@ 2020-04-22 13:16 ` Sudarsana Reddy Kalluru
2020-04-23 0:53 ` Jakub Kicinski
1 sibling, 1 reply; 4+ messages in thread
From: Sudarsana Reddy Kalluru @ 2020-04-22 13:16 UTC (permalink / raw)
To: davem; +Cc: netdev, aelior, irusskikh, mkalderon
The error recovery is handled by management firmware (MFW) with the help of
qed/qede drivers. Upon detecting the errors, driver informs MFW about this
event which in turn starts a recovery process. MFW sends ERROR_RECOVERY
notification to the driver which performs the required cleanup/recovery
from the driver side.
Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
---
drivers/net/ethernet/qlogic/qede/qede.h | 1 +
drivers/net/ethernet/qlogic/qede/qede_main.c | 68 +++++++++++++++++++++++++++-
2 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 234c6f3..1a708f9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -485,6 +485,7 @@ struct qede_fastpath {
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
+#define QEDE_SP_AER 7
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 34fa391..9b45619 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -60,6 +60,7 @@
#include <net/ip6_checksum.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
+#include <linux/aer.h>
#include "qede.h"
#include "qede_ptp.h"
@@ -124,6 +125,8 @@ enum qede_pci_private {
MODULE_DEVICE_TABLE(pci, qede_pci_tbl);
static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state);
#define TX_TIMEOUT (5 * HZ)
@@ -203,6 +206,10 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
}
#endif
+static const struct pci_error_handlers qede_err_handler = {
+ .error_detected = qede_io_error_detected,
+};
+
static struct pci_driver qede_pci_driver = {
.name = "qede",
.id_table = qede_pci_tbl,
@@ -212,6 +219,7 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
#ifdef CONFIG_QED_SRIOV
.sriov_configure = qede_sriov_configure,
#endif
+ .err_handler = &qede_err_handler,
};
static struct qed_eth_cb_ops qede_ll_ops = {
@@ -974,7 +982,8 @@ static void qede_sp_task(struct work_struct *work)
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
- qede_sriov_configure(edev->pdev, 0);
+ if (pci_num_vf(edev->pdev))
+ qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
@@ -994,6 +1003,17 @@ static void qede_sp_task(struct work_struct *work)
}
#endif
__qede_unlock(edev);
+
+ if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+ /* SRIOV must be disabled outside the lock to avoid a deadlock.
+ * The recovery of the active VFs is currently not supported.
+ */
+ if (pci_num_vf(edev->pdev))
+ qede_sriov_configure(edev->pdev, 0);
+#endif
+ edev->ops->common->recovery_process(edev->cdev);
+ }
}
static void qede_update_pf_params(struct qed_dev *cdev)
@@ -2579,3 +2599,49 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
etlv->num_txqs_full_set = true;
etlv->num_rxqs_full_set = true;
}
+
+/**
+ * qede_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct qede_dev *edev = netdev_priv(dev);
+
+ if (!edev)
+ return PCI_ERS_RESULT_NONE;
+
+ DP_NOTICE(edev, "IO error detected [%d]\n", state);
+
+ __qede_lock(edev);
+ if (edev->state == QEDE_STATE_RECOVERY) {
+ DP_NOTICE(edev, "Device already in the recovery state\n");
+ __qede_unlock(edev);
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ /* PF handles the recovery of its VFs */
+ if (IS_VF(edev)) {
+ DP_VERBOSE(edev, QED_MSG_IOV,
+ "VF recovery is handled by its PF\n");
+ __qede_unlock(edev);
+ return PCI_ERS_RESULT_RECOVERED;
+ }
+
+ /* Close OS Tx */
+ netif_tx_disable(edev->ndev);
+ netif_carrier_off(edev->ndev);
+
+ set_bit(QEDE_SP_AER, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+
+ __qede_unlock(edev);
+
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread