All of lore.kernel.org
 help / color / mirror / Atom feed
From: Igor Russkikh <irusskikh@marvell.com>
To: <netdev@vger.kernel.org>
Cc: "David S . Miller" <davem@davemloft.net>,
	Ariel Elior <aelior@marvell.com>,
	Michal Kalderon <mkalderon@marvell.com>,
	Denis Bolotin <dbolotin@marvell.com>,
	Jakub Kicinski <kuba@kernel.org>,
	Igor Russkikh <irusskikh@marvell.com>,
	Ariel Elior <ariel.elior@marvell.com>,
	"Michal Kalderon" <michal.kalderon@marvell.com>
Subject: [PATCH v2 net-next 01/11] net: qed: adding hw_err states and handling
Date: Thu, 14 May 2020 12:57:17 +0300	[thread overview]
Message-ID: <20200514095727.1361-2-irusskikh@marvell.com> (raw)
In-Reply-To: <20200514095727.1361-1-irusskikh@marvell.com>

Here we introduce qed device error tracking flags and error types.

qed_hw_err_notify is an entry point for reporting errors.
It'll notify higher level drivers (qede/qedr/etc) so they can handle and
recover from the error.

The list of possible errors comes from the hardware interfaces, but could
be extended in the future.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h      |  2 ++
 drivers/net/ethernet/qlogic/qed/qed_hw.c   | 32 ++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_hw.h   | 15 ++++++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c | 29 ++++++++++++++++++++
 include/linux/qed/qed_if.h                 | 12 ++++++++
 5 files changed, 90 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index fa41bf08a589..12c40ce3d876 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -1020,6 +1020,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
 void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+			   enum qed_hw_err_type err_type);
 void qed_get_protocol_stats(struct qed_dev *cdev,
 			    enum qed_mcp_protocol_type type,
 			    union qed_mcp_protocol_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 4ab8cfaf63d1..90b777019cf5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -837,6 +837,38 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+/* Log the optional printf-style message and, unless a recovery is already
+ * in progress, hand the error over to qed_hw_error_occurred(). p_ptt is
+ * not used here.
+ */
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       enum qed_hw_err_type err_type, char *fmt, ...)
+{
+	char buf[QED_HW_ERR_MAX_STR_SIZE];
+	va_list vl;
+
+	if (fmt) {
+		va_start(vl, fmt);
+		vsnprintf(buf, sizeof(buf), fmt, vl);
+		va_end(vl);
+
+		DP_NOTICE(p_hwfn, "%s", buf);
+	}
+
+	/* Fan failure cannot be masked by handling of another HW error */
+	if (p_hwfn->cdev->recov_in_prog &&
+	    err_type != QED_HW_ERR_FAN_FAIL) {
+		DP_VERBOSE(p_hwfn,
+			   NETIF_MSG_DRV,
+			   "Recovery is in progress. Avoid notifying about HW error %d.\n",
+			   err_type);
+		return;
+	}
+
+	qed_hw_error_occurred(p_hwfn, err_type);
+}
+
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, const char *phase)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
index 505e94db939d..f5b109b04b66 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, const char *phase);
 
+#define QED_HW_ERR_MAX_STR_SIZE 256
+
+/**
+ * @brief qed_hw_err_notify - Notify upper layer driver and management FW
+ *	about a HW error.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param err_type - kind of HW error being reported
+ * @param fmt - printf-style format of the debug message; may be NULL
+ * @param ... - arguments consumed by fmt
+ */
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       enum qed_hw_err_type err_type, char *fmt, ...);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 38a1d26ca9db..d7c9d94e4c59 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2468,6 +2468,35 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
 		ops->schedule_recovery_handler(cookie);
 }
 
+static char *qed_hw_err_type_descr[] = {
+	[QED_HW_ERR_FAN_FAIL]		= "Fan Failure",
+	[QED_HW_ERR_MFW_RESP_FAIL]	= "MFW Response Failure",
+	[QED_HW_ERR_HW_ATTN]		= "HW Attention",
+	[QED_HW_ERR_DMAE_FAIL]		= "DMAE Failure",
+	[QED_HW_ERR_RAMROD_FAIL]	= "Ramrod Failure",
+	[QED_HW_ERR_FW_ASSERT]		= "FW Assertion",
+	[QED_HW_ERR_LAST]		= "Unknown",	/* out-of-range fallback */
+};
+
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+			   enum qed_hw_err_type err_type)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	struct qed_common_cb_ops *cb = cdev->protocol_ops.common;
+	void *cookie = cdev->ops_cookie;
+
+	/* Anything past the last known type is reported as "Unknown" */
+	if (err_type > QED_HW_ERR_LAST)
+		err_type = QED_HW_ERR_LAST;
+	DP_NOTICE(p_hwfn, "HW error occurred [%s]\n",
+		  qed_hw_err_type_descr[err_type]);
+
+	/* Let the protocol driver schedule its HW error handler, if any */
+	if (!cb || !cb->schedule_hw_err_handler)
+		return;
+	cb->schedule_hw_err_handler(cookie, err_type);
+}
+
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
 			    void *handle)
 {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 8f29e0d8a7b3..1b7d9548ee43 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -607,6 +607,16 @@ struct qed_sb_info {
 	struct qed_dev *cdev;
 };
 
+enum qed_hw_err_type {
+	QED_HW_ERR_FAN_FAIL,
+	QED_HW_ERR_MFW_RESP_FAIL,
+	QED_HW_ERR_HW_ATTN,
+	QED_HW_ERR_DMAE_FAIL,
+	QED_HW_ERR_RAMROD_FAIL,
+	QED_HW_ERR_FW_ASSERT,
+	QED_HW_ERR_LAST,	/* out-of-range types are clamped to this */
+};
+
 enum qed_dev_type {
 	QED_DEV_TYPE_BB,
 	QED_DEV_TYPE_AH,
@@ -814,6 +824,8 @@ struct qed_common_cb_ops {
 	void	(*link_update)(void			*dev,
 			       struct qed_link_output	*link);
 	void (*schedule_recovery_handler)(void *dev);
+	void (*schedule_hw_err_handler)(void *dev,
+					enum qed_hw_err_type err_type);
 	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
 	void (*get_protocol_tlv_data)(void *dev, void *data);
-- 
2.17.1


  reply	other threads:[~2020-05-14  9:58 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-14  9:57 [PATCH v2 net-next 00/11] net: qed/qede: critical hw error handling Igor Russkikh
2020-05-14  9:57 ` Igor Russkikh [this message]
2020-05-14  9:57 ` [PATCH v2 net-next 02/11] net: qede: add hw err scheduled handler Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 03/11] net: qed: invoke err notify on critical areas Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 04/11] net: qed: critical err reporting to management firmware Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 05/11] net: qed: cleanup debug related declarations Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 06/11] net: qed: attention clearing properties Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 07/11] net: qede: optional hw recovery procedure Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 08/11] net: qede: Implement ndo_tx_timeout Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 09/11] net: qed: introduce critical fan failure handler Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 10/11] net: qed: introduce critical hardware error handler Igor Russkikh
2020-05-14  9:57 ` [PATCH v2 net-next 11/11] net: qed: fix bad formatting Igor Russkikh
2020-05-14 19:06 ` [PATCH v2 net-next 00/11] net: qed/qede: critical hw error handling Jakub Kicinski
2020-05-14 19:40   ` [EXT] " Igor Russkikh
2020-05-14 20:02     ` Jakub Kicinski
2020-05-14 20:09       ` Igor Russkikh
2020-05-14 20:01 ` David Miller
2020-05-14 20:09   ` David Miller
2020-05-14 20:22     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200514095727.1361-2-irusskikh@marvell.com \
    --to=irusskikh@marvell.com \
    --cc=aelior@marvell.com \
    --cc=ariel.elior@marvell.com \
    --cc=davem@davemloft.net \
    --cc=dbolotin@marvell.com \
    --cc=kuba@kernel.org \
    --cc=michal.kalderon@marvell.com \
    --cc=mkalderon@marvell.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.