All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rasesh Mody <rasesh.mody@cavium.com>
To: dev@dpdk.org, ferruh.yigit@intel.com
Cc: Rasesh Mody <rasesh.mody@cavium.com>, Dept-EngDPDKDev@cavium.com
Subject: [PATCH 14/53] net/qede/base: add mdump sub-commands
Date: Mon, 18 Sep 2017 18:29:54 -0700	[thread overview]
Message-ID: <1505784633-1171-15-git-send-email-rasesh.mody@cavium.com> (raw)
In-Reply-To: <1505784633-1171-1-git-send-email-rasesh.mody@cavium.com>

 - Add support to retain/clear data for crash dump by introducing the mdump
   GET_RETAIN/CLR_RETAIN sub commands, new APIs
   ecore_mcp_mdump_get_retain() and ecore_mcp_mdump_clr_retain()
 - Avoid checking for mdump logs and data in case of an emulator
 - Fix "deadbeaf" returned value in case of pcie status command read
   fails (prevent false detection)

Signed-off-by: Rasesh Mody <rasesh.mody@cavium.com>
---
 drivers/net/qede/base/ecore_dev.c     |   24 +++++++--
 drivers/net/qede/base/ecore_mcp.c     |   87 +++++++++++++++++++++++++++------
 drivers/net/qede/base/ecore_mcp.h     |   21 ++++++++
 drivers/net/qede/base/ecore_mcp_api.h |   11 +++++
 drivers/net/qede/base/mcp_public.h    |   10 ++++
 5 files changed, 132 insertions(+), 21 deletions(-)

diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 938834b..93c2306 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -3564,6 +3564,7 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
 			void OSAL_IOMEM * p_doorbells,
 			struct ecore_hw_prepare_params *p_params)
 {
+	struct ecore_mdump_retain_data mdump_retain;
 	struct ecore_dev *p_dev = p_hwfn->p_dev;
 	struct ecore_mdump_info mdump_info;
 	enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -3631,24 +3632,37 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
 	/* Sending a mailbox to the MFW should be after ecore_get_hw_info() is
 	 * called, since among others it sets the ports number in an engine.
 	 */
-	if (p_params->initiate_pf_flr && p_hwfn == ECORE_LEADING_HWFN(p_dev) &&
+	if (p_params->initiate_pf_flr && IS_LEAD_HWFN(p_hwfn) &&
 	    !p_dev->recov_in_prog) {
 		rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc != ECORE_SUCCESS)
 			DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n");
 	}
 
-	/* Check if mdump logs are present and update the epoch value */
-	if (p_hwfn == ECORE_LEADING_HWFN(p_hwfn->p_dev)) {
+	/* Check if mdump logs/data are present and update the epoch value */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+#ifndef ASIC_ONLY
+		if (!CHIP_REV_IS_EMUL(p_dev)) {
+#endif
 		rc = ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt,
 					      &mdump_info);
-		if (rc == ECORE_SUCCESS && mdump_info.num_of_logs > 0) {
+		if (rc == ECORE_SUCCESS && mdump_info.num_of_logs)
 			DP_NOTICE(p_hwfn, false,
 				  "* * * IMPORTANT - HW ERROR register dump captured by device * * *\n");
-		}
+
+		rc = ecore_mcp_mdump_get_retain(p_hwfn, p_hwfn->p_main_ptt,
+						&mdump_retain);
+		if (rc == ECORE_SUCCESS && mdump_retain.valid)
+			DP_NOTICE(p_hwfn, false,
+				  "mdump retained data: epoch 0x%08x, pf 0x%x, status 0x%08x\n",
+				  mdump_retain.epoch, mdump_retain.pf,
+				  mdump_retain.status);
 
 		ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt,
 					   p_params->epoch);
+#ifndef ASIC_ONLY
+		}
+#endif
 	}
 
 	/* Allocate the init RT array and initialize the init-ops engine */
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 868b075..462fcc9 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -1434,11 +1434,16 @@ struct ecore_mdump_cmd_params {
 		return rc;
 
 	p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
 	if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
-		DP_NOTICE(p_hwfn, false,
-			  "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n",
-			  p_mdump_cmd_params->cmd);
-		rc = ECORE_INVAL;
+		DP_INFO(p_hwfn,
+			"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+			p_mdump_cmd_params->cmd);
+		rc = ECORE_NOTIMPL;
+	} else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The mdump command is not supported by the MFW\n");
+		rc = ECORE_NOTIMPL;
 	}
 
 	return rc;
@@ -1496,16 +1501,10 @@ enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
 	if (rc != ECORE_SUCCESS)
 		return rc;
 
-	if (mdump_cmd_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
-		DP_INFO(p_hwfn,
-			"The mdump command is not supported by the MFW\n");
-		return ECORE_NOTIMPL;
-	}
-
 	if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
-		DP_NOTICE(p_hwfn, false,
-			  "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
-			  mdump_cmd_params.mcp_resp);
+		DP_INFO(p_hwfn,
+			"Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
+			mdump_cmd_params.mcp_resp);
 		rc = ECORE_UNKNOWN_ERROR;
 	}
 
@@ -1566,17 +1565,71 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
 	return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
 }
 
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+			   struct ecore_mdump_retain_data *p_mdump_retain)
+{
+	struct ecore_mdump_cmd_params mdump_cmd_params;
+	struct mdump_retain_data_stc mfw_mdump_retain;
+	enum _ecore_status_t rc;
+
+	OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+	mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+	mdump_cmd_params.p_data_dst = &mfw_mdump_retain;
+	mdump_cmd_params.data_dst_size = sizeof(mfw_mdump_retain);
+
+	rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+	if (rc != ECORE_SUCCESS)
+		return rc;
+
+	if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+		DP_INFO(p_hwfn,
+			"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+			mdump_cmd_params.mcp_resp);
+		return ECORE_UNKNOWN_ERROR;
+	}
+
+	p_mdump_retain->valid = mfw_mdump_retain.valid;
+	p_mdump_retain->epoch = mfw_mdump_retain.epoch;
+	p_mdump_retain->pf = mfw_mdump_retain.pf;
+	p_mdump_retain->status = mfw_mdump_retain.status;
+
+	return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+						struct ecore_ptt *p_ptt)
+{
+	struct ecore_mdump_cmd_params mdump_cmd_params;
+
+	OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+	mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_CLR_RETAIN;
+
+	return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
 static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
 					    struct ecore_ptt *p_ptt)
 {
+	struct ecore_mdump_retain_data mdump_retain;
+	enum _ecore_status_t rc;
+
 	/* In CMT mode - no need for more than a single acknowledgment to the
 	 * MFW, and no more than a single notification to the upper driver.
 	 */
 	if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
 		return;
 
-	DP_NOTICE(p_hwfn, false,
-		  "Received a critical error notification from the MFW!\n");
+	rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+	if (rc == ECORE_SUCCESS && mdump_retain.valid) {
+		DP_NOTICE(p_hwfn, false,
+			  "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+			  mdump_retain.epoch, mdump_retain.pf,
+			  mdump_retain.status);
+	} else {
+		DP_NOTICE(p_hwfn, false,
+			  "The MFW notified that a critical error occurred in the device\n");
+	}
 
 	if (p_hwfn->p_dev->allow_mdump) {
 		DP_NOTICE(p_hwfn, false,
@@ -1584,6 +1637,8 @@ static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
 		return;
 	}
 
+	DP_NOTICE(p_hwfn, false,
+		  "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
 	ecore_mcp_mdump_ack(p_hwfn, p_ptt);
 	ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN);
 }
@@ -2245,8 +2300,8 @@ enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn,
 					     struct ecore_ptt *p_ptt,
 					     u32 mask_parities)
 {
-	enum _ecore_status_t rc;
 	u32 resp = 0, param = 0;
+	enum _ecore_status_t rc;
 
 	rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
 			   mask_parities, &resp, &param);
diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h
index 9b6a9b4..b84f0d1 100644
--- a/drivers/net/qede/base/ecore_mcp.h
+++ b/drivers/net/qede/base/ecore_mcp.h
@@ -376,12 +376,33 @@ enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
  *
  * @param p_hwfn
  * @param p_ptt
+ * @param epoch
  *
  * @param return ECORE_SUCCESS upon success.
  */
 enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
 					     struct ecore_ptt *p_ptt);
 
+struct ecore_mdump_retain_data {
+	u32 valid;
+	u32 epoch;
+	u32 pf;
+	u32 status;
+};
+
+/**
+ * @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain
+ *
+ * @param return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+			   struct ecore_mdump_retain_data *p_mdump_retain);
+
 /**
  * @brief - Sets the MFW's max value for the given resource
  *
diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h
index 86fa0cb..059b55e 100644
--- a/drivers/net/qede/base/ecore_mcp_api.h
+++ b/drivers/net/qede/base/ecore_mcp_api.h
@@ -1123,6 +1123,17 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
 						struct ecore_ptt *p_ptt);
 
 /**
+ * @brief - Clear the mdump retained data.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @param return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+						struct ecore_ptt *p_ptt);
+
+/**
  * @brief - Processes the TLV request from MFW i.e., get the required TLV info
  *          from the ecore client and send it to the MFW.
  *
diff --git a/drivers/net/qede/base/mcp_public.h b/drivers/net/qede/base/mcp_public.h
index 41711cc..f934c17 100644
--- a/drivers/net/qede/base/mcp_public.h
+++ b/drivers/net/qede/base/mcp_public.h
@@ -1108,6 +1108,13 @@ struct load_rsp_stc {
 #define LOAD_RSP_FLAGS0_DRV_EXISTS	(0x1 << 0)
 };
 
+struct mdump_retain_data_stc {
+	u32 valid;
+	u32 epoch;
+	u32 pf;
+	u32 status;
+};
+
 union drv_union_data {
 	struct mcp_mac wol_mac; /* UNLOAD_DONE */
 
@@ -1138,6 +1145,7 @@ struct load_rsp_stc {
 
 	struct load_req_stc load_req;
 	struct load_rsp_stc load_rsp;
+	struct mdump_retain_data_stc mdump_retain;
 	/* ... */
 };
 
@@ -1350,6 +1358,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_MDUMP_SET_ENABLE		0x05
 /* Clear all logs */
 #define DRV_MSG_CODE_MDUMP_CLEAR_LOGS		0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN		0x07 /* Get retained data */
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN		0x08 /* Clear retain data */
 #define DRV_MSG_CODE_MEM_ECC_EVENTS		0x00260000 /* Param: None */
 /* Param: [0:15] - gpio number */
 #define DRV_MSG_CODE_GPIO_INFO			0x00270000
-- 
1.7.10.3

  parent reply	other threads:[~2017-09-19  1:31 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-19  1:29 [PATCH 00/53] net/qede/base: update PMD to 2.6.0.1 Rasesh Mody
2017-09-19  1:29 ` [PATCH 01/53] net/qede/base: add NVM config options Rasesh Mody
2017-09-19  1:29 ` [PATCH 02/53] net/qede/base: update management FW supported features Rasesh Mody
2017-09-19  1:29 ` [PATCH 03/53] net/qede/base: use crc32 OSAL macro Rasesh Mody
2017-09-19  1:29 ` [PATCH 04/53] net/qede/base: allocate VF queues before PF Rasesh Mody
2017-09-19  1:29 ` [PATCH 05/53] net/qede/base: convert device type to enum Rasesh Mody
2017-09-19  1:29 ` [PATCH 06/53] net/qede/base: changes for VF queue zone Rasesh Mody
2017-09-19  1:29 ` [PATCH 07/53] net/qede/base: interchangeably use SB between PF and VF Rasesh Mody
2017-09-19  1:29 ` [PATCH 08/53] net/qede/base: add API to configure coalescing for VF queues Rasesh Mody
2017-09-19  1:29 ` [PATCH 09/53] net/qede/base: restrict cache line size register padding Rasesh Mody
2017-09-19  1:29 ` [PATCH 10/53] net/qede/base: fix to use a passed ptt handle Rasesh Mody
2017-09-19  1:29 ` [PATCH 11/53] net/qede/base: add a sanity check Rasesh Mody
2017-09-19  1:29 ` [PATCH 12/53] net/qede/base: add SmartAN support Rasesh Mody
2017-09-19  1:29 ` [PATCH 13/53] net/qede/base: alter driver's force load behavior Rasesh Mody
2017-09-19  1:29 ` Rasesh Mody [this message]
2017-09-19  1:29 ` [PATCH 15/53] net/qede/base: add EEE support Rasesh Mody
2017-09-19  1:29 ` [PATCH 16/53] net/qede/base: use passed ptt handler Rasesh Mody
2017-09-19  1:29 ` [PATCH 17/53] net/qede/base: prevent re-assertions of parity errors Rasesh Mody
2017-09-19  1:29 ` [PATCH 18/53] net/qede/base: avoid possible race condition Rasesh Mody
2017-09-19  1:29 ` [PATCH 19/53] net/qede/base: revise management FW mbox access scheme Rasesh Mody
2017-09-19  1:30 ` [PATCH 20/53] net/qede/base: remove helper functions/structures Rasesh Mody
2017-09-19  1:30 ` [PATCH 21/53] net/qede/base: initialize resc lock/unlock params Rasesh Mody
2017-09-19  1:30 ` [PATCH 22/53] net/qede/base: rename MFW get/set field defines Rasesh Mody
2017-09-19  1:30 ` [PATCH 23/53] net/qede/base: allow clients to override VF MSI-X table size Rasesh Mody
2017-09-19  1:30 ` [PATCH 24/53] net/qede/base: add API to send STAG config update to FW Rasesh Mody
2017-09-19  1:30 ` [PATCH 25/53] net/qede/base: add support for doorbell overflow recovery Rasesh Mody
2017-09-19  1:30 ` [PATCH 26/53] net/qede/base: block mbox command to unresponsive MFW Rasesh Mody
2017-09-19  1:30 ` [PATCH 27/53] net/qede/base: prevent stop vport assert by malicious VF Rasesh Mody
2017-09-19  1:30 ` [PATCH 28/53] net/qede/base: remove unused parameters Rasesh Mody
2017-09-19  1:30 ` [PATCH 29/53] net/qede/base: fix macros to check chip revision/metal Rasesh Mody
2017-09-20 11:00 ` [PATCH 00/53] net/qede/base: update PMD to 2.6.0.1 Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1505784633-1171-15-git-send-email-rasesh.mody@cavium.com \
    --to=rasesh.mody@cavium.com \
    --cc=Dept-EngDPDKDev@cavium.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.