From mboxrd@z Thu Jan 1 00:00:00 1970 From: Rasesh Mody Subject: [PATCH 14/53] net/qede/base: add mdump sub-commands Date: Mon, 18 Sep 2017 18:29:54 -0700 Message-ID: <1505784633-1171-15-git-send-email-rasesh.mody@cavium.com> References: <1505784633-1171-1-git-send-email-rasesh.mody@cavium.com> Mime-Version: 1.0 Content-Type: text/plain Cc: Rasesh Mody , Dept-EngDPDKDev@cavium.com To: dev@dpdk.org, ferruh.yigit@intel.com Return-path: Received: from NAM02-CY1-obe.outbound.protection.outlook.com (mail-cys01nam02on0082.outbound.protection.outlook.com [104.47.37.82]) by dpdk.org (Postfix) with ESMTP id 8A81E1AEEB for ; Tue, 19 Sep 2017 03:31:19 +0200 (CEST) In-Reply-To: <1505784633-1171-1-git-send-email-rasesh.mody@cavium.com> List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" - Add support to retain/clear data for crash dump by introducing the mdump GET_RETAIN/CLR_RETAIN sub commands, new APIs ecore_mcp_mdump_get_retain() and ecore_mcp_mdump_clr_retain() - Avoid checking for mdump logs and data in case of an emulator - Fix "deadbeaf" returned value in case of pcie status command read fails (prevent false detection) Signed-off-by: Rasesh Mody --- drivers/net/qede/base/ecore_dev.c | 24 +++++++-- drivers/net/qede/base/ecore_mcp.c | 87 +++++++++++++++++++++++++++------ drivers/net/qede/base/ecore_mcp.h | 21 ++++++++ drivers/net/qede/base/ecore_mcp_api.h | 11 +++++ drivers/net/qede/base/mcp_public.h | 10 ++++ 5 files changed, 132 insertions(+), 21 deletions(-) diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c index 938834b..93c2306 100644 --- a/drivers/net/qede/base/ecore_dev.c +++ b/drivers/net/qede/base/ecore_dev.c @@ -3564,6 +3564,7 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev) void OSAL_IOMEM * p_doorbells, struct ecore_hw_prepare_params *p_params) { + struct ecore_mdump_retain_data mdump_retain; struct ecore_dev *p_dev 
= p_hwfn->p_dev; struct ecore_mdump_info mdump_info; enum _ecore_status_t rc = ECORE_SUCCESS; @@ -3631,24 +3632,37 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev) /* Sending a mailbox to the MFW should be after ecore_get_hw_info() is * called, since among others it sets the ports number in an engine. */ - if (p_params->initiate_pf_flr && p_hwfn == ECORE_LEADING_HWFN(p_dev) && + if (p_params->initiate_pf_flr && IS_LEAD_HWFN(p_hwfn) && !p_dev->recov_in_prog) { rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt); if (rc != ECORE_SUCCESS) DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n"); } - /* Check if mdump logs are present and update the epoch value */ - if (p_hwfn == ECORE_LEADING_HWFN(p_hwfn->p_dev)) { + /* Check if mdump logs/data are present and update the epoch value */ + if (IS_LEAD_HWFN(p_hwfn)) { +#ifndef ASIC_ONLY + if (!CHIP_REV_IS_EMUL(p_dev)) { +#endif rc = ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt, &mdump_info); - if (rc == ECORE_SUCCESS && mdump_info.num_of_logs > 0) { + if (rc == ECORE_SUCCESS && mdump_info.num_of_logs) DP_NOTICE(p_hwfn, false, "* * * IMPORTANT - HW ERROR register dump captured by device * * *\n"); - } + + rc = ecore_mcp_mdump_get_retain(p_hwfn, p_hwfn->p_main_ptt, + &mdump_retain); + if (rc == ECORE_SUCCESS && mdump_retain.valid) + DP_NOTICE(p_hwfn, false, + "mdump retained data: epoch 0x%08x, pf 0x%x, status 0x%08x\n", + mdump_retain.epoch, mdump_retain.pf, + mdump_retain.status); ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt, p_params->epoch); +#ifndef ASIC_ONLY + } +#endif } /* Allocate the init RT array and initialize the init-ops engine */ diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c index 868b075..462fcc9 100644 --- a/drivers/net/qede/base/ecore_mcp.c +++ b/drivers/net/qede/base/ecore_mcp.c @@ -1434,11 +1434,16 @@ struct ecore_mdump_cmd_params { return rc; p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp; + if (p_mdump_cmd_params->mcp_resp == 
FW_MSG_CODE_MDUMP_INVALID_CMD) { - DP_NOTICE(p_hwfn, false, - "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n", - p_mdump_cmd_params->cmd); - rc = ECORE_INVAL; + DP_INFO(p_hwfn, + "The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n", + p_mdump_cmd_params->cmd); + rc = ECORE_NOTIMPL; + } else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The mdump command is not supported by the MFW\n"); + rc = ECORE_NOTIMPL; } return rc; @@ -1496,16 +1501,10 @@ enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn, if (rc != ECORE_SUCCESS) return rc; - if (mdump_cmd_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) { - DP_INFO(p_hwfn, - "The mdump command is not supported by the MFW\n"); - return ECORE_NOTIMPL; - } - if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) { - DP_NOTICE(p_hwfn, false, - "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n", - mdump_cmd_params.mcp_resp); + DP_INFO(p_hwfn, + "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n", + mdump_cmd_params.mcp_resp); rc = ECORE_UNKNOWN_ERROR; } @@ -1566,17 +1565,71 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn, return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params); } +enum _ecore_status_t +ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, + struct ecore_mdump_retain_data *p_mdump_retain) +{ + struct ecore_mdump_cmd_params mdump_cmd_params; + struct mdump_retain_data_stc mfw_mdump_retain; + enum _ecore_status_t rc; + + OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params)); + mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN; + mdump_cmd_params.p_data_dst = &mfw_mdump_retain; + mdump_cmd_params.data_dst_size = sizeof(mfw_mdump_retain); + + rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params); + if (rc != ECORE_SUCCESS) + return rc; + + if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) { + DP_INFO(p_hwfn, + "Failed to get the 
mdump retained data [mcp_resp 0x%x]\n", + mdump_cmd_params.mcp_resp); + return ECORE_UNKNOWN_ERROR; + } + + p_mdump_retain->valid = mfw_mdump_retain.valid; + p_mdump_retain->epoch = mfw_mdump_retain.epoch; + p_mdump_retain->pf = mfw_mdump_retain.pf; + p_mdump_retain->status = mfw_mdump_retain.status; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + struct ecore_mdump_cmd_params mdump_cmd_params; + + OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params)); + mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_CLR_RETAIN; + + return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params); +} + static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) { + struct ecore_mdump_retain_data mdump_retain; + enum _ecore_status_t rc; + /* In CMT mode - no need for more than a single acknowledgment to the * MFW, and no more than a single notification to the upper driver. */ if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev)) return; - DP_NOTICE(p_hwfn, false, - "Received a critical error notification from the MFW!\n"); + rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain); + if (rc == ECORE_SUCCESS && mdump_retain.valid) { + DP_NOTICE(p_hwfn, false, + "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n", + mdump_retain.epoch, mdump_retain.pf, + mdump_retain.status); + } else { + DP_NOTICE(p_hwfn, false, + "The MFW notified that a critical error occurred in the device\n"); + } if (p_hwfn->p_dev->allow_mdump) { DP_NOTICE(p_hwfn, false, @@ -1584,6 +1637,8 @@ static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn, return; } + DP_NOTICE(p_hwfn, false, + "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n"); ecore_mcp_mdump_ack(p_hwfn, p_ptt); ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN); } @@ -2245,8 +2300,8 @@ enum 
_ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, u32 mask_parities) { - enum _ecore_status_t rc; u32 resp = 0, param = 0; + enum _ecore_status_t rc; rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES, mask_parities, &resp, &param); diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h index 9b6a9b4..b84f0d1 100644 --- a/drivers/net/qede/base/ecore_mcp.h +++ b/drivers/net/qede/base/ecore_mcp.h @@ -376,12 +376,33 @@ enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn, * * @param p_hwfn * @param p_ptt + * @param epoch * * @param return ECORE_SUCCESS upon success. */ enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt); +struct ecore_mdump_retain_data { + u32 valid; + u32 epoch; + u32 pf; + u32 status; +}; + +/** + * @brief - Gets the mdump retained data from the MFW. + * + * @param p_hwfn + * @param p_ptt + * @param p_mdump_retain + * + * @param return ECORE_SUCCESS upon success. + */ +enum _ecore_status_t +ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, + struct ecore_mdump_retain_data *p_mdump_retain); + /** * @brief - Sets the MFW's max value for the given resource * diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h index 86fa0cb..059b55e 100644 --- a/drivers/net/qede/base/ecore_mcp_api.h +++ b/drivers/net/qede/base/ecore_mcp_api.h @@ -1123,6 +1123,17 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt); /** + * @brief - Clear the mdump retained data. + * + * @param p_hwfn + * @param p_ptt + * + * @param return ECORE_SUCCESS upon success. + */ +enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt); + +/** * @brief - Processes the TLV request from MFW i.e., get the required TLV info * from the ecore client and send it to the MFW. 
* diff --git a/drivers/net/qede/base/mcp_public.h b/drivers/net/qede/base/mcp_public.h index 41711cc..f934c17 100644 --- a/drivers/net/qede/base/mcp_public.h +++ b/drivers/net/qede/base/mcp_public.h @@ -1108,6 +1108,13 @@ struct load_rsp_stc { #define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0) }; +struct mdump_retain_data_stc { + u32 valid; + u32 epoch; + u32 pf; + u32 status; +}; + union drv_union_data { struct mcp_mac wol_mac; /* UNLOAD_DONE */ @@ -1138,6 +1145,7 @@ struct load_rsp_stc { struct load_req_stc load_req; struct load_rsp_stc load_rsp; + struct mdump_retain_data_stc mdump_retain; /* ... */ }; @@ -1350,6 +1358,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05 /* Clear all logs */ #define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06 +#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07 /* Get retained data */ +#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08 /* Clear retain data */ #define DRV_MSG_CODE_MEM_ECC_EVENTS 0x00260000 /* Param: None */ /* Param: [0:15] - gpio number */ #define DRV_MSG_CODE_GPIO_INFO 0x00270000 -- 1.7.10.3