From: Huazhong Tan <tanhuazhong@huawei.com>
To: <davem@davemloft.net>
Cc: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<salil.mehta@huawei.com>, <yisen.zhuang@huawei.com>,
<linuxarm@huawei.com>, Shiju Jose <shiju.jose@huawei.com>,
Peng Li <lipeng321@huawei.com>,
"Huazhong Tan" <tanhuazhong@huawei.com>
Subject: [PATCH net-next 02/12] net: hns3: fix avoid unnecessary resetting for the H/W errors which do not require reset
Date: Thu, 13 Jun 2019 17:12:22 +0800 [thread overview]
Message-ID: <1560417152-53050-3-git-send-email-tanhuazhong@huawei.com> (raw)
In-Reply-To: <1560417152-53050-1-git-send-email-tanhuazhong@huawei.com>
From: Shiju Jose <shiju.jose@huawei.com>
HNS does not need to be reset when errors occur in some bits.
However presently the HNAE3_FUNC_RESET is set in this case and
as a result the default_reset is done when these errors are reported.
This patch fix this issue. Also patch does some optimization
in setting the reset level for the error recovery.
Reported-by: Weihang Li <liweihang@hisilicon.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 280 ++++++++-------------
1 file changed, 109 insertions(+), 171 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 1a2ea1b..3ea305e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -631,29 +631,20 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
{ /* sentinel */ }
};
-static enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg,
- const struct hclge_hw_error *err,
- u32 err_sts)
+static void hclge_log_error(struct device *dev, char *reg,
+ const struct hclge_hw_error *err,
+ u32 err_sts, unsigned long *reset_requests)
{
- enum hnae3_reset_type reset_level = HNAE3_FUNC_RESET;
- bool need_reset = false;
-
while (err->msg) {
if (err->int_msk & err_sts) {
dev_warn(dev, "%s %s found [error status=0x%x]\n",
reg, err->msg, err_sts);
- if (err->reset_level != HNAE3_NONE_RESET &&
- err->reset_level >= reset_level) {
- reset_level = err->reset_level;
- need_reset = true;
- }
+ if (err->reset_level &&
+ err->reset_level != HNAE3_NONE_RESET)
+ set_bit(err->reset_level, reset_requests);
}
err++;
}
- if (need_reset)
- return reset_level;
- else
- return HNAE3_NONE_RESET;
}
/* hclge_cmd_query_error: read the error information
@@ -1082,7 +1073,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
int num)
{
struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
- enum hnae3_reset_type reset_level;
struct device *dev = &hdev->pdev->dev;
__le32 *desc_data;
u32 status;
@@ -1099,49 +1089,39 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
/* log HNS common errors */
status = le32_to_cpu(desc[0].data[0]);
- if (status) {
- reset_level = hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
- &hclge_imp_tcm_ecc_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+ &hclge_imp_tcm_ecc_int[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(desc[0].data[1]);
- if (status) {
- reset_level = hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
- &hclge_cmdq_nic_mem_ecc_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+ &hclge_cmdq_nic_mem_ecc_int[0], status,
+ &ae_dev->hw_err_reset_req);
if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
dev_warn(dev, "imp_rd_data_poison_err found\n");
status = le32_to_cpu(desc[0].data[3]);
- if (status) {
- reset_level = hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
- &hclge_tqp_int_ecc_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+ &hclge_tqp_int_ecc_int[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(desc[0].data[4]);
- if (status) {
- reset_level = hclge_log_error(dev, "MSIX_ECC_INT_STS",
- &hclge_msix_sram_ecc_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "MSIX_ECC_INT_STS",
+ &hclge_msix_sram_ecc_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* log SSU(Storage Switch Unit) errors */
desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*(desc_data + 2));
- if (status) {
- reset_level = hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
- &hclge_ssu_mem_ecc_err_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
+ &hclge_ssu_mem_ecc_err_int[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
if (status) {
@@ -1151,41 +1131,32 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
}
status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "SSU_COMMON_ERR_INT",
- &hclge_ssu_com_err_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "SSU_COMMON_ERR_INT",
+ &hclge_ssu_com_err_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* log IGU(Ingress Unit) errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "IGU_INT_STS",
- &hclge_igu_int[0], status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "IGU_INT_STS",
+ &hclge_igu_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* log PPP(Programmable Packet Process) errors */
desc_data = (__le32 *)&desc[4];
status = le32_to_cpu(*(desc_data + 1));
- if (status) {
- reset_level =
- hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
- &hclge_ppp_mpf_abnormal_int_st1[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+ &hclge_ppp_mpf_abnormal_int_st1[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
- if (status) {
- reset_level =
- hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
- &hclge_ppp_mpf_abnormal_int_st3[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+ &hclge_ppp_mpf_abnormal_int_st3[0], status,
+ &ae_dev->hw_err_reset_req);
/* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[5];
@@ -1197,57 +1168,46 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
}
status = le32_to_cpu(*(desc_data + 2));
- if (status) {
- reset_level =
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
- &hclge_ppu_mpf_abnormal_int_st2[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+ &hclge_ppu_mpf_abnormal_int_st2[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
- if (status) {
- reset_level =
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
- &hclge_ppu_mpf_abnormal_int_st3[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
+ &hclge_ppu_mpf_abnormal_int_st3[0], status,
+ &ae_dev->hw_err_reset_req);
/* log TM(Traffic Manager) errors */
desc_data = (__le32 *)&desc[6];
status = le32_to_cpu(*desc_data);
- if (status) {
- reset_level = hclge_log_error(dev, "TM_SCH_RINT",
- &hclge_tm_sch_rint[0], status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "TM_SCH_RINT",
+ &hclge_tm_sch_rint[0], status,
+ &ae_dev->hw_err_reset_req);
/* log QCN(Quantized Congestion Control) errors */
desc_data = (__le32 *)&desc[7];
status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "QCN_FIFO_RINT",
- &hclge_qcn_fifo_rint[0], status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "QCN_FIFO_RINT",
+ &hclge_qcn_fifo_rint[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "QCN_ECC_RINT",
- &hclge_qcn_ecc_rint[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "QCN_ECC_RINT",
+ &hclge_qcn_ecc_rint[0], status,
+ &ae_dev->hw_err_reset_req);
/* log NCSI errors */
desc_data = (__le32 *)&desc[9];
status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "NCSI_ECC_INT_RPT",
- &hclge_ncsi_err_int[0], status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+ &hclge_ncsi_err_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* clear all main PF RAS errors */
hclge_cmd_reuse_desc(&desc[0], false);
@@ -1272,7 +1232,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
{
struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
struct device *dev = &hdev->pdev->dev;
- enum hnae3_reset_type reset_level;
__le32 *desc_data;
u32 status;
int ret;
@@ -1288,48 +1247,38 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
/* log SSU(Storage Switch Unit) errors */
status = le32_to_cpu(desc[0].data[0]);
- if (status) {
- reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
- &hclge_ssu_port_based_err_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+ &hclge_ssu_port_based_err_int[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(desc[0].data[1]);
- if (status) {
- reset_level = hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
- &hclge_ssu_fifo_overflow_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
+ &hclge_ssu_fifo_overflow_int[0], status,
+ &ae_dev->hw_err_reset_req);
status = le32_to_cpu(desc[0].data[2]);
- if (status) {
- reset_level = hclge_log_error(dev, "SSU_ETS_TCG_INT",
- &hclge_ssu_ets_tcg_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "SSU_ETS_TCG_INT",
+ &hclge_ssu_ets_tcg_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
- &hclge_igu_egu_tnl_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+ &hclge_igu_egu_tnl_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
- &hclge_ppu_pf_abnormal_int[0],
- status);
- set_bit(reset_level, &ae_dev->hw_err_reset_req);
- }
+ if (status)
+ hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
+ &hclge_ppu_pf_abnormal_int[0], status,
+ &ae_dev->hw_err_reset_req);
/* clear all PF RAS errors */
hclge_cmd_reuse_desc(&desc[0], false);
@@ -1671,8 +1620,9 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
hclge_handle_rocee_ras_error(ae_dev);
}
- if (status & HCLGE_RAS_REG_NFE_MASK ||
- status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
+ if ((status & HCLGE_RAS_REG_NFE_MASK ||
+ status & HCLGE_RAS_REG_ROCEE_ERR_MASK) &&
+ ae_dev->hw_err_reset_req) {
ae_dev->override_pci_need_reset = 0;
return PCI_ERS_RESULT_NEED_RESET;
}
@@ -1762,7 +1712,6 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
struct hclge_mac_tnl_stats mac_tnl_stats;
struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
- enum hnae3_reset_type reset_level;
struct hclge_desc desc_bd;
struct hclge_desc *desc;
__le32 *desc_data;
@@ -1800,24 +1749,19 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
/* log MAC errors */
desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data);
- if (status) {
- reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
- &hclge_mac_afifo_tnl_int[0],
- status);
- set_bit(reset_level, reset_requests);
- }
+ if (status)
+ hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+ &hclge_mac_afifo_tnl_int[0], status,
+ reset_requests);
/* log PPU(RCB) MPF errors */
desc_data = (__le32 *)&desc[5];
status = le32_to_cpu(*(desc_data + 2)) &
HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
- if (status) {
- reset_level =
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
- &hclge_ppu_mpf_abnormal_int_st2[0],
- status);
- set_bit(reset_level, reset_requests);
- }
+ if (status)
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+ &hclge_ppu_mpf_abnormal_int_st2[0],
+ status, reset_requests);
/* clear all main PF MSIx errors */
hclge_cmd_reuse_desc(&desc[0], false);
@@ -1841,32 +1785,26 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
/* log SSU PF errors */
status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
- &hclge_ssu_port_based_pf_int[0],
- status);
- set_bit(reset_level, reset_requests);
- }
+ if (status)
+ hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+ &hclge_ssu_port_based_pf_int[0],
+ status, reset_requests);
/* read and log PPP PF errors */
desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*desc_data);
- if (status) {
- reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
- &hclge_ppp_pf_abnormal_int[0],
- status);
- set_bit(reset_level, reset_requests);
- }
+ if (status)
+ hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
+ &hclge_ppp_pf_abnormal_int[0],
+ status, reset_requests);
/* log PPU(RCB) PF errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
- if (status) {
- reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
- &hclge_ppu_pf_abnormal_int[0],
- status);
- set_bit(reset_level, reset_requests);
- }
+ if (status)
+ hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
+ &hclge_ppu_pf_abnormal_int[0],
+ status, reset_requests);
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
if (status)
--
2.7.4
next prev parent reply other threads:[~2019-06-13 15:50 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-13 9:12 [PATCH net-next 00/12] net: hns3: some code optimizations & cleanups & bugfixes Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 01/12] net: hns3: delay setting of reset level for hw errors until slot_reset is called Huazhong Tan
2019-06-13 9:12 ` Huazhong Tan [this message]
2019-06-13 9:12 ` [PATCH net-next 03/12] net: hns3: process H/W errors occurred before HNS dev initialization Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 04/12] net: hns3: add recovery for the H/W errors occurred before the " Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 05/12] net: hns3: some changes of MSI-X bits in PPU(RCB) Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 06/12] net: hns3: extract handling of mpf/pf msi-x errors into functions Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 07/12] net: hns3: clear restting state when initializing HW device Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 08/12] net: hns3: free irq when exit from abnormal branch Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 09/12] net: hns3: fix for dereferencing before null checking Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 10/12] net: hns3: fix for skb leak when doing selftest Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 11/12] net: hns3: delay ring buffer clearing during reset Huazhong Tan
2019-06-13 9:12 ` [PATCH net-next 12/12] net: hns3: some variable modification Huazhong Tan
2019-06-15 2:26 ` [PATCH net-next 00/12] net: hns3: some code optimizations & cleanups & bugfixes David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1560417152-53050-3-git-send-email-tanhuazhong@huawei.com \
--to=tanhuazhong@huawei.com \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxarm@huawei.com \
--cc=lipeng321@huawei.com \
--cc=netdev@vger.kernel.org \
--cc=salil.mehta@huawei.com \
--cc=shiju.jose@huawei.com \
--cc=yisen.zhuang@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).